livebook-apps / public-apps /nx-vad.livemd
acalejos's picture
Create nx-vad.livemd
cc2b025 verified
raw
history blame
2.38 kB
<!-- livebook:{"app_settings":{"access_type":"public","auto_shutdown_ms":60000,"multi_session":true,"output_type":"rich","show_existing_sessions":false,"show_source":true,"slug":"vad"}} -->
# Nx Voice-Activity Detection
```elixir
Mix.install([
{:ortex, "~> 0.1.9"},
{:kino_vega_lite, "~> 0.1.10"},
{:kino_live_audio, "~> 0.1"},
{:req, "~> 0.4"}
])
```
## Setup Model & Plot
```elixir
# Download the Silero VAD ONNX model into the current working directory,
# naming the local file after the last path segment of the URL.
# NOTE(review): the URL tracks the `master` branch, so the file may move or
# change upstream — consider pinning to a tag/commit once confirmed.
url = "https://raw.githubusercontent.com/snakers4/silero-vad/master/files/silero_vad.onnx"
filename = Path.basename(url)

# `decode_body: false` leaves the payload untouched and `into:` streams it to
# disk instead of buffering the whole model in memory.
resp =
  Req.get!(
    url,
    decode_body: false,
    into: File.stream!(filename)
  )

# Fail fast on a bad download: otherwise an error page could be left on disk
# under the model's filename and only surface as an obscure failure in
# `Ortex.load/1`.
if resp.status != 200 do
  File.rm(filename)
  raise "Failed to download #{url}: HTTP #{resp.status}"
end

model = Ortex.load(filename)

# Live line chart that the audio listener pushes points into. The x axis is
# only used for ordering, so its ticks/labels/grid are hidden; the y axis is
# fixed to the [0, 1] range.
chart =
  VegaLite.new(title: "Voice-Activity Detection", width: 800, height: 400)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "x",
    type: :quantitative,
    title: "Time",
    axis: [ticks: false, domain: false, grid: false, labels: false]
  )
  |> VegaLite.encode_field(:y, "y",
    type: :quantitative,
    title: "Voice",
    scale: [domain_max: 1, domain_min: 0]
  )
  |> Kino.VegaLite.new()
```
```elixir
# Text inputs for the audio capture settings; both are parsed as integers by
# the cells that read them.
chunk_size = Kino.Input.text("Chunk Size", default: "1")
sample_rate = Kino.Input.text("Sample Rate", default: "16000")

# Unit in which "Chunk Size" is expressed. The keys are the values passed on
# as the `unit:` option of `KinoLiveAudio.new/1`.
unit =
  Kino.Input.select(
    "Unit",
    [samples: "Samples", s: "Seconds", ms: "Milliseconds", mu: "Microseconds"],
    default: :s
  )

# Button that wipes all points currently shown in the chart.
clear = Kino.Control.button("Clear Plot")
Kino.listen(clear, fn _event -> Kino.VegaLite.clear(chart) end)

# Lay out the three inputs side by side, with the clear button underneath.
top_row = Kino.Layout.grid([sample_rate, chunk_size, unit], columns: 3)
Kino.Layout.grid([top_row, clear])
```
```elixir
# Reads a text input and parses it as an integer. Surrounding whitespace is
# ignored and, as before, any trailing non-digit text after the number is
# dropped. Raises a descriptive error instead of the opaque `elem/2` failure
# when the value does not start with an integer.
read_integer = fn input, label ->
  raw = Kino.Input.read(input)

  case raw |> String.trim() |> Integer.parse() do
    {value, _rest} -> value
    :error -> raise ArgumentError, "#{label} must be an integer, got: #{inspect(raw)}"
  end
end

# Live audio widget configured from the current values of the inputs.
liveAudio =
  KinoLiveAudio.new(
    chunk_size: read_integer.(chunk_size, "Chunk Size"),
    unit: Kino.Input.read(unit),
    sample_rate: read_integer.(sample_rate, "Sample Rate")
  )
```
```elixir
# The sample rate does not change while the stream is running, so read and
# parse it once here instead of on every audio chunk. It comes from the same
# input that `liveAudio` was configured from.
sr =
  case Kino.Input.read(sample_rate) |> String.trim() |> Integer.parse() do
    {rate, _rest} -> Nx.tensor(rate, type: :s64)
    :error -> raise ArgumentError, "Sample Rate must be an integer"
  end

# Zero-filled starting values for the two state tensors that are fed to the
# model and replaced by the ones it returns on every run.
# NOTE(review): presumably the recurrent hidden/cell state of the Silero
# model; the {2, 1, 64} shape is model-specific — confirm against the model.
initial_state = {Nx.broadcast(0.0, {2, 1, 64}), Nx.broadcast(0.0, {2, 1, 64})}

liveAudio
|> Kino.Control.stream()
|> Kino.listen(initial_state, fn
  %{event: :audio_chunk, chunk: data}, {hn, cn} ->
    # Add a leading axis of size 1 in front of the chunk's samples.
    input = data |> Nx.tensor() |> Nx.new_axis(0)

    {output, hn, cn} = Ortex.run(model, {input, sr, hn, cn})

    # The model is expected to yield exactly one value per chunk; the match
    # fails loudly if it ever returns more.
    [probability] = output |> Nx.flatten() |> Nx.to_list()

    # Keep only the most recent 1000 points in the chart.
    Kino.VegaLite.push(chart, %{x: :os.system_time(), y: probability}, window: 1000)

    {:cont, {hn, cn}}

  # Ignore any other event kinds and keep the state untouched, rather than
  # failing with a FunctionClauseError.
  _other_event, state ->
    {:cont, state}
end)
```