Spaces:
Runtime error
Runtime error
<!-- livebook:{"app_settings":{"access_type":"public","auto_shutdown_ms":60000,"multi_session":true,"output_type":"rich","show_existing_sessions":false,"show_source":true,"slug":"vad"}} -->

# Nx Voice-Activity Detection

```elixir
# Notebook dependencies:
#   * ortex          - loads and runs the ONNX model
#   * kino_vega_lite - live-updating chart widget
#   * kino_live_audio - live audio widget that emits audio chunks
#   * req            - HTTP client used to download the model file
Mix.install([
  {:ortex, "~> 0.1.9"},
  {:kino_vega_lite, "~> 0.1.10"},
  {:kino_live_audio, "~> 0.1"},
  {:req, "~> 0.4"}
])
```

## Setup Model & Plot

```elixir
# Location of the Silero VAD model in ONNX format.
url = "https://raw.githubusercontent.com/snakers4/silero-vad/master/files/silero_vad.onnx"

# Use the last path segment of the URL as the local file name.
filename = Path.basename(url)

# Stream the response body straight into the file instead of holding it in
# memory; `decode_body: false` keeps Req from trying to decode the raw bytes.
resp =
  Req.get!(
    url,
    decode_body: false,
    into: File.stream!(filename)
  )

# Load the downloaded ONNX model so later cells can run inference with it.
model = Ortex.load(filename)

# Line chart of the model output over time. The y axis is pinned to [0, 1];
# the x axis decorations are hidden because x is only used for ordering.
chart =
  VegaLite.new(title: "Voice-Activated Detection", width: 800, height: 400)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "x",
    type: :quantitative,
    title: "Time",
    axis: [ticks: false, domain: false, grid: false, labels: false]
  )
  |> VegaLite.encode_field(:y, "y",
    type: :quantitative,
    title: "Voice",
    scale: [domain_max: 1, domain_min: 0]
  )
  |> Kino.VegaLite.new()
```

```elixir
# Text inputs for the audio capture settings; their values are parsed as
# integers by the cells that read them.
chunk_size = Kino.Input.text("Chunk Size", default: "1")
sample_rate = Kino.Input.text("Sample Rate", default: "16000")

# Unit in which the chunk size is expressed.
unit =
  Kino.Input.select(
    "Unit",
    [samples: "Samples", s: "Seconds", ms: "Milliseconds", mu: "Microseconds"],
    default: :s
  )

# Button that wipes all points currently shown on the chart.
clear = Kino.Control.button("Clear Plot")
Kino.listen(clear, fn _ -> Kino.VegaLite.clear(chart) end)

# Lay out the three inputs in one row with the clear button underneath.
top_row = Kino.Layout.grid([sample_rate, chunk_size, unit], columns: 3)
Kino.Layout.grid([top_row, clear])
```

```elixir
# Reads a Kino text input and parses its leading integer.
# Surrounding whitespace is ignored. Raises ArgumentError with a readable
# message when the text does not start with an integer, instead of the opaque
# error produced by calling `elem/2` on `:error`.
read_integer = fn input, label ->
  text = input |> Kino.Input.read() |> String.trim()

  case Integer.parse(text) do
    {value, _rest} ->
      value

    :error ->
      raise ArgumentError, "#{label} must be an integer, got: #{inspect(text)}"
  end
end

# Live audio widget configured from the current values of the inputs above.
# Re-evaluate this cell after changing an input for the change to take effect.
liveAudio =
  KinoLiveAudio.new(
    chunk_size: read_integer.(chunk_size, "Chunk Size"),
    unit: Kino.Input.read(unit),
    sample_rate: read_integer.(sample_rate, "Sample Rate")
  )
```

```elixir
# Initial model state: two zero tensors of shape {2, 1, 64}. They are passed
# to the model on every call and replaced by the state tensors it returns.
# NOTE(review): presumably the recurrent hidden/cell state of the VAD model — confirm.
initial_state = {Nx.broadcast(0.0, {2, 1, 64}), Nx.broadcast(0.0, {2, 1, 64})}

liveAudio
|> Kino.Control.stream()
|> Kino.listen(initial_state, fn
  %{event: :audio_chunk, chunk: data}, {hn, cn} ->
    # Wrap the chunk in a leading axis before handing it to the model.
    input = Nx.tensor(data) |> Nx.stack()

    # The sample rate is re-read on every chunk and passed as a 64-bit integer.
    sr = Nx.tensor(Kino.Input.read(sample_rate) |> Integer.parse() |> elem(0), type: :s64)

    {output, hn, cn} = Ortex.run(model, {input, sr, hn, cn})

    # The model output is expected to hold exactly one value; the match fails
    # loudly otherwise.
    [score] = Nx.to_list(output |> Nx.flatten())

    # Keep only the most recent 1000 points on the chart.
    row = %{x: :os.system_time(), y: score}
    Kino.VegaLite.push(chart, row, window: 1000)

    # Carry the updated state over to the next chunk.
    {:cont, {hn, cn}}

  _other_event, state ->
    # Ignore anything that is not an audio chunk and keep the state as is.
    {:cont, state}
end)
```