acalejos committed on
Commit
cc2b025
1 Parent(s): 8b0a58c

Create nx-vad.livemd

Browse files
Files changed (1) hide show
  1. public-apps/nx-vad.livemd +85 -0
public-apps/nx-vad.livemd ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- livebook:{"app_settings":{"access_type":"public","auto_shutdown_ms":60000,"multi_session":true,"output_type":"rich","show_existing_sessions":false,"show_source":true,"slug":"vad"}} -->
2
+
3
+ # Nx Voice-Activity Detection
4
+
5
+ ```elixir
6
+ Mix.install([
7
+ {:ortex, "~> 0.1.9"},
8
+ {:kino_vega_lite, "~> 0.1.10"},
9
+ {:kino_live_audio, "~> 0.1"},
10
+ {:req, "~> 0.4"}
11
+ ])
12
+ ```
13
+
14
+ ## Setup Model & Plot
15
+
16
+ ```elixir
17
# Download the Silero VAD model (ONNX) into the current working directory.
# The local file is named after the last path segment of the URL.
url = "https://raw.githubusercontent.com/snakers4/silero-vad/master/files/silero_vad.onnx"
filename = Path.basename(url)

# `decode_body: false` keeps Req from trying to decode the payload, and
# `into:` streams the body straight into the file instead of buffering it.
resp =
  Req.get!(
    url,
    decode_body: false,
    into: File.stream!(filename)
  )

# Fail loudly here on a non-200 response; otherwise an error page could be
# left on disk and Ortex.load/1 would fail later with a much less obvious error.
%Req.Response{status: 200} = resp

model = Ortex.load(filename)

# Live line chart that the audio listener pushes points into.
# The x axis is only used for ordering, so all of its decorations are hidden;
# the y axis is pinned to the 0..1 range.
chart =
  VegaLite.new(title: "Voice-Activity Detection", width: 800, height: 400)
  |> VegaLite.mark(:line)
  |> VegaLite.encode_field(:x, "x",
    type: :quantitative,
    title: "Time",
    axis: [ticks: false, domain: false, grid: false, labels: false]
  )
  |> VegaLite.encode_field(:y, "y",
    type: :quantitative,
    title: "Voice",
    scale: [domain_max: 1, domain_min: 0]
  )
  |> Kino.VegaLite.new()
43
+ ```
44
+
45
+ ```elixir
46
# Free-text inputs for the recorder settings. Their values are strings and are
# parsed into integers by the cells that read them.
chunk_size = Kino.Input.text("Chunk Size", default: "1")
sample_rate = Kino.Input.text("Sample Rate", default: "16000")

# Unit selector that is passed to the recorder together with "Chunk Size".
unit =
  Kino.Input.select(
    "Unit",
    [samples: "Samples", s: "Seconds", ms: "Milliseconds", mu: "Microseconds"],
    default: :s
  )

# Button that wipes all points currently shown on the chart.
clear = Kino.Control.button("Clear Plot")
Kino.listen(clear, fn _event -> Kino.VegaLite.clear(chart) end)

# Lay out the three settings side by side, with the button underneath.
top_row = Kino.Layout.grid([sample_rate, chunk_size, unit], columns: 3)
Kino.Layout.grid([top_row, clear])
60
+ ```
61
+
62
+ ```elixir
63
# Both numeric settings come from text inputs. Integer.parse/1 returns
# `{integer, rest}`, so `elem(0)` keeps just the leading integer.
read_int = fn input ->
  input
  |> Kino.Input.read()
  |> Integer.parse()
  |> elem(0)
end

# Build the live audio widget from the current values of the inputs.
liveAudio =
  KinoLiveAudio.new(
    chunk_size: read_int.(chunk_size),
    unit: Kino.Input.read(unit),
    sample_rate: read_int.(sample_rate)
  )
69
+ ```
70
+
71
+ ```elixir
72
# The sample rate does not change while the listener is running, so read and
# parse it once up front instead of on every audio chunk. The model receives it
# as a signed 64-bit integer tensor.
sr =
  sample_rate
  |> Kino.Input.read()
  |> Integer.parse()
  |> elem(0)
  |> Nx.tensor(type: :s64)

# Initial state threaded through the listener: two zero-filled {2, 1, 64}
# tensors that are fed to the model and replaced by its outputs on every chunk.
initial_state = {Nx.broadcast(0.0, {2, 1, 64}), Nx.broadcast(0.0, {2, 1, 64})}

liveAudio
|> Kino.Control.stream()
|> Kino.listen(initial_state, fn
  %{event: :audio_chunk, chunk: data}, {hn, cn} ->
    # NOTE(review): Nx.stack/1 on a single tensor presumably adds a leading
    # batch axis - confirm against the model's expected input shape.
    input = data |> Nx.tensor() |> Nx.stack()

    {output, hn, cn} = Ortex.run(model, {input, sr, hn, cn})

    # The pattern match asserts that the model produced exactly one value.
    [voice] = output |> Nx.flatten() |> Nx.to_list()

    # Keep only the most recent 1000 points on the chart.
    Kino.VegaLite.push(chart, %{x: :os.system_time(), y: voice}, window: 1000)

    # Carry the updated model state over to the next chunk.
    {:cont, {hn, cn}}
end)
85
+ ```