File size: 2,625 Bytes
7f1afcd
297e244
7f1afcd
a4107b1
297e244
 
 
 
a4107b1
297e244
 
 
 
a4107b1
297e244
a4107b1
297e244
a4107b1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297e244
 
a4107b1
 
 
 
 
 
 
 
 
 
 
 
 
297e244
7f1afcd
a4107b1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import gradio as gr
from zeroshot import process, ZS_EXAMPLES

# Declarative UI for the MMS zero-shot ASR demo.
#
# Layout: a header and an explanatory blurb, then one row with two columns.
# The first column holds every input (audio, text data, optional language
# model, decoding settings) plus the submit button; the second column holds
# the transcript output. Components are rendered in the order they are
# created inside each layout context, so statement order here is significant.
with gr.Blocks(css="style.css") as demo:
    # NOTE(review): the "paper" link targets the arXiv home page rather than a
    # specific paper -- looks like a placeholder; confirm the intended URL.
    gr.Markdown(
        "<p align='center' style='font-size: 20px;'>MMS Zero-shot ASR Demo. See our arXiv <a href='https://arxiv.org/'>paper</a> for model details.</p>"
    )
    gr.HTML(
        """<center>The demo works on input audio in any language, as long as you provide a list of words or sentences for that language and an optional n-gram language model (even a simple 1-gram model will work!) to help with accuracy.<br>We recommend having a minimum of 5000 distinct words in the textfile to achieve a good performance.</center>"""
    )
    with gr.Row():
        with gr.Column():
            audio = gr.Audio(label="Audio Input\n(use microphone or upload a file)")

            with gr.Row():
                words_file = gr.File(label="Text Data")
                lm_file = gr.File(label="Language Model\n(optional)")

            with gr.Accordion("Advanced Settings", open=False):
                gr.Markdown(
                    "The following parameters are used for beam-search decoding. Use the default values if you are not sure."
                )
                # Both score sliders share the same range, step and initial
                # value; only their labels differ.
                score_slider_config = dict(
                    minimum=-10.0,
                    maximum=10.0,
                    value=0,
                    step=0.1,
                    interactive=True,
                )
                with gr.Row():
                    wscore = gr.Slider(
                        label="Word Insertion Score", **score_slider_config
                    )
                    lmscore = gr.Slider(
                        label="Language Model Score", **score_slider_config
                    )
                # Checked by default. Presumably `process` ignores the matching
                # slider value while a box is checked -- verify against
                # zeroshot.process, which is not visible from this file.
                with gr.Row():
                    wscore_usedefault = gr.Checkbox(
                        label="Use Default Word Insertion Score", value=True
                    )
                    lmscore_usedefault = gr.Checkbox(
                        label="Use Default Language Model Score", value=True
                    )
            btn = gr.Button("Submit", elem_id="submit")
        with gr.Column():
            text = gr.Textbox(label="Transcript")

    # Gradio passes the current values of `inputs` to `process` positionally,
    # in the order listed, and writes the return value into `text`.
    btn.click(
        process,
        inputs=[
            audio,
            words_file,
            lm_file,
            wscore,
            lmscore,
            wscore_usedefault,
            lmscore_usedefault,
        ],
        outputs=text,
    )
    # Each example populates only the audio and text-data inputs; the language
    # model and decoding settings keep whatever values they currently have.
    examples = gr.Examples(examples=ZS_EXAMPLES, inputs=[audio, words_file])

# Start serving the UI defined above.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so this call
# also runs whenever the module is imported -- confirm that is intended before
# importing `demo` from elsewhere.
demo.launch()