"""Gradio front end for the MMS zero-shot ASR demo.

Builds a two-column Blocks layout (inputs on one side, transcript on the
other), wires the submit button to ``zeroshot.process`` and launches the app.
"""

import gradio as gr

from zeroshot import ZS_EXAMPLES, process

# NOTE(review): the nesting of the ``with`` blocks below was reconstructed
# from the statement order of a whitespace-collapsed file -- confirm the
# exact placement of the button and of the click/examples wiring.
with gr.Blocks() as demo:
    # Empty Markdown component kept exactly as in the original script.
    gr.Markdown("")
    # The text must live in a single-line literal with explicit "\n" escapes:
    # a plain double-quoted string cannot span physical lines.
    gr.Markdown(
        "\n\nMMS Zero-shot ASR Demo. See our arXiV paper for model details.\n\n"
    )
    gr.HTML(
        """
The demo works on input audio in any language, as long as you provide a list of words for that language and an optional n-gram language model (even a simple 1-gram model will work!) to help with accuracy.
"""
    )

    with gr.Row():
        # Input side: audio plus the word list and optional language model.
        with gr.Column():
            audio = gr.Audio(label="Audio Input\n(use microphone or upload a file)")
            with gr.Row():
                words_file = gr.File(label="Words File\n(one word per line)")
                lm_file = gr.File(label="Language Model\n(optional)")
            btn = gr.Button("Submit")
        # Output side: the transcript text box.
        with gr.Column():
            text = gr.Textbox(label="Transcript")

    # Event wiring and examples are created inside the Blocks context so
    # they are registered on ``demo``.
    btn.click(process, inputs=[audio, words_file, lm_file], outputs=text)

    # Examples populate only the audio and words-file inputs (no LM file).
    examples = gr.Examples(examples=ZS_EXAMPLES, inputs=[audio, words_file])

# Launched unconditionally at import/run time, as in the original script.
demo.launch(share=True)