import gradio as gr
from transformers import pipeline
import numpy as np
import librosa

transcriber = pipeline(
    "automatic-speech-recognition",
    model="Oysiyl/w2v-bert-2.0-ukrainian-colab-CV16.0",
)


def transcribe(audio):
    sr, y = audio
    # Convert to float32 (and mix down to mono if needed) before any processing.
    y = y.astype(np.float32)
    if y.ndim > 1:
        y = y.mean(axis=1)
    # Peak-normalize to [-1, 1].
    y /= np.max(np.abs(y))
    # The model expects 16 kHz input, so resample and update the rate accordingly.
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)
        sr = 16000
    return transcriber({"sampling_rate": sr, "raw": y})["text"]


demo = gr.Interface(
    transcribe,
    gr.Audio(sources=["upload", "microphone"]),
    outputs="text",
    title="Automatic Speech Recognition for Ukrainian language demo",
    description="Click on an example below, upload an audio file, or say something into the microphone!",
    examples=[["examples/asr_example.wav"], ["examples/tts_example.wav"]],
    cache_examples=True,
)

demo.launch()
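
# Optional local sanity check (a minimal sketch, not part of the original demo):
# call transcribe() directly on the bundled example file, bypassing the web UI.
# Assumes examples/asr_example.wav exists, as it is already referenced in the
# Interface examples above; the helper name _local_check is illustrative only.
# Run it before demo.launch() or in a separate script, since launch() blocks.
def _local_check(path="examples/asr_example.wav"):
    y, sr = librosa.load(path, sr=None)  # keep the file's native sampling rate
    print(transcribe((sr, y)))

# _local_check()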