# Oysiyl's picture
# Update app.py
# e951ff3 verified
# raw
# history blame
# No virus
# 1.12 kB
import gradio as gr
from transformers import pipeline
import numpy as np
import librosa
from punctuators.models import PunctCapSegModelONNX
# Identifiers of the two pretrained models this demo loads at import time.
ASR_MODEL_ID = "Oysiyl/w2v-bert-2.0-ukrainian-colab-CV16.0"
PUNCT_CAP_MODEL_ID = "1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase"

# Speech-to-text pipeline used by transcribe() to turn raw audio into text.
transcriber = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)

# Model applied to the recogniser output to restore punctuation and casing.
punct_cap_model = PunctCapSegModelONNX.from_pretrained(PUNCT_CAP_MODEL_ID)
def transcribe(audio):
    """Transcribe an audio clip and restore punctuation and capitalisation.

    Args:
        audio: ``(sample_rate, samples)`` pair as produced by the audio input
            component, or ``None`` when no clip was supplied.

    Returns:
        The recognised text after punctuation/casing restoration, or an empty
        string when there is no audio or nothing was recognised.
    """
    # Nothing recorded or uploaded: there is nothing to transcribe.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y)
    if y.size == 0:
        return ""

    # Mix multi-channel audio down to mono. Gradio's ASR guide documents the
    # samples array as (samples, channels) for stereo input.
    if y.ndim > 1:
        y = y.mean(axis=1)

    y = y.astype(np.float32)

    # Peak-normalise, but skip all-zero (silent) clips: dividing by a zero
    # peak would fill the signal with NaNs.
    peak = np.max(np.abs(y))
    if peak > 0:
        y /= peak

    # The recogniser is always fed 16 kHz audio, so resample anything else.
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)

    transcribed_text = transcriber({"sampling_rate": 16000, "raw": y})["text"]
    if not transcribed_text.strip():
        return ""

    # With apply_sbd=True the model returns, per input text, a list of
    # sentences; join them all so multi-sentence transcripts are not cut
    # down to their first sentence.
    sentences = punct_cap_model.infer(texts=[transcribed_text], apply_sbd=True)[0]
    return " ".join(sentences)
# Audio input that accepts either an uploaded file or a microphone recording.
audio_input = gr.Audio(sources=["upload", "microphone"])

# Bundled sample clips offered as clickable examples; one row per clip.
example_rows = [
    [clip_path]
    for clip_path in ("examples/asr_example.wav", "examples/tts_example.wav")
]

# Single-function UI: audio in, transcribed text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Automatic Speech Recognition for Ukrainian language demo",
    description="Click on the example below, upload audio from file or say something in microphone!",
    examples=example_rows,
    cache_examples=True,
)

# Start the web app.
demo.launch()