# Oysiyl's picture
# Update app.py
# c7932cb verified
# raw
# history blame
# No virus
# 1.13 kB
import gradio as gr
from transformers import pipeline
import numpy as np
import librosa
from punctuators.models import PunctCapSegModelONNX
# Model identifiers handed to the speech-recognition pipeline and to the
# punctuation / capitalization / segmentation model loader.
ASR_MODEL_ID = "Oysiyl/w2v-bert-2.0-ukrainian-colab-CV16.0"
PUNCT_CAP_MODEL_ID = "1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase"

# Both models are created once at module level and reused by every call to
# ``transcribe``.
transcriber = pipeline(task="automatic-speech-recognition", model=ASR_MODEL_ID)
punct_cap_model = PunctCapSegModelONNX.from_pretrained(PUNCT_CAP_MODEL_ID)
def transcribe(audio):
    """Transcribe an audio clip and restore punctuation and capitalization.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the
            ``gr.Audio`` input, or ``None`` when nothing was submitted.
            ``samples`` is a NumPy array; a 2-D array is assumed to be laid
            out as ``(samples, channels)`` (Gradio's documented layout --
            confirm if the input component changes).

    Returns:
        The punctuated, true-cased transcription with all detected sentences
        joined by single spaces. An empty string is returned when there is
        no audio, the clip is empty or digitally silent, or the recognizer
        produced no text.
    """
    # Submitting the form without recording/uploading yields None.
    if audio is None:
        return ""

    sr, y = audio
    y = y.astype(np.float32)

    # Downmix multi-channel audio to mono; the resampler and the recognizer
    # below are fed a single 1-D waveform.
    if y.ndim > 1:
        y = y.mean(axis=1)

    # np.max raises on an empty array, so bail out first.
    if y.size == 0:
        return ""

    # Peak-normalize. A zero peak means digital silence: dividing would turn
    # the whole signal into NaNs, and there is nothing to transcribe anyway.
    peak = np.max(np.abs(y))
    if peak == 0:
        return ""
    y /= peak

    # The recognizer is always called with a 16 kHz waveform.
    if sr != 16000:
        y = librosa.resample(y, orig_sr=sr, target_sr=16000)

    transcribed_text = transcriber({"sampling_rate": 16000, "raw": y})["text"]
    if not transcribed_text.strip():
        return ""

    # With apply_sbd=True the model returns, for each input text, a list of
    # sentences. Keep every sentence instead of only the first one.
    sentences = punct_cap_model.infer(texts=[transcribed_text], apply_sbd=True)[0]
    return " ".join(sentences)
# Audio input component: accepts either an uploaded file or a microphone
# recording.
audio_input = gr.Audio(sources=["upload", "microphone"])

# Single-function UI: audio in, transcribed text out.
demo = gr.Interface(
    fn=transcribe,
    inputs=audio_input,
    outputs="text",
    title="Automatic Speech Recognition for Ukrainian language demo",
    description="Click on the example below, upload audio from file or say something in microphone!",
    examples=[
        ["examples/asr_example.wav"],
        ["examples/tts_example.wav"],
    ],
    cache_examples=True,
)

demo.launch()