Spaces:

Oysiyl
/

asr-ukrainian-w2v-bert

Running

File size: 1,132 Bytes

7568d0b
 
 
eeaadf2
e951ff3
7568d0b
 
e951ff3
7568d0b
 
 
 
 
10b5093
 
e951ff3
c7932cb
e951ff3
7568d0b
 
 
 
16d9c5d
7568d0b
 
16d9c5d
0ba2497
7568d0b

import gradio as gr
from transformers import pipeline
import numpy as np
import librosa
from punctuators.models import PunctCapSegModelONNX

# Both models are loaded once at import time so that every request handled by
# the app reuses the same in-memory instances.

# Speech-to-text pipeline; the checkpoint id indicates a w2v-bert 2.0 model
# for Ukrainian.
transcriber = pipeline(
    task="automatic-speech-recognition",
    model="Oysiyl/w2v-bert-2.0-ukrainian-colab-CV16.0",
)

# Post-processing model applied to the raw transcript (punctuation, casing and
# sentence boundaries, judging by the checkpoint id and how it is used below).
punct_cap_model = PunctCapSegModelONNX.from_pretrained(
    "1-800-BAD-CODE/xlm-roberta_punctuation_fullstop_truecase"
)

def transcribe(audio):
    """Transcribe a Gradio audio input and restore punctuation and casing.

    Args:
        audio: ``(sample_rate, samples)`` tuple as produced by ``gr.Audio``,
            or ``None`` when nothing was recorded or uploaded. ``samples`` is
            a NumPy array, either mono ``(n,)`` or multi-channel
            ``(n, channels)``.

    Returns:
        The transcript with punctuation and capitalisation applied, or an
        empty string when there is no audio or nothing was recognised.
    """
    target_sr = 16000  # sampling rate passed to the ASR pipeline below

    # Gradio passes None when the user submits without providing audio.
    if audio is None:
        return ""

    sr, y = audio
    y = np.asarray(y, dtype=np.float32)

    # Down-mix (samples, channels) input to mono: resampling below works on
    # the last axis and the ASR pipeline expects a single channel.
    if y.ndim > 1:
        y = y.mean(axis=1)

    if y.size == 0:
        return ""

    # Peak-normalise, but skip pure silence so we never divide by zero and
    # feed NaNs to the model. Not done in place, to leave the caller's array
    # untouched when it is already float32.
    peak = np.max(np.abs(y))
    if peak > 0:
        y = y / peak

    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)

    transcribed_text = transcriber({"sampling_rate": target_sr, "raw": y})["text"]
    if not transcribed_text.strip():
        # Nothing recognised; there is nothing to punctuate.
        return ""

    # With apply_sbd=True the model returns one list of sentences per input
    # text. Join all of them: taking only the first element would drop
    # everything after the first detected sentence boundary.
    sentences = punct_cap_model.infer(texts=[transcribed_text], apply_sbd=True)[0]
    return " ".join(sentences)


# Gradio UI: a single audio input (file upload or microphone) wired to
# `transcribe`, with the resulting text shown as plain output.
demo = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["upload", "microphone"]),
    outputs="text",
    title="Automatic Speech Recognition for Ukrainian language demo",
    description="Click on the example below, upload audio from file or say something in microphone!",
    # Bundled sample clips offered as one-click examples.
    examples=[
        ["examples/asr_example.wav"],
        ["examples/tts_example.wav"],
    ],
    # Ask Gradio to cache the example outputs instead of recomputing them.
    cache_examples=True,
)

# Start the web server for the app.
demo.launch()