# call-sentiment / app.py
# Author: ktangri
# Commit: eb6ba59 - Add punctuation correction
# (raw / history / blame; 1.01 kB)
import gradio as gr
from transformers import pipeline, Wav2Vec2ProcessorWithLM
from librosa import load, resample
from rpunct import RestorePuncts
# Checkpoint name shared by the processor and the recognition pipeline.
asr_model = 'patrickvonplaten/wav2vec2-base-100h-with-lm'
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)

# Speech-recognition pipeline wired to the processor's tokenizer,
# feature extractor and decoder.
asr = pipeline(
    'automatic-speech-recognition',
    model=asr_model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    decoder=processor.decoder,
)

# Punctuation restorer applied to the recognised text in transcribe().
rpunct = RestorePuncts()
def transcribe(filepath: str) -> str:
    """Transcribe an audio file and restore punctuation in the result.

    Args:
        filepath: Path of the audio file to transcribe.

    Returns:
        The recognised text, lower-cased and run through punctuation
        restoration. When nothing was recognised, the (empty or
        whitespace-only) recogniser output is returned unchanged.
    """
    # Ask librosa for 16 kHz audio directly. load() resamples to 22050 Hz
    # by default, so loading first and resampling afterwards converted the
    # signal twice; it also relied on positional sample-rate arguments to
    # resample(), which newer librosa releases accept only as keywords.
    speech, _ = load(filepath, sr=16000)

    text = asr(speech)['text']
    if not text.strip():
        # Nothing was recognised, so there is nothing to punctuate.
        return text

    return rpunct.punctuate(text.lower())
# Input widget: microphone recording, handed to the callback as a file path.
mic = gr.inputs.Audio(
    source='microphone',
    type='filepath',
    label='Speech input',
    optional=False,
)

# Output widget that displays the text returned by the callback.
transcript = gr.outputs.Textbox(type='auto', label='Transcription')

# Connect the widgets to transcribe() and start the app.
iface = gr.Interface(
    fn=transcribe,
    inputs=[mic],
    outputs=[transcript],
    theme='huggingface',
    description='Testing transcription',
)
iface.launch()