call-sentiment / app.py
import gradio as gr
from transformers import pipeline, Wav2Vec2ProcessorWithLM
from pyannote.audio import Pipeline
from librosa import load, resample
from rpunct import RestorePuncts
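
# Wav2Vec2 ASR checkpoint that ships with an n-gram language model,
# used for beam-search decoding through Wav2Vec2ProcessorWithLM.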
asr_model = 'patrickvonplaten/wav2vec2-base-100h-with-lm'
processor = Wav2Vec2ProcessorWithLM.from_pretrained(asr_model)
asr = pipeline('automatic-speech-recognition',
               model=asr_model,
               tokenizer=processor.tokenizer,
               feature_extractor=processor.feature_extractor,
               decoder=processor.decoder)
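
# Pretrained pyannote pipeline that splits the audio into speaker turns.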
speaker_segmentation = Pipeline.from_pretrained("pyannote/speaker-segmentation")
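
# rpunct restores punctuation and capitalization that the ASR output lacks.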
rpunct = RestorePuncts()


def transcribe(filepath):
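    """Transcribe an audio file and group the words by speaker turn."""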
    # Load at the file's native sampling rate, then resample to the 16 kHz
    # expected by the Wav2Vec2 model if necessary.
    speech, sampling_rate = load(filepath, sr=None)
    if sampling_rate != 16000:
        speech = resample(speech, orig_sr=sampling_rate, target_sr=16000)

    # pyannote pipelines take a file path (or a {"waveform", "sample_rate"}
    # dict), not a raw librosa array, so pass the path here.
    speaker_output = speaker_segmentation(filepath)
    text = asr(speech, return_timestamps="word")

    full_text = text['text'].lower()
    chunks = text['chunks']

    diarized_output = ""
    i = 0
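
    # Walk the speaker turns in order; greedily attach every word chunk whose
    # end timestamp falls within the current turn.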
    for turn, _, speaker in speaker_output.itertracks(yield_label=True):
        diarized = ""
        while i < len(chunks) and chunks[i]['timestamp'][1] <= turn.end:
            diarized += chunks[i]['text'].lower() + ' '
            i += 1
        # Only emit turns that actually contain words, with punctuation
        # and casing restored by rpunct.
        if diarized != "":
            diarized = rpunct.punctuate(diarized)
            diarized_output += "{}: ''{}'' from {:.3f}-{:.3f}\n".format(speaker, diarized, turn.start, turn.end)

    return diarized_output, full_text
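
# Build the Gradio interface (this file uses the older gr.inputs / gr.outputs
# API): microphone audio in, diarized and full transcripts out.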
mic = gr.inputs.Audio(source='microphone', type='filepath', label='Speech input', optional=False)
diarized_transcript = gr.outputs.Textbox(type='auto', label='Diarized Output')
full_transcript = gr.outputs.Textbox(type='auto', label='Full Transcript')

iface = gr.Interface(
    theme='huggingface',
    description='Testing transcription',
    fn=transcribe,
    inputs=[mic],
    outputs=[diarized_transcript, full_transcript]
)
iface.launch()