|
import gradio as gr |
|
import speech_recognition as sr |
|
from Levenshtein import ratio |
|
import tempfile |
|
import numpy as np |
|
import soundfile as sf |
|
import os |
|
|
|
def transcribe_audio(file_info):
    """Transcribe a Gradio numpy-audio payload using Google speech recognition.

    Args:
        file_info: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when no audio was supplied.

    Returns:
        The recognized text, or a human-readable message when no audio was
        given, the speech could not be understood, or the request failed.
    """
    if file_info is None:
        return "No audio provided."

    sample_rate, samples = file_info
    recognizer = sr.Recognizer()

    # Reserve a path and close the handle immediately: the file is reopened
    # by name below, which is not possible on Windows while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name

    try:
        # Write with the recording's own sample rate; a hard-coded rate would
        # change the playback speed of audio captured at any other rate.
        sf.write(wav_path, data=samples, samplerate=sample_rate, format='WAV')
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
    finally:
        # Always clean up the temporary file, even if writing/reading failed.
        os.remove(wav_path)

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"
|
|
|
def pronunciation_correction(expected_text, file_info):
    """Score how closely the spoken audio matches the expected sentence.

    Args:
        expected_text: The sentence the user was supposed to say.
        file_info: Audio payload passed through to ``transcribe_audio``, or
            ``None`` when no audio was supplied.

    Returns:
        A ``(feedback, score)`` tuple. ``feedback`` is a short message and
        ``score`` is the Levenshtein ratio formatted with two decimals.
        When the input is missing or transcription failed, ``feedback``
        explains the problem and ``score`` is ``"0.00"``.
    """
    print(f"Expected text: {expected_text}")

    # Guard against missing inputs before doing any recognition work.
    if not expected_text or not expected_text.strip():
        return "Please enter a sentence.", "0.00"
    if file_info is None:
        return "No audio provided. Please upload or record audio.", "0.00"

    user_spoken_text = transcribe_audio(file_info)
    print(f"User spoken text: {user_spoken_text}")

    # transcribe_audio reports failures as plain strings; surface them as
    # feedback instead of scoring the error message as if it were speech.
    if (user_spoken_text == "Could not understand audio"
            or user_spoken_text.startswith("Could not request results;")):
        return user_spoken_text, "0.00"

    # Compare case-insensitively and ignore surrounding whitespace so that
    # stray spaces do not lower the score.
    similarity = ratio(
        expected_text.strip().lower(),
        user_spoken_text.strip().lower(),
    )
    description = f"{similarity:.2f}"

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    print(f"Similarity: {similarity}, Feedback: {feedback}")
    return feedback, description
|
|
|
def validate_sentence(sentence):
    """Return the entered sentence trimmed, or a prompt if nothing was entered.

    Args:
        sentence: Raw text from the input box; ``None`` is treated as empty.

    Returns:
        The sentence without leading/trailing whitespace, or the message
        ``"Please enter a sentence."`` when the input is empty or blank.
    """
    # Treat None like an empty string, and trim so stray whitespace does not
    # end up in the text that is later compared against the transcription.
    cleaned = (sentence or "").strip()
    if not cleaned:
        return "Please enter a sentence."
    return cleaned
|
|
|
def download_audio(file_info):
    """Save the audio payload to a temporary WAV file and return its path.

    Args:
        file_info: ``(sample_rate, samples)`` tuple as delivered by
            ``gr.Audio(type="numpy")``, or ``None`` when no audio was supplied.

    Returns:
        Path of the written ``.wav`` file, or ``None`` when there is no audio.
        The file is created with ``delete=False`` and is not removed here,
        because the returned path is handed on to the caller.
    """
    if file_info is None:
        return None

    sample_rate, samples = file_info

    # Reserve a path and close the handle first: the file is reopened by name
    # for writing, which is not possible on Windows while it is still open.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name

    # Use the recording's own sample rate rather than a hard-coded value so
    # the saved file plays back at the correct speed.
    sf.write(wav_path, data=samples, samplerate=sample_rate, format='WAV')
    return wav_path
|
|
|
# Build the Gradio interface: a sentence box, an audio input, a button that
# scores pronunciation, and a button that offers the recording for download.
# NOTE(review): the original indentation was lost in this copy; the nesting
# below (two text boxes inside the Row, everything else directly inside the
# Blocks context, launch outside it) is the conventional layout -- confirm.
with gr.Blocks() as app:
    with gr.Row():
        # Free-text input and a read-only box showing the validated sentence.
        sentence_input = gr.Textbox(label="Enter Your Sentence Here")
        validated_sentence = gr.Textbox(label="Valid Sentence", interactive=False)

    # type="numpy" makes the callbacks receive the audio as numpy data.
    audio_input = gr.Audio(label="Upload or Record Audio File", type="numpy")
    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    pronunciation_score = gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)")
    download_audio_button = gr.Button("Download Recording")
    download_output = gr.File(label="Download Your Recording")

    # Re-validate the sentence whenever the input text changes.
    sentence_input.change(
        validate_sentence,
        inputs=sentence_input,
        outputs=validated_sentence
    )

    # Score the recording against the validated sentence (not the raw input).
    check_pronunciation_button.click(
        pronunciation_correction,
        inputs=[validated_sentence, audio_input],
        outputs=[pronunciation_feedback, pronunciation_score]
    )

    # Write the current audio to a WAV file and expose it for download.
    download_audio_button.click(
        download_audio,
        inputs=[audio_input],
        outputs=download_output
    )

# Start the app when this file is executed.
app.launch(debug=True)
|
|