Spaces:

bori0824
/

CTQAndSpeechFeedback

File size: 2,995 Bytes

a0c3c1e

import gradio as gr
import speech_recognition as sr
from Levenshtein import ratio
import tempfile
import numpy as np
import soundfile as sf
import os

def transcribe_audio(file_info):
    """Transcribe a recording with the Google Web Speech recognizer.

    Args:
        file_info: ``(sample_rate, samples)`` pair as produced by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when no audio
            was supplied.

    Returns:
        The recognized text, or a human-readable message when no audio was
        given, the speech could not be understood, or the recognition
        request failed.
    """
    if file_info is None:
        return "No audio provided"

    sample_rate, samples = file_info
    recognizer = sr.Recognizer()

    # Reserve a path and close the handle right away so the file can be
    # reopened by name for writing and reading.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name
    try:
        # Write with the recording's own sample rate; a fixed rate would
        # change the speed and pitch of audio captured at any other rate.
        sf.write(wav_path, data=samples, samplerate=sample_rate, format='WAV')
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
    finally:
        os.remove(wav_path)  # Cleanup temporary file, even on failure

    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

def pronunciation_correction(expected_text, file_info):
    """Compare the transcribed recording with the expected sentence.

    Args:
        expected_text: Sentence the speaker was asked to say.
        file_info: Audio value passed on to ``transcribe_audio``; ``None``
            when nothing was recorded or uploaded.

    Returns:
        A ``(feedback, description)`` tuple: a feedback message and the
        similarity ratio formatted to two decimals as a string.
    """
    if file_info is None:
        # Nothing to transcribe: report it instead of failing downstream.
        return "No audio provided. Please record or upload audio first.", "0.00"

    expected_text = expected_text or ""
    print(f"Expected text: {expected_text}")
    user_spoken_text = transcribe_audio(file_info)
    print(f"User spoken text: {user_spoken_text}")

    # transcribe_audio reports failures as plain strings; show them as the
    # feedback rather than scoring them as if the user had said them.
    if (user_spoken_text == "Could not understand audio"
            or user_spoken_text.startswith("Could not request results;")):
        return user_spoken_text, "0.00"

    similarity = ratio(expected_text.lower(), user_spoken_text.lower())
    description = f"{similarity:.2f}"

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    print(f"Similarity: {similarity}, Feedback: {feedback}")
    return feedback, description

def validate_sentence(sentence):
    """Return *sentence* unchanged, or a prompt when it is blank.

    A sentence counts as blank when nothing remains after stripping
    surrounding whitespace.
    """
    has_content = bool(sentence.strip())
    return sentence if has_content else "Please enter a sentence."

def download_audio(file_info):
    """Save the recording as a temporary WAV file and return its path.

    Args:
        file_info: ``(sample_rate, samples)`` pair as produced by the
            ``gr.Audio(type="numpy")`` input, or ``None`` when no audio
            was supplied.

    Returns:
        Path of the written WAV file, or ``None`` when there is no audio.
        The file is intentionally left on disk so it can be downloaded.
    """
    if file_info is None:
        return None

    sample_rate, samples = file_info
    # Reserve a path and close the handle before writing to it by name.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name
    # Use the recording's own sample rate so playback speed is preserved.
    sf.write(wav_path, data=samples, samplerate=sample_rate, format='WAV')
    return wav_path  # Return file path for download

# Build the Gradio interface: declare the components, then wire the events.
with gr.Blocks() as app:
    # Sentence entry and its validated, read-only copy.
    with gr.Row():
        sentence_input = gr.Textbox(label="Enter Your Sentence Here")
    validated_sentence = gr.Textbox(interactive=False, label="Valid Sentence")

    # Audio capture and the pronunciation check outputs.
    audio_input = gr.Audio(type="numpy", label="Upload or Record Audio File")
    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    pronunciation_score = gr.Number(
        label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)",
    )

    # Download of the recorded audio.
    download_audio_button = gr.Button("Download Recording")
    download_output = gr.File(label="Download Your Recording")

    # Re-validate the sentence every time the input text changes.
    sentence_input.change(fn=validate_sentence, inputs=sentence_input, outputs=validated_sentence)

    # Score the recording against the validated sentence.
    check_pronunciation_button.click(
        fn=pronunciation_correction,
        inputs=[validated_sentence, audio_input],
        outputs=[pronunciation_feedback, pronunciation_score],
    )

    # Write the recording to a file and offer it for download.
    download_audio_button.click(fn=download_audio, inputs=[audio_input], outputs=download_output)

app.launch(debug=True)