File size: 2,995 Bytes
a0c3c1e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import gradio as gr
import speech_recognition as sr
from Levenshtein import ratio
import tempfile
import numpy as np
import soundfile as sf
import os

def transcribe_audio(file_info):
    """Transcribe a recorded clip to text with Google's web speech API.

    Args:
        file_info: ``(sample_rate, samples)`` pair as delivered by a Gradio
            ``Audio`` component configured with ``type="numpy"``, or ``None``
            when nothing has been recorded/uploaded.

    Returns:
        The recognized text, or a human-readable message when no audio was
        supplied, the speech could not be understood, or the recognition
        request failed.
    """
    if file_info is None:
        return "No audio provided"

    sample_rate, samples = int(file_info[0]), file_info[1]
    r = sr.Recognizer()

    # Only reserve a unique path here and release the handle immediately:
    # deleting a file that is still held open fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name
    try:
        # Write with the clip's own sample rate; a hard-coded rate would change
        # the playback speed/pitch of recordings made at any other rate.
        sf.write(wav_path, data=samples, samplerate=sample_rate, format='WAV')
        with sr.AudioFile(wav_path) as source:
            audio_data = r.record(source)
    finally:
        # Remove the temporary file even if writing or reading failed.
        os.remove(wav_path)

    try:
        return r.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Could not understand audio"
    except sr.RequestError as e:
        return f"Could not request results; {e}"

def pronunciation_correction(expected_text, file_info):
    """Score how closely the spoken audio matches the expected sentence.

    Args:
        expected_text: The sentence the user was supposed to say.
        file_info: Audio value forwarded to ``transcribe_audio``; ``None``
            when nothing has been recorded/uploaded.

    Returns:
        A ``(feedback, score)`` tuple where ``feedback`` is a message for the
        user and ``score`` is the Levenshtein similarity ratio formatted with
        two decimals (``"0.00"`` when no comparison could be made).
    """
    print(f"Expected text: {expected_text}")

    # Nothing to compare against: ask for input instead of crashing or
    # reporting a misleading "poor pronunciation".
    if not expected_text or not expected_text.strip():
        return "Please enter a sentence.", "0.00"
    if file_info is None:
        return "Please record or upload audio first.", "0.00"

    user_spoken_text = transcribe_audio(file_info)
    print(f"User spoken text: {user_spoken_text}")

    # transcribe_audio reports failures as message strings; surface them to
    # the user rather than scoring them as if they had been spoken.
    if (user_spoken_text == "Could not understand audio"
            or user_spoken_text.startswith("Could not request results; ")):
        return user_spoken_text, "0.00"

    # Surrounding whitespace is not part of the pronunciation, so ignore it.
    similarity = ratio(expected_text.strip().lower(),
                       user_spoken_text.strip().lower())
    description = f"{similarity:.2f}"

    if similarity >= 0.9:
        feedback = "Excellent pronunciation!"
    elif similarity >= 0.7:
        feedback = "Good pronunciation!"
    elif similarity >= 0.5:
        feedback = "Needs improvement."
    else:
        feedback = "Poor pronunciation, try to focus more on clarity."

    print(f"Similarity: {similarity}, Feedback: {feedback}")
    return feedback, description

def validate_sentence(sentence):
    """Return *sentence* unchanged, or a prompt if it is empty or only whitespace."""
    is_blank = sentence.strip() == ""
    return "Please enter a sentence." if is_blank else sentence

def download_audio(file_info):
    """Save the current recording as a WAV file and return its path.

    Args:
        file_info: ``(sample_rate, samples)`` pair as delivered by a Gradio
            ``Audio`` component configured with ``type="numpy"``, or ``None``
            when nothing has been recorded/uploaded.

    Returns:
        Path of the written ``.wav`` file, or ``None`` when there is no audio
        to save. The file is intentionally left on disk so it can be served
        for download.
    """
    if file_info is None:
        return None

    # Reserve a unique path, then release the handle before writing so the
    # file is never opened twice at once.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        wav_path = tmpfile.name

    # Use the clip's own sample rate instead of assuming 44.1 kHz, otherwise
    # the downloaded file plays back at the wrong speed/pitch.
    sf.write(wav_path, data=file_info[1], samplerate=int(file_info[0]), format='WAV')
    return wav_path  # Return file path for download

# Build the Gradio UI: components are declared in display order, then wired
# to the handler functions defined above.
with gr.Blocks() as app:
    with gr.Row():
        # Free-text sentence the user intends to pronounce.
        sentence_input = gr.Textbox(label="Enter Your Sentence Here")
    # Read-only box showing the output of validate_sentence (the sentence
    # itself, or a prompt when it is blank).
    validated_sentence = gr.Textbox(label="Valid Sentence", interactive=False)
    # type="numpy": the handlers index this value, passing element [1] to
    # soundfile as the sample data.
    audio_input = gr.Audio(label="Upload or Record Audio File", type="numpy")
    check_pronunciation_button = gr.Button("Check Pronunciation")
    pronunciation_feedback = gr.Textbox(label="Pronunciation Feedback")
    pronunciation_score = gr.Number(label="Pronunciation Accuracy Score: 0 (No Match) ~ 1 (Perfect)")
    download_audio_button = gr.Button("Download Recording")
    download_output = gr.File(label="Download Your Recording")

    # Re-validate whenever the typed sentence changes.
    sentence_input.change(
        validate_sentence,
        inputs=sentence_input,
        outputs=validated_sentence
    )

    # Score the recording against the validated sentence (not the raw input),
    # filling the feedback text and the numeric score.
    check_pronunciation_button.click(
        pronunciation_correction,
        inputs=[validated_sentence, audio_input],
        outputs=[pronunciation_feedback, pronunciation_score]
    )

    # Write the current recording to a WAV file and offer it for download.
    download_audio_button.click(
        download_audio,
        inputs=[audio_input],
        outputs=download_output
    )

# Start the app on import/run with Gradio's debug mode enabled.
app.launch(debug=True)