File size: 2,869 Bytes
f9a6181
 
 
 
 
 
097a36f
f9a6181
 
8f85a53
f9a6181
 
 
ffb422b
097a36f
f9a6181
 
5b088d2
ffb422b
5b088d2
ffb422b
f9a6181
5b088d2
 
f9a6181
 
 
 
 
5b088d2
 
 
 
 
c92892e
5b088d2
 
 
 
 
 
 
 
 
f9a6181
 
 
 
 
 
ffb422b
f9a6181
 
 
ffb422b
5b088d2
 
ffb422b
5b088d2
f9a6181
 
 
ffb422b
f9a6181
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Maps each selectable level name to the CSV file that load_data reads for it.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}

def load_data(level):
    """Read the CSV registered for *level* and return it as a DataFrame.

    The file name is looked up in the module-level ``csv_files`` mapping and
    opened relative to the current working directory. A *level* that is not a
    key of ``csv_files`` raises ``KeyError``.
    """
    return pd.read_csv("./" + csv_files[level])

def _synthesize(text):
    """Return *text* spoken in English as a pydub ``AudioSegment``."""
    mp3_buffer = io.BytesIO()
    gTTS(text=text, lang='en').write_to_fp(mp3_buffer)
    # Rewind so pydub decodes from the start of the MP3 data.
    mp3_buffer.seek(0)
    return AudioSegment.from_file(mp3_buffer, format="mp3")


def generate_speech(level, x, y, audio_option, pos_filter):
    """Build one MP3 that reads out the words of *level* whose SID is in [x, y].

    Args:
        level: Level name passed to ``load_data`` (e.g. ``"A1"``).
        x: First SID to include (inclusive); converted with ``int``.
        y: Last SID to include (inclusive); converted with ``int``.
        audio_option: ``"Audio with number"`` announces the SID, the word and
            its part of speech; any other value speaks only the word.
        pos_filter: Part of speech to keep, compared case-insensitively with
            the ``POS`` column. ``"ALL"`` or ``None`` (no selection made)
            disables the filter.

    Returns:
        bytes: MP3-encoded audio. When rows match, it is one second of
        silence followed by each sentence and a 1.5 second pause. When none
        match, it is a single spoken notice.

    Raises:
        ValueError: If *x* or *y* is ``None`` (an empty number field).
    """
    # An empty number field arrives as None; fail with a readable message
    # instead of the opaque TypeError that int(None) would raise.
    if x is None or y is None:
        raise ValueError(
            "Both the start number (x) and the end number (y) are required."
        )
    first_sid, last_sid = int(x), int(y)

    data = load_data(level)

    selected = (data['SID'] >= first_sid) & (data['SID'] <= last_sid)
    # A dropdown left untouched may deliver None; treat it like "ALL" rather
    # than crashing on None.lower().
    filter_by_pos = pos_filter is not None and pos_filter != "ALL"
    if filter_by_pos:
        selected = selected & (data['POS'].str.lower() == pos_filter.lower())
    filtered_df = data[selected]

    if filtered_df.empty:
        # Without a POS filter, say "word" rather than the literal "ALL".
        missing = pos_filter if filter_by_pos else "word"
        combined_audio = _synthesize(
            f"There is no {missing} in the selected range."
        )
    else:
        with_number = audio_option == "Audio with number"
        pause = AudioSegment.silent(duration=1500)  # loop-invariant, built once
        combined_audio = AudioSegment.silent(duration=1000)
        for _, row in filtered_df.iterrows():
            word = row['WORD']
            if with_number:
                sentence = f"Number {row['SID']}. {word}!  {word} is a {row['POS']}."
            else:
                sentence = f"{word}!"
            combined_audio += _synthesize(sentence) + pause

    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    return mp3_io.getvalue()


# Gradio UI. The five inputs are handed to generate_speech positionally, so
# their order must stay (level, x, y, audio_option, pos_filter).
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Dropdown(
            choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'],
            label="Select Level",
        ),
        gr.Number(label="Start Number (x)"),
        gr.Number(label="End Number (y)"),
        gr.Radio(
            choices=["Audio with number", "Audio without number"],
            label="Audio Option",
        ),
        # "ALL" is the sentinel generate_speech uses to skip POS filtering.
        gr.Dropdown(
            choices=["ALL", "Noun", "Verb", "Adjective", "Adverb", "Preposition"],
            label="Select Part of Speech",
        ),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file.",
)

# Starts the web app as soon as this module runs, with a public share link
# requested and debug mode enabled.
iface.launch(share=True, debug=True)