Spaces:

MK-316
/

oxford5k-audio

File size: 2,869 Bytes

f9a6181
 
 
 
 
 
097a36f
f9a6181
 
8f85a53
f9a6181
 
 
ffb422b
097a36f
f9a6181
 
5b088d2
ffb422b
5b088d2
ffb422b
f9a6181
5b088d2
 
f9a6181
 
 
 
 
5b088d2
 
 
 
 
c92892e
5b088d2
 
 
 
 
 
 
 
 
f9a6181
 
 
 
 
 
ffb422b
f9a6181
 
 
ffb422b
5b088d2
 
ffb422b
5b088d2
f9a6181
 
 
ffb422b
f9a6181

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Vocabulary list file for each selectable level; keys are the level labels
# passed to load_data().
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}

def load_data(level):
    """Read the vocabulary CSV registered for *level* into a DataFrame.

    The file name is looked up in ``csv_files`` and opened relative to the
    current working directory. A level that is not a key of ``csv_files``
    raises ``KeyError``.
    """
    return pd.read_csv(f"./{csv_files[level]}")

def _article_for(word):
    """Return "an" if *word* starts with a vowel letter, otherwise "a"."""
    return "an" if str(word)[:1].lower() in ("a", "e", "i", "o", "u") else "a"


def _text_to_audio(text):
    """Synthesize *text* with gTTS and decode the MP3 into an AudioSegment."""
    buffer = io.BytesIO()
    gTTS(text=text, lang='en').write_to_fp(buffer)
    buffer.seek(0)
    return AudioSegment.from_file(buffer, format="mp3")


def generate_speech(level, x, y, audio_option, pos_filter):
    """Build one MP3 that reads out the words numbered x..y of a level.

    Args:
        level: Key of ``csv_files`` selecting which word list to load.
        x: First ``SID`` to include (inclusive); converted with ``int()``.
        y: Last ``SID`` to include (inclusive); converted with ``int()``.
        audio_option: ``"Audio with number"`` announces the number, the word
            and its part of speech; any other value speaks only the word.
        pos_filter: ``"ALL"`` (or ``None``) keeps every word; any other value
            keeps only rows whose ``POS`` matches it case-insensitively.

    Returns:
        The encoded MP3 as ``bytes``.

    Raises:
        gr.Error: If ``level``, ``x`` or ``y`` is missing.
    """
    # Form fields may arrive as None when left empty; fail with a readable
    # message instead of an opaque KeyError/TypeError further down.
    if level is None or x is None or y is None:
        raise gr.Error("Please select a level and enter both a start and an end number.")
    if pos_filter is None:
        pos_filter = "ALL"

    data = load_data(level)
    x, y = int(x), int(y)

    selected = (data['SID'] >= x) & (data['SID'] <= y)
    if pos_filter != "ALL":  # Narrow to the requested part of speech
        selected = selected & (data['POS'].str.lower() == pos_filter.lower())
    filtered_df = data[selected]

    if filtered_df.empty:
        # Speak a short notice instead of returning silence.
        if pos_filter == "ALL":
            sentence = "There are no words in the selected range."
        else:
            sentence = f"There is no {pos_filter} in the selected range."
        combined_audio = _text_to_audio(sentence)
    else:
        pause = AudioSegment.silent(duration=1500)  # gap after every word
        combined_audio = AudioSegment.silent(duration=1000)  # lead-in
        for _, row in filtered_df.iterrows():
            word, pos = row['WORD'], row['POS']
            if audio_option == "Audio with number":
                sentence = f"Number {row['SID']}. {word}!  {word} is {_article_for(pos)} {pos}."
            else:  # "Audio without number"
                sentence = f"{word}!"
            combined_audio += _text_to_audio(sentence) + pause

    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    return mp3_io.getvalue()


# Every input carries an explicit default so that a request submitted without
# touching the form still delivers a valid level, range, option and filter.
# precision=0 makes the number boxes yield integers.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Dropdown(label="Select Level", choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'], value='A1'),
        gr.Number(label="Start Number (x)", value=1, precision=0),
        gr.Number(label="End Number (y)", value=10, precision=0),
        gr.Radio(label="Audio Option", choices=["Audio with number", "Audio without number"], value="Audio with number"),
        gr.Dropdown(label="Select Part of Speech", choices=["ALL", "Noun", "Verb", "Adjective", "Adverb", "Preposition"], value="ALL"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file."
)

# Starts the web server; share=True additionally requests a public share link.
iface.launch(share=True, debug=True)