Spaces:

MK-316
/

oxford5k-audio

File size: 2,438 Bytes

f9a6181
 
 
 
 
 
097a36f
f9a6181
 
8f85a53
f9a6181
 
 
ffb422b
097a36f
f9a6181
 
ffb422b
 
 
 
f9a6181
097a36f
f9a6181
 
097a36f
ffb422b
 
097a36f
 
 
f9a6181
 
 
 
 
 
 
 
 
 
 
 
ffb422b
f9a6181
 
 
ffb422b
 
 
 
1b82471
f9a6181
 
 
ffb422b
f9a6181

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Maps each selectable level label to the CSV file holding its word list.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}

def load_data(level):
    """Read the word list registered for ``level`` into a DataFrame.

    Args:
        level: A key of ``csv_files`` (e.g. ``"A1"`` or ``"5K"``).

    Returns:
        The ``pandas.DataFrame`` parsed from the matching CSV file, which is
        looked up relative to the current working directory.

    Raises:
        KeyError: If ``level`` is not a key of ``csv_files``.
    """
    return pd.read_csv(f"./{csv_files[level]}")

def generate_speech(level, x, y, audio_option, pos_filter):
    """Build one MP3 that reads out the words whose SID lies in ``[x, y]``.

    Args:
        level: Word-list key passed straight to ``load_data``.
        x: First SID to include (inclusive); converted with ``int()``.
        y: Last SID to include (inclusive); converted with ``int()``.
        audio_option: ``"Audio with number"`` announces the SID and the part
            of speech along with the word; any other value speaks only the
            word.
        pos_filter: Part of speech to keep, compared case-insensitively with
            the ``POS`` column. ``None``, an empty string, ``"Any"`` and
            ``"ALL"`` (any casing) all mean "do not filter".

    Returns:
        The encoded MP3 as ``bytes``: one second of silence, then each spoken
        sentence followed by a 1.5 second pause.

    Raises:
        TypeError: If ``x`` or ``y`` is ``None`` (``int(None)`` fails).
        KeyError: If ``level`` is unknown to ``load_data``.
    """
    data = load_data(level)

    x, y = int(x), int(y)
    selection = (data['SID'] >= x) & (data['SID'] <= y)

    # The UI dropdown labels its catch-all entry "ALL" while this function
    # historically checked for "Any"; an untouched dropdown passes None.
    # Accept every spelling so none of them filters the list down to nothing
    # or crashes on ``None.lower()``.
    pos_key = (pos_filter or "").strip().lower()
    if pos_key not in ("", "any", "all"):
        selection = selection & (data['POS'].str.lower() == pos_key)
    filtered_df = data[selection]

    vowels = ("a", "e", "i", "o", "u")
    with_number = audio_option == "Audio with number"

    # The pause between words never changes, so build it once.
    pause = AudioSegment.silent(duration=1500)
    combined_audio = AudioSegment.silent(duration=1000)

    for _, row in filtered_df.iterrows():
        if with_number:
            pos_label = str(row['POS'])
            # Pick "an" before a vowel so the voice says "an adjective",
            # not "a adjective".
            article = "an" if pos_label[:1].lower() in vowels else "a"
            sentence = (
                f"Number {row['SID']}. {row['WORD']}! "
                f"{row['WORD']} is {article} {pos_label}."
            )
        else:  # "Audio without number"
            sentence = f"{row['WORD']}!"

        # Synthesize into memory and decode it so pydub can concatenate.
        tts = gTTS(text=sentence, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        mp3_fp.seek(0)
        sentence_audio = AudioSegment.from_file(mp3_fp, format="mp3")
        combined_audio += sentence_audio + pause

    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    # getvalue() returns the whole buffer regardless of the stream position.
    return mp3_io.getvalue()


# Input widgets, created in the same order as generate_speech's parameters
# (level, x, y, audio_option, pos_filter).
level_input = gr.Dropdown(
    choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'],
    label="Select Level",
)
start_input = gr.Number(label="Range: Start Number (x)")
end_input = gr.Number(label="Range: End Number (y)")
audio_option_input = gr.Radio(
    choices=["Audio with number", "Audio without number"],
    label="Audio Option",
)
pos_input = gr.Dropdown(
    choices=["ALL", "Noun", "Verb", "Adjective", "Adverb", "Preposition"],
    label="Select Part of Speech",
)

# Single-function UI: the five inputs feed generate_speech and its return
# value is shown in an audio player.
iface = gr.Interface(
    fn=generate_speech,
    inputs=[level_input, start_input, end_input, audio_option_input, pos_input],
    outputs=gr.Audio(label="Generated Speech"),
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file.",
)

# Start the app as soon as the module runs.
iface.launch(share=True, debug=True)