Spaces:

MK-316
/

oxford5k-audio

File size: 4,437 Bytes

f9a6181
 
 
 
 
 
097a36f
f9a6181
 
8f85a53
f9a6181
 
 
3d3b58a
097a36f
f9a6181
 
3d3b58a
 
ffb422b
3d3b58a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f9a6181
3d3b58a
 
 
 
f9a6181
 
 
 
5b088d2
 
3d3b58a
09aa674
3d3b58a
09aa674
3d3b58a
 
 
5b088d2
 
 
 
 
 
3d3b58a
 
f9a6181
 
 
 
 
 
ffb422b
3d3b58a
f9a6181
 
 
3d3b58a
ffb422b
9f0ed87
3d3b58a
 
fcac469
1798aa6
3d3b58a
 
 
 
 
 
 
f9a6181
 
 
ffb422b
f9a6181

import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io

# Maps each selectable level label to the CSV file holding that word list.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}

def load_data(level):
    """Load the word list for ``level`` into a pandas DataFrame.

    Args:
        level: A key of ``csv_files`` (for example ``"A1"`` or ``"5K"``).

    Returns:
        The DataFrame parsed from the matching CSV file, which is looked up
        relative to the current working directory.

    Raises:
        KeyError: If ``level`` is not a key of ``csv_files``.
    """
    return pd.read_csv(f"./{csv_files[level]}")

def _synthesize(sentence):
    """Render ``sentence`` to English speech via gTTS and return an AudioSegment."""
    mp3_fp = io.BytesIO()
    gTTS(text=sentence, lang='en').write_to_fp(mp3_fp)
    mp3_fp.seek(0)
    return AudioSegment.from_file(mp3_fp, format="mp3")


def generate_speech(t1, level, t2, x, y, t3, audio_option, t4, all_pos, noun, verb, adjective, adverb, preposition):
    """Build one MP3 that reads out the selected words of a level.

    Rows of the level's word list are kept when their ``SID`` lies in the
    inclusive range ``[x, y]`` and, unless ``all_pos`` is set, their ``POS``
    (compared case-insensitively) matches one of the ticked parts of speech.
    Each kept row is spoken, followed by one second of silence. If nothing
    matches, the audio says "No matching words found." instead.

    Args:
        t1, t2, t3, t4: Unused placeholders for the Markdown heading
            components that sit in the interface's input list.
        level: Word-list key passed to ``load_data``.
        x: First ``SID`` to include; converted with ``int()``.
        y: Last ``SID`` to include; converted with ``int()``.
        audio_option: ``"Audio with POS"``, ``"Audio with number/POS"``, or
            anything else for the word alone.
        all_pos: When truthy, ignore the individual POS flags and keep every
            row in range.
        noun, verb, adjective, adverb, preposition: Flags selecting which
            parts of speech to keep when ``all_pos`` is falsy.

    Returns:
        The encoded MP3 as ``bytes``.

    Raises:
        KeyError: If ``level`` is not a known word list (from ``load_data``).
        TypeError: If ``x`` or ``y`` is ``None`` (for example, a blank field).
    """
    data = load_data(level)

    x, y = int(x), int(y)

    # The SID range applies to every branch, so slice once up front.
    in_range = data[(data['SID'] >= x) & (data['SID'] <= y)]

    if all_pos:  # Include all words if "ALL" is selected
        filtered_df = in_range
    else:
        selected = [
            tag
            for flag, tag in (
                (noun, 'noun'),
                (verb, 'verb'),
                (adjective, 'adjective'),
                (adverb, 'adverb'),
                (preposition, 'preposition'),
            )
            if flag
        ]
        if selected:
            pos_lower = in_range['POS'].str.lower()
            # DataFrame.append was removed in pandas 2.0; pd.concat keeps the
            # same ordering (rows grouped by POS in the order listed above).
            filtered_df = pd.concat([in_range[pos_lower == tag] for tag in selected])
        else:
            # Nothing ticked: an empty selection, not an error.
            filtered_df = in_range.iloc[0:0]

    # Ensure the filtered_df is unique in case of overlapping conditions
    filtered_df = filtered_df.drop_duplicates()

    # Proceed with generating the speech
    combined_audio = AudioSegment.silent(duration=1000)  # Initial silence
    if filtered_df.empty:
        # The notice must be appended, otherwise the result is silence only.
        combined_audio += _synthesize("No matching words found.")
    else:
        for _, row in filtered_df.iterrows():
            # Adjust the sentence based on the audio_option
            if audio_option == "Audio with POS":
                sentence = f"{row['WORD']} is {row['POS']}"
            elif audio_option == "Audio with number/POS":
                sentence = f"{row['SID']}. {row['WORD']}. {row['WORD']} is {row['POS']}"
            else:  # "Word only"
                sentence = f"{row['WORD']}"

            # Add a pause after each word
            combined_audio += _synthesize(sentence) + AudioSegment.silent(duration=1000)

    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    mp3_io.seek(0)
    return mp3_io.getvalue()


# Input components, in the positional order of generate_speech's parameters.
# The Markdown entries are section headings only; they line up with the
# t1..t4 placeholder parameters.
input_components = [
    gr.Markdown("#### [1] Select Level"),  # Heading for the level selector
    gr.Dropdown(label="Select Level", choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K']),
    gr.Markdown("#### [2] Select Range: A1(1~734), A2(819), B1(769), B2(717), C1(1392), 5K(1392)"),
    gr.Number(label="Start Number (x)"),
    gr.Number(label="End Number (y)"),
    gr.Markdown("#### [3] Audio options (text): Two, 'able'. 'Able' is noun."),
    gr.Radio(label="Audio Option", choices=["Audio with number/POS", "Audio with POS", "Word only"]),
    gr.Markdown("#### Select Part of Speech"),  # Heading for the POS checkboxes
    gr.Checkbox(label="Any", value=False),
    gr.Checkbox(label="Noun", value=False),
    gr.Checkbox(label="Verb", value=False),
    gr.Checkbox(label="Adjective", value=False),
    gr.Checkbox(label="Adverb", value=False),
    gr.Checkbox(label="Preposition", value=False),
]

iface = gr.Interface(
    fn=generate_speech,
    inputs=input_components,
    outputs=gr.Audio(label="Generated Speech"),
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file.",
)

iface.launch(share=True, debug=True)