import io

import gradio as gr
import pandas as pd
from gtts import gTTS
from pydub import AudioSegment

# CSV file for each vocabulary level; each file is expected to contain
# at least the columns SID, WORD and POS used below.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}


def load_data(level):
    csv_file_path = f"./{csv_files[level]}"
    data = pd.read_csv(csv_file_path)
    return data


def generate_speech(t1, level, t2, x, y, t3, audio_option, t4,
                    all_pos, noun, verb, adjective, adverb, preposition):
    # t1-t4 are placeholders for the gr.Markdown section headers in the inputs list.
    data = load_data(level)
    x, y = int(x), int(y)

    # Restrict to the requested SID range first
    in_range = data[(data['SID'] >= x) & (data['SID'] <= y)]

    if all_pos:
        # Include all words in the range if "Any" is selected
        filtered_df = in_range
    else:
        # Collect the rows matching each selected part of speech
        frames = []
        if noun:
            frames.append(in_range[in_range['POS'].str.lower() == 'noun'])
        if verb:
            frames.append(in_range[in_range['POS'].str.lower() == 'verb'])
        if adjective:
            frames.append(in_range[in_range['POS'].str.lower() == 'adjective'])
        if adverb:
            frames.append(in_range[in_range['POS'].str.lower() == 'adverb'])
        if preposition:
            frames.append(in_range[in_range['POS'].str.lower() == 'preposition'])

        # DataFrame.append was removed in pandas 2.0, so combine with pd.concat
        filtered_df = pd.concat(frames) if frames else pd.DataFrame()
        # Ensure the result is unique in case of overlapping conditions
        filtered_df = filtered_df.drop_duplicates()

    # Proceed with generating the speech
    combined_audio = AudioSegment.silent(duration=1000)  # Initial silence

    if filtered_df.empty:
        sentence = "No matching words found."
        tts = gTTS(text=sentence, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        mp3_fp.seek(0)
        # Append the notice so the returned audio is not just silence
        combined_audio += AudioSegment.from_file(mp3_fp, format="mp3")
    else:
        for _, row in filtered_df.iterrows():
            # Build the sentence according to the selected audio option
            if audio_option == "Audio with POS":
                sentence = f"{row['WORD']} is {row['POS']}"
            elif audio_option == "Audio with number/POS":
                sentence = f"{row['SID']}. {row['WORD']}. {row['WORD']} is {row['POS']}"
            else:  # "Word only"
                sentence = f"{row['WORD']}"

            tts = gTTS(text=sentence, lang='en')
            mp3_fp = io.BytesIO()
            tts.write_to_fp(mp3_fp)
            mp3_fp.seek(0)
            sentence_audio = AudioSegment.from_file(mp3_fp, format="mp3")
            # Add a pause after each word
            combined_audio += sentence_audio + AudioSegment.silent(duration=1000)

    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    mp3_io.seek(0)
    return mp3_io.getvalue()


# Gradio interface; the gr.Markdown components serve as section labels between inputs
iface = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Markdown("#### [1] Select Level"),
        gr.Dropdown(label="Select Level", choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K']),
        gr.Markdown("#### [2] Select Range: A1(1~734), A2(819), B1(769), B2(717), C1(1392), 5K(1392)"),
        gr.Number(label="Start Number (x)"),
        gr.Number(label="End Number (y)"),
        gr.Markdown("#### [3] Audio options (text): Two, 'able'. 'Able' is noun."),
        gr.Radio(label="Audio Option", choices=["Audio with number/POS", "Audio with POS", "Word only"]),
        gr.Markdown("#### Select Part of Speech"),
        gr.Checkbox(label="Any", value=False),
        gr.Checkbox(label="Noun", value=False),
        gr.Checkbox(label="Verb", value=False),
        gr.Checkbox(label="Adjective", value=False),
        gr.Checkbox(label="Adverb", value=False),
        gr.Checkbox(label="Preposition", value=False),
    ],
    outputs=gr.Audio(label="Generated Speech"),
    title="Oxford Learner Vocabulary by CEFR Levels: Learn with Sound",
    description=(
        "Choose a level, define the starting and ending numbers, select the audio option, "
        "and filter by part of speech if desired. The system will create a single audio file. "
        "After submission, you have the option to download the audio file."
    ),
)

iface.launch(share=True, debug=True)