oxford5k-audio / app.py
MK-316's picture
Update app.py
8f85a53 verified
raw
history blame
2.01 kB
import gradio as gr
from gtts import gTTS
from pydub import AudioSegment
import pandas as pd
import io
# Maps each selectable level to the CSV file that holds its word list.
csv_files = {
    "A1": "OF3KA1.csv",
    "A2": "OF3KA2.csv",
    "B1": "OF3KB1.csv",
    "B2": "OF3KB2.csv",
    "C1": "OF3KC1.csv",
    "5K": "OF5K.csv",
}
def load_data(level):
    """Read the CSV registered for *level* in ``csv_files``.

    The file name is looked up in ``csv_files`` and opened relative to the
    current working directory. Returns the ``pandas.DataFrame`` produced by
    ``pd.read_csv``; an unknown *level* raises ``KeyError`` from the lookup.
    """
    return pd.read_csv("./" + csv_files[level])
def generate_speech(level, x, y, audio_option):
    """Build one MP3 that reads out the rows whose SID lies in [x, y].

    Args:
        level: Level key passed to ``load_data`` to pick the word list.
        x: First SID to include (inclusive); converted with ``int``.
        y: Last SID to include (inclusive); converted with ``int``.
        audio_option: ``"Audio with number"`` selects the numbered sentence;
            any other value reads only the word.

    Returns:
        The encoded MP3 as ``bytes``. The clip starts with 1 s of silence and
        each spoken sentence is followed by 1.5 s of silence. If no row
        matches the range, only the leading silence is returned.

    Raises:
        gr.Error: If the level or either number was left empty.
    """
    # Inputs left blank in the form arrive as None; without this guard the
    # user only sees an opaque KeyError/TypeError from the code below.
    if level is None or x is None or y is None:
        raise gr.Error(
            "Please select a level and enter both the start and end numbers."
        )

    data = load_data(level)
    x, y = int(x), int(y)
    filtered_df = data[(data['SID'] >= x) & (data['SID'] <= y)]

    combined_audio = AudioSegment.silent(duration=1000)
    # The gap after each sentence never changes, so build it once.
    pause = AudioSegment.silent(duration=1500)

    for _, row in filtered_df.iterrows():
        if audio_option == "Audio with number":
            # NOTE(review): SID is spoken twice here ("Number 5. 5 is ...");
            # the second one was probably meant to be row['WORD'] -- confirm
            # with the author before changing the spoken text.
            sentence = f"Number {row['SID']}. {row['SID']} is {row['POS']}."
        else:  # "Audio without number"
            sentence = f"{row['WORD']}!"

        # Synthesize into memory and decode so the clip can be concatenated.
        tts = gTTS(text=sentence, lang='en')
        mp3_fp = io.BytesIO()
        tts.write_to_fp(mp3_fp)
        mp3_fp.seek(0)
        sentence_audio = AudioSegment.from_file(mp3_fp, format="mp3")
        combined_audio += sentence_audio + pause

    mp3_io = io.BytesIO()
    combined_audio.export(mp3_io, format='mp3')
    # getvalue() returns the whole buffer regardless of the stream position.
    return mp3_io.getvalue()
# Input widgets, created in the same order as generate_speech's parameters
# (level, x, y, audio_option).
level_input = gr.Dropdown(
    label="Select Level (3K: A1, A2, B1, B2, C1; 5K: additional B2 and C1)",
    choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'],
)
start_input = gr.Number(label="Start Number (x)")
end_input = gr.Number(label="End Number (y)")
option_input = gr.Radio(
    label="Audio Option",
    choices=["Audio with number", "Audio without number"],
)

# Output widget that receives the value returned by generate_speech.
speech_output = gr.Audio(label="Generated Speech")

iface = gr.Interface(
    fn=generate_speech,
    inputs=[level_input, start_input, end_input, option_input],
    outputs=speech_output,
    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
    description="Choose a level, define the starting and ending numbers, and select the audio option. The system will create a single audio file. After submission, you have the option to download the audio file.",
)

# Start the app with the same launch flags as before.
iface.launch(share=True, debug=True)