Spaces:

MK-316
/

oxford5k-audio

App Files Community

oxford5k-audio / app.py

MK-316's picture

Update app.py

5b088d2 verified 9 months ago

2.85 kB

	import gradio as gr
	from gtts import gTTS
	from pydub import AudioSegment
	import pandas as pd
	import io

	# Maps each selectable level name to the CSV file holding its word list.
	csv_files = {
	    "A1": "OF3KA1.csv",
	    "A2": "OF3KA2.csv",
	    "B1": "OF3KB1.csv",
	    "B2": "OF3KB2.csv",
	    "C1": "OF3KC1.csv",
	    "5K": "OF5K.csv",
	}

	def load_data(level):
	    """Read the word-list CSV registered for ``level`` in ``csv_files``.

	    Args:
	        level: A key of ``csv_files`` (for example ``"A1"``).

	    Returns:
	        The result of ``pd.read_csv`` on that file, looked up relative to
	        the current working directory.

	    Raises:
	        KeyError: If ``level`` is not a key of ``csv_files``.
	    """
	    filename = csv_files[level]
	    return pd.read_csv(f"./{filename}")

	def _text_to_audio(text):
	    """Synthesize ``text`` with gTTS and return it as a pydub AudioSegment."""
	    buffer = io.BytesIO()
	    gTTS(text=text, lang='en').write_to_fp(buffer)
	    buffer.seek(0)
	    return AudioSegment.from_file(buffer, format="mp3")


	def _indefinite_article(word):
	    """Return "an" if ``word`` starts with a vowel letter, otherwise "a"."""
	    return "an" if word[:1].lower() in ("a", "e", "i", "o", "u") else "a"


	def generate_speech(level, x, y, audio_option, pos_filter):
	    """Build one MP3 that reads out the words whose SID lies in [x, y].

	    Args:
	        level: Level name passed to ``load_data`` to pick the word list.
	        x: First SID to include (inclusive); converted with ``int``.
	        y: Last SID to include (inclusive); converted with ``int``.
	        audio_option: ``"Audio with number"`` to speak the SID, the word and
	            its part of speech; any other value speaks only the word.
	        pos_filter: Part of speech to keep (compared case-insensitively with
	            the ``POS`` column). ``"ALL"`` or ``None`` keeps every word.

	    Returns:
	        The encoded MP3 data as ``bytes``. When nothing matches, the audio
	        is a single spoken sentence saying so.

	    Raises:
	        ValueError: If ``level``, ``x`` or ``y`` is ``None`` (for example an
	            input left empty in the UI).
	    """
	    # Validate before touching the CSV so an empty form field produces a
	    # readable message instead of a TypeError/KeyError deep in the call.
	    if level is None:
	        raise ValueError("Please select a level.")
	    if x is None or y is None:
	        raise ValueError("Please enter both a start number and an end number.")
	    x, y = int(x), int(y)

	    # An unselected part-of-speech dropdown arrives as None; treat it as "ALL".
	    filter_by_pos = pos_filter is not None and pos_filter != "ALL"

	    data = load_data(level)
	    selected = (data['SID'] >= x) & (data['SID'] <= y)
	    if filter_by_pos:
	        selected &= data['POS'].str.lower() == pos_filter.lower()
	    filtered_df = data[selected]

	    if filtered_df.empty:
	        # Without a specific filter, say "word" rather than reading out "ALL".
	        missing = pos_filter if filter_by_pos else "word"
	        combined_audio = _text_to_audio(f"There is no {missing} in the selected range.")
	    else:
	        # One second of lead-in silence, then each sentence followed by a pause.
	        combined_audio = AudioSegment.silent(duration=1000)
	        pause = AudioSegment.silent(duration=1500)
	        with_number = audio_option == "Audio with number"
	        for sid, word, pos in zip(filtered_df['SID'], filtered_df['WORD'], filtered_df['POS']):
	            if with_number:
	                pos = str(pos)
	                sentence = f"Number {sid}. {word} is {_indefinite_article(pos)} {pos}."
	            else:  # "Audio without number"
	                sentence = f"{word}!"
	            combined_audio += _text_to_audio(sentence) + pause

	    mp3_io = io.BytesIO()
	    combined_audio.export(mp3_io, format='mp3')
	    return mp3_io.getvalue()


	# Input widgets, created in the same order as generate_speech's parameters
	# (level, x, y, audio_option, pos_filter).
	level_input = gr.Dropdown(label="Select Level", choices=['A1', 'A2', 'B1', 'B2', 'C1', '5K'])
	start_input = gr.Number(label="Start Number (x)")
	end_input = gr.Number(label="End Number (y)")
	audio_option_input = gr.Radio(
	    label="Audio Option",
	    choices=["Audio with number", "Audio without number"],
	)
	pos_input = gr.Dropdown(
	    label="Select Part of Speech",
	    choices=["ALL", "Noun", "Verb", "Adjective", "Adverb", "Preposition"],
	)

	# Single audio output that receives whatever generate_speech returns.
	speech_output = gr.Audio(label="Generated Speech")

	iface = gr.Interface(
	    fn=generate_speech,
	    inputs=[level_input, start_input, end_input, audio_option_input, pos_input],
	    outputs=speech_output,
	    title="Oxford Learner Vocabulary by CEFR levels: Learn with Sound",
	    description="Choose a level, define the starting and ending numbers, select the audio option, and filter by Part of Speech if desired. The system will create a single audio file. After submission, you have the option to download the audio file.",
	)

	# Start the web app when this script runs.
	iface.launch(share=True, debug=True)