"""Gradio demo: transcribe recorded speech via a remote ASR endpoint and translate it to English."""

import os
import tempfile

import gradio as gr
import requests
import soundfile as sf
from deep_translator import GoogleTranslator

ASR_URL = "https://asr.iitm.ac.in/internal/asr/decode"
# Upper bound on the ASR round trip so a stalled server cannot hang the UI forever.
ASR_TIMEOUT_SECONDS = 120
ASR_ERROR_MESSAGE = "Error in ASR processing"
NO_AUDIO_MESSAGE = "No audio input provided!"
NO_LANGUAGE_MESSAGE = "No language selected!"

# Characters removed from the transcript before translation: the Devanagari
# danda (U+0964) and the ASCII full stop. The danda is written as an escape so
# it cannot be corrupted again by a file re-encoding.
_SENTENCE_MARKS = str.maketrans("", "", "\u0964.")


def _ensure_wav(path):
    """Return ``(wav_path, is_temporary)`` for the audio file at *path*.

    A file whose name already ends in ``.wav`` (case-insensitive) is used
    as-is. Anything else is re-encoded with ``soundfile`` into a uniquely named
    temporary ``.wav`` file, which the caller must delete.
    """
    if path.lower().endswith(".wav"):
        return path, False

    samples, sample_rate = sf.read(path)
    # A unique name per call keeps concurrent requests from overwriting each
    # other's audio.
    handle, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(handle)
    try:
        sf.write(wav_path, samples, sample_rate)
    except Exception:
        os.remove(wav_path)
        raise
    return wav_path, True


def _transcribe(wav_path, language):
    """Send *wav_path* to the ASR endpoint; return the transcript or ``None`` on failure."""
    try:
        with open(wav_path, "rb") as audio_stream:
            response = requests.post(
                ASR_URL,
                files={
                    "file": audio_stream,
                    "language": (None, language),
                    "vtt": (None, "true"),
                },
                timeout=ASR_TIMEOUT_SECONDS,
            )
        payload = response.json()
        print(payload)
        transcript = payload["transcript"]
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Network failure, non-JSON body, or a payload without "transcript".
        return None
    return transcript if isinstance(transcript, str) else None


def speech_translation(audio, language):
    """Transcribe the recording at *audio* and translate the text to English.

    Args:
        audio: Path to the recorded audio file, or ``None`` when nothing was
            recorded.
        language: Source language name, passed both to the ASR endpoint and to
            ``GoogleTranslator`` as the source language.

    Returns:
        A single string: the English translation, or a short message when the
        input is missing or the ASR step fails.
    """
    if audio is None:
        # The interface has exactly one text output, so return one string.
        return NO_AUDIO_MESSAGE
    if not language:
        return NO_LANGUAGE_MESSAGE

    wav_path, is_temporary = _ensure_wav(audio)
    try:
        transcript = _transcribe(wav_path, language)
    finally:
        if is_temporary:
            os.remove(wav_path)

    if transcript is None:
        # Report the failure directly instead of sending it through the translator.
        return ASR_ERROR_MESSAGE

    transcript = transcript.translate(_SENTENCE_MARKS)
    if not transcript.strip():
        # Nothing was recognised; there is nothing to translate.
        return ""

    translator = GoogleTranslator(source=language, target="en")
    return translator.translate(transcript)


iface = gr.Interface(
    fn=speech_translation,
    inputs=[
        gr.Audio(type="filepath", label="Record your speech"),
        gr.Dropdown(["telugu", "hindi", "marathi", "bengali"], label="Select Language"),
    ],
    outputs=["text"],
    title="Speech Translation",
    description="Record your speech and get the English translation.",
)

if __name__ == "__main__":
    iface.launch(share=True)