import gradio as gr
from transformers import pipeline
import numpy as np

# Load the Whisper ASR pipeline once at module import so every request
# reuses the same model instead of reloading it per call.
pipe = pipeline("automatic-speech-recognition", model="openai/whisper-small")


def transcribe(audio):
    """Transcribe an audio clip using the loaded Whisper pipeline.

    Args:
        audio: Filesystem path to the recorded or uploaded audio file
            (the `gr.Audio` input below is configured with
            ``type="filepath"``), or ``None`` when no audio was provided.

    Returns:
        The transcribed text, or an empty string when no audio was given.
    """
    # Nothing recorded/uploaded — return gracefully instead of crashing.
    if audio is None:
        return ""
    # The pipeline accepts a file path directly and performs decoding and
    # resampling itself. The original code called
    # `np.frombuffer(audio, dtype=np.int16)` on the path *string*, which
    # raises TypeError (a str is not a buffer) — and raw int16 samples are
    # not what Whisper pipelines expect anyway.
    return pipe(audio)["text"]


iface = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources=["microphone", "upload"], type="filepath"),
    outputs="text",
    title="Whisper Small ASR",
    description="Transcribe audio using the OpenAI Whisper Small model.",
)

iface.launch()