File size: 1,848 Bytes
b8b135b
c2110e8
e559d03
b8b135b
e559d03
8549c9b
e559d03
 
 
 
 
 
 
 
 
 
 
 
77fc3c3
e559d03
77fc3c3
accb4e2
77fc3c3
 
accb4e2
77fc3c3
 
c2110e8
accb4e2
 
 
77fc3c3
e559d03
 
 
accb4e2
 
 
e559d03
accb4e2
 
 
e559d03
 
 
 
 
 
accb4e2
5440420
e559d03
accb4e2
e559d03
c1335fa
accb4e2
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import html

import gradio as gr
import torch
from transformers import pipeline
from timestamp import format_timestamp

# Checkpoint served by this demo and the batch size passed to the pipeline
# on every transcription call.
MODEL_NAME = "openai/whisper-medium"
BATCH_SIZE = 8

# Use CUDA device 0 when a GPU is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 0
else:
    device = "cpu"

# The speech-recognition pipeline is built once at import time and reused by
# every call to `transcribe`.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
    chunk_length_s=30,
)

def transcribe(file, task, return_timestamps):
    """Transcribe an audio input and render the result as an HTML fragment.

    Args:
        file: Audio input handed to ``pipe`` unchanged (the Gradio input in
            this file is configured with ``type="filepath"``).
        task: Generation task forwarded to the model via ``generate_kwargs``.
        return_timestamps: When truthy, every line is prefixed with its
            ``[start -> end]`` timestamps; otherwise only the text is shown.

    Returns:
        A ``(file, html)`` tuple: the unchanged input and the HTML markup
        holding one line per transcribed chunk.
    """
    # Timestamps are always requested from the pipeline because the chunk list
    # drives the line-by-line rendering, even when timestamps are not displayed.
    outputs = pipe(
        file,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    chunks = outputs["chunks"]

    # Chunk text is model output; escape it so it can never be interpreted as
    # markup once embedded in the HTML component.
    # NOTE(review): confirm `format_timestamp` tolerates a `None` end
    # timestamp, which the pipeline may emit for the last chunk.
    if return_timestamps:
        lines = [
            f"[{format_timestamp(chunk['timestamp'][0])} -> "
            f"{format_timestamp(chunk['timestamp'][1])}] "
            f"{html.escape(chunk['text'])}"
            for chunk in chunks
        ]
    else:
        lines = [html.escape(chunk["text"]) for chunk in chunks]

    body = "<br>".join(lines)
    text = f"<h4>Transcription</h4><div style='overflow-y: scroll; height: 400px;'>{body}</div>"
    return file, text

# Gradio interface: takes an uploaded audio file, a task selector and a
# timestamp toggle, and shows the audio back alongside the HTML transcription
# produced by `transcribe`.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(source="upload", label="Audio file", type="filepath"),
        gr.Radio(["transcribe"], label="Task", value="transcribe"),
        gr.Checkbox(value=True, label="Return timestamps"),
    ],
    outputs=[
        gr.Audio(label="Processed Audio", type="filepath"),
        # `gr.HTML` is the component-API equivalent of the deprecated
        # `gr.outputs.HTML("text")` wrapper (same label, no deprecation warning).
        gr.HTML(label="text"),
    ],
    title="Whisper Demo: Transcribe Audio",
    description=(
        "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses the"
        f" checkpoint [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe audio files"
        " of arbitrary length."
    ),
    # NOTE(review): no `examples=` are passed here, so this flag presumably
    # has nothing to cache — confirm whether it can be dropped.
    cache_examples=True,
    allow_flagging="never",
)

# Enable request queuing with a concurrency count of 3, then start the app
# with a public share link and debug mode on.
file_transcribe.queue(concurrency_count=3)
file_transcribe.launch(share=True, debug=True)