|
import gradio as gr |
|
import whisper |
|
import subprocess |
|
import os |
|
import tempfile |
|
from pytube import YouTube |
|
|
|
def download_video_from_link(link, output_dir=None):
    """Download a progressive MP4 stream of a YouTube video.

    The stream chosen is the first one after filtering for progressive MP4
    streams and ordering them by resolution, descending.

    Args:
        link: URL of the video, passed to ``pytube.YouTube``.
        output_dir: Directory to save the file in. When omitted, a fresh
            directory is created with ``tempfile.mkdtemp()``; the caller is
            then responsible for deleting it.

    Returns:
        A ``(video_path, title)`` tuple: the path returned by the stream's
        ``download`` call and the video title.

    Raises:
        ValueError: If the video offers no progressive MP4 stream.
    """
    yt = YouTube(link)
    candidates = yt.streams.filter(progressive=True, file_extension="mp4")
    video = candidates.order_by("resolution").desc().first()
    if video is None:
        # first() yields None on an empty query; fail with a clear message
        # instead of an AttributeError on ``None.download``.
        raise ValueError(f"No progressive MP4 stream available for: {link}")

    # Create the directory only once we know there is something to download,
    # so a failed lookup does not leave an empty temp directory behind.
    target_dir = tempfile.mkdtemp() if output_dir is None else output_dir
    video_path = video.download(output_path=target_dir)
    return video_path, yt.title
|
|
|
def format_timestamp(seconds):
    """Convert a number of seconds to an ``HH:MM:SS`` string.

    Fractions of a second are dropped; each field is zero-padded to at
    least two digits.
    """
    fields = (
        int(seconds // 3600),         # whole hours
        int((seconds % 3600) // 60),  # whole minutes within the hour
        int(seconds % 60),            # whole seconds within the minute
    )
    return ":".join(f"{field:02d}" for field in fields)
|
|
|
def _extract_audio_track(video_path, audio_path):
    """Run ffmpeg to write the audio of ``video_path`` to ``audio_path`` as WAV."""
    # The command is an argument list (no shell), so quotes or other shell
    # metacharacters in the file name -- e.g. coming from a video title --
    # are never interpreted as shell syntax.
    command = [
        "ffmpeg",
        "-i", video_path,
        "-acodec", "pcm_s16le",
        "-ar", "16000",
        "-ac", "1",
        audio_path,
    ]
    # check=True surfaces an ffmpeg failure here rather than as a confusing
    # error later when the missing audio file is opened.
    subprocess.run(command, check=True)


def _discard_download(video_path):
    """Best-effort removal of a downloaded video and its containing folder."""
    try:
        os.remove(video_path)
        # rmdir only succeeds on an empty directory, so nothing other than
        # the download itself can be deleted by this cleanup.
        os.rmdir(os.path.dirname(video_path))
    except OSError:
        # Cleanup is deliberately best-effort; a leftover temp file must not
        # turn a successful transcription into an error.
        pass


def _render_segments(result):
    """Build the (timestamped, plain) transcript strings from a result dict."""
    if "segments" not in result:
        return "", result["text"].strip()

    stamped_lines = []
    plain_parts = []
    for segment in result["segments"]:
        start_time = format_timestamp(segment["start"])
        end_time = format_timestamp(segment["end"])
        text = segment["text"]
        stamped_lines.append(f"[{start_time} - {end_time}] {text}\n")
        plain_parts.append(text)
    return "".join(stamped_lines), " ".join(plain_parts).strip()


def transcribe_video(video_file):
    """Transcribe a video given either as a link or as raw file bytes.

    Args:
        video_file: A ``str`` is treated as a link and fetched with
            ``download_video_from_link``; anything else is written as-is to
            a temporary ``uploaded_video.mp4`` (so it is expected to be the
            file's bytes).

    Returns:
        A ``(transcription_with_timestamps, transcription_plain)`` tuple.
        The first has one ``[HH:MM:SS - HH:MM:SS] text`` line per segment;
        the second is the segment texts joined by spaces and stripped. If
        the result has no ``"segments"`` key, the first is empty and the
        second is the stripped ``"text"`` value.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status.
    """
    downloaded_path = None
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            if isinstance(video_file, str):
                video_path, _title = download_video_from_link(video_file)
                downloaded_path = video_path
            else:
                video_path = os.path.join(temp_dir, "uploaded_video.mp4")
                with open(video_path, "wb") as file:
                    file.write(video_file)

            audio_path = os.path.join(temp_dir, "temp_audio.wav")
            _extract_audio_track(video_path, audio_path)

            model = whisper.load_model("base")
            result = model.transcribe(audio_path)
        finally:
            # The download lives outside temp_dir, so the context manager
            # above does not remove it; clean it up explicitly.
            if downloaded_path is not None:
                _discard_download(downloaded_path)

    return _render_segments(result)
|
|
|
def clear_fields():
    """Return a pair of empty strings, one per output textbox to reset."""
    blank = ""
    return blank, blank
|
|
|
def start_transcription_interface(share=True):
    """Build the Gradio transcription UI and launch it.

    Args:
        share: Forwarded to ``launch(share=...)``. Defaults to ``True``,
            the value that used to be hard-coded; per Gradio's docs this
            requests a publicly shareable link, so pass ``False`` to keep
            the app local.
    """
    css = """
    body { font-family: Arial, sans-serif; }
    .gr-button { background-color: #4CAF50; color: white; border: none; padding: 10px 24px; border-radius: 4px; cursor: pointer; }
    .gr-button:hover { background-color: #45a049; }
    .gr-textbox { border-radius: 4px; border: 1px solid #ccc; padding: 10px; }
    .gr-row { margin-bottom: 20px; }
    .gr-markdown a { color: #3182ce; }
    .show-api {display: none !important;} .built-with {display: none !important;}
    """

    def handle_input(upload_file, link):
        """Send the uploaded file or the link to ``transcribe_video``.

        Returns the two values for the output textboxes; when neither input
        is provided, the first value is a prompt asking for one.
        """
        # An uploaded file takes precedence over a link when both are given.
        if upload_file is not None:
            return transcribe_video(upload_file)

        # Treat a missing or whitespace-only link as "no link" rather than
        # handing it to the downloader.
        link = (link or "").strip()
        if link:
            return transcribe_video(link)

        return "Por favor, faça upload de um vídeo ou insira um link.", ""

    with gr.Blocks(css=css) as app:
        gr.Markdown("### Transcritor de Vídeo ou Áudio em Texto")

        with gr.Row():
            upload = gr.File(label="Fazer upload do vídeo", type="binary")
            link_input = gr.Textbox(label="Ou insira o link do vídeo")

        gr.Markdown("[Baixar vídeos do Youtube, Facebook e Instagram](https://ummy.net/pt33ZN/)")
        submit_btn = gr.Button("Transcrever vídeo ou áudio p/ texto")
        clear_btn = gr.Button("Limpar os Campos")
        output_with_timestamps = gr.Textbox(label="Transcrição com tempo", lines=6)
        output_plain = gr.Textbox(label="Transcrição sem tempo", lines=4)

        submit_btn.click(
            fn=handle_input,
            inputs=[upload, link_input],
            outputs=[output_with_timestamps, output_plain],
        )
        # Only the two output textboxes are reset; the inputs are left as-is.
        clear_btn.click(
            fn=clear_fields,
            inputs=[],
            outputs=[output_with_timestamps, output_plain],
        )

    app.launch(share=share)
|
|
|
# Build the Gradio UI and launch it as soon as this file is executed.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing
# this module also launches the app -- confirm that this is intended.
start_transcription_interface()