SiteOn's picture
Upload 2 files
4662286 verified
raw
history blame contribute delete
No virus
3.78 kB
import gradio as gr
import whisper
import subprocess
import os
import tempfile
from pytube import YouTube
def download_video_from_link(link):
    """Download the highest-resolution progressive MP4 stream of a YouTube video.

    Args:
        link: URL of the video, passed straight to ``pytube.YouTube``.

    Returns:
        A ``(video_path, title)`` tuple: the path of the downloaded file and
        the title reported by pytube. The file lives in a freshly created
        temporary directory that is NOT removed here; the caller owns it.

    Raises:
        ValueError: If the video exposes no progressive MP4 stream.
    """
    import shutil

    yt = YouTube(link)
    candidates = yt.streams.filter(progressive=True, file_extension='mp4')
    video = candidates.order_by('resolution').desc().first()
    if video is None:
        # first() yields None on an empty query; fail with a clear message
        # instead of an AttributeError on `video.download`.
        raise ValueError(f"No progressive MP4 stream available for: {link}")

    # Created only once a stream is known to exist, so a failed lookup does
    # not leave an empty directory behind.
    temp_dir = tempfile.mkdtemp()
    try:
        video_path = video.download(output_path=temp_dir)
    except Exception:
        # Do not leak the directory when the download itself fails.
        shutil.rmtree(temp_dir, ignore_errors=True)
        raise
    return video_path, yt.title
def format_timestamp(seconds):
    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Fractional seconds are truncated, and the hour field is not wrapped at 24.
    """
    whole_hours = int(seconds // 3600)
    whole_minutes = int(seconds % 3600 // 60)
    whole_seconds = int(seconds % 60)
    fields = (whole_hours, whole_minutes, whole_seconds)
    return ":".join(f"{field:02d}" for field in fields)
def transcribe_video(video_file):
    """Transcribe a video or audio file to text with Whisper.

    The input is converted by ffmpeg to a 16 kHz mono 16-bit PCM WAV file,
    which is then transcribed with the Whisper "base" model.

    Args:
        video_file: Either a link (``str``), which is fetched with
            ``download_video_from_link``, or the raw bytes of an uploaded
            file.

    Returns:
        A ``(with_timestamps, plain)`` tuple of strings. ``with_timestamps``
        has one ``[HH:MM:SS - HH:MM:SS] text`` line per segment and is empty
        when the result carries no "segments" key; ``plain`` is the
        whitespace-stripped text.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    downloaded_path = None
    with tempfile.TemporaryDirectory() as temp_dir:
        try:
            if isinstance(video_file, str):
                video_path, _title = download_video_from_link(video_file)
                downloaded_path = video_path
            else:
                video_path = os.path.join(temp_dir, "uploaded_video.mp4")
                with open(video_path, 'wb') as file:
                    file.write(video_file)

            audio_path = os.path.join(temp_dir, "temp_audio.wav")
            # Argument list instead of a shell string: the downloaded file
            # name is never interpreted by a shell.
            command = [
                "ffmpeg",
                "-i", video_path,
                "-acodec", "pcm_s16le",
                "-ar", "16000",
                "-ac", "1",
                audio_path,
            ]
            # check=True surfaces a failed conversion here rather than as a
            # missing-file error inside Whisper.
            subprocess.run(command, check=True)

            # NOTE(review): the model is loaded on every call.
            model = whisper.load_model("base")
            result = model.transcribe(audio_path)
        finally:
            if downloaded_path is not None:
                # Best-effort cleanup of the download, which lives outside
                # temp_dir. rmdir only succeeds on an empty directory, so
                # nothing unrelated can be deleted.
                try:
                    os.remove(downloaded_path)
                    os.rmdir(os.path.dirname(downloaded_path))
                except OSError:
                    pass

    if "segments" not in result:
        return "", result["text"].strip()

    stamped_lines = []
    plain_parts = []
    for segment in result["segments"]:
        start_time = format_timestamp(segment["start"])
        end_time = format_timestamp(segment["end"])
        text = segment["text"]
        stamped_lines.append(f"[{start_time} - {end_time}] {text}\n")
        plain_parts.append(text)
    return "".join(stamped_lines), " ".join(plain_parts).strip()
def clear_fields():
    """Return one empty string for each of the two output boxes."""
    blank = ""
    return blank, blank
def start_transcription_interface():
    """Build the Gradio Blocks UI for the transcriber and launch it.

    The page offers a file upload and a link textbox, a submit button that
    runs ``transcribe_video`` and a clear button that empties the two output
    boxes. The app is launched with ``share=True``.
    """
    css = """
body { font-family: Arial, sans-serif; }
.gr-button { background-color: #4CAF50; color: white; border: none; padding: 10px 24px; border-radius: 4px; cursor: pointer; }
.gr-button:hover { background-color: #45a049; }
.gr-textbox { border-radius: 4px; border: 1px solid #ccc; padding: 10px; }
.gr-row { margin-bottom: 20px; }
.gr-markdown a { color: #3182ce; }
.show-api {display: none !important;} .built-with {display: none !important;}
"""
    with gr.Blocks(css=css) as app:
        gr.Markdown("### Transcritor de Vídeo ou Áudio em Texto")
        # NOTE(review): which widgets sit inside this row is inferred; confirm
        # against the intended layout.
        with gr.Row():
            upload = gr.File(label="Fazer upload do vídeo", type="binary")
            link_input = gr.Textbox(label="Ou insira o link do vídeo")
        gr.Markdown("[Baixar vídeos do Youtube, Facebook e Instagram](https://ummy.net/pt33ZN/)")
        submit_btn = gr.Button("Transcrever vídeo ou áudio p/ texto")
        clear_btn = gr.Button("Limpar os Campos")
        output_with_timestamps = gr.Textbox(label="Transcrição com tempo", lines=6)
        output_plain = gr.Textbox(label="Transcrição sem tempo", lines=4)

        def handle_input(upload_file, link):
            """Transcribe the upload if present, otherwise the link."""
            if upload_file is not None:
                return transcribe_video(upload_file)
            # A whitespace-only link is truthy; strip it so it is treated as
            # "nothing entered" instead of being sent to the downloader.
            link = (link or "").strip()
            if link:
                return transcribe_video(link)
            return "Por favor, faça upload de um vídeo ou insira um link.", ""

        submit_btn.click(
            fn=handle_input,
            inputs=[upload, link_input],
            outputs=[output_with_timestamps, output_plain],
        )
        clear_btn.click(
            fn=clear_fields,
            inputs=[],
            outputs=[output_with_timestamps, output_plain],
        )
    app.launch(share=True)
# Runs at module level (no `__main__` guard): executing or importing this file
# builds the UI and launches the app.
start_transcription_interface()