YouTubeSummary / app.py
WildKratts
Edited Prompt
371ac33
raw
history blame
No virus
5.93 kB
import openai
from openai import Audio
import os
import yt_dlp as youtube_dl
import guidance
from moviepy.editor import AudioFileClip
import streamlit as st
import os
output_dir = "./Chunks"
output_file = "video_audio.mp3"
chunk_length = 120 * 15
transcripts = []
folder_path = "./Chunks"
audio_file_path = "./.DownloadedAudio"
summary_llm = guidance.llms.OpenAI('gpt-3.5-turbo-16k-0613', caching=False)
st.set_page_config(
page_title='YouTube Video to Summary',
page_icon='🐾',
initial_sidebar_state="auto",
layout="wide"
)
st.markdown("<h1 style='text-align: center; color: white;'>YouTube Video to Summary</h1>", unsafe_allow_html=True)
st.sidebar.subheader("Enter Your API Key πŸ—οΈ")
open_api_key = st.sidebar.text_input(
"Open API Key",
value=st.session_state.get('open_api_key', ''),
help="Get your API key from https://openai.com/",
type='password'
)
st.session_state['open_api_key'] = open_api_key
#load_dotenv(find_dotenv())
openai.api_key = open_api_key
#Function to download the audio from a youtube video
def download_audio(url, output_file):
ydl_opts = {
'format': 'bestaudio/best',
'outtmpl': '.DownloadedAudio/' + output_file,
'postprocessors': [{
'key': 'FFmpegExtractAudio',
'preferredcodec': 'mp3',
'preferredquality': '192'
}],
#'ffmpeg_location': "C:/ffmpeg/bin"
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
ydl.download([url])
print("Download Complete")
# Function to split the audio file into smaller chunks
def split_audio_file(audio_file_path, chunk_length, output_dir):
print("== Splitting audio...")
audio = AudioFileClip(audio_file_path)
duration = audio.duration
chunks = []
start_time = 0
while start_time < duration:
end_time = min(start_time + chunk_length, duration)
chunk = audio.subclip(start_time, end_time)
chunk_file = os.path.join(output_dir, f"chunk_{start_time}-{end_time}.mp3")
chunk.write_audiofile(chunk_file)
chunks.append(chunk_file)
start_time += chunk_length
return chunks
def transcribe_audio(audio_file_path):
with open(audio_file_path, "rb") as audio_file:
transcript = openai.Audio.transcribe("whisper-1", audio_file)
print(transcript['text'])
return transcript['text']
def transcribe_audio_dir(output_dir):
global transcripts
for filename in os.listdir(output_dir):
if filename.endswith(".mp3"):
file_path = os.path.join(output_dir, filename)
transcript = transcribe_audio(file_path)
summary = generate_summary(transcript)
transcripts.append(summary)
os.remove(file_path)
print("Chunk Transcription and Summarization Complete")
print(transcripts)
#Summarize the transcripts using the LLM and write to a file
output_path = os.path.join(os.getcwd(), "Transcripts", "summary.txt")
with open(output_path, "w", encoding="utf-8") as file:
for transcript in transcripts:
file.write(transcript + "\n")
return transcripts
def generate_summary(text):
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo-16k-0613",
messages=[
{"role": "system", "content": "You are a world's best summarizer. Condense the transcript text, capturing essential points and core points. Include relevant examples, omit excess details, and ensure the summary's length matches the original's complexity."},
{"role": "user", "content": f"Please summarize the following text:\n{text}\nSummary:"},
],
max_tokens=11000,
stop=None,
temperature=0.2,
)
summary = response['choices'][0]['message']['content'].strip()
return summary
#download_audio(url, output_file)
import gradio as gr
def main():
source = st.radio("Select audio source", ["YouTube Video", "Audio File"])
if source == "YouTube Video":
url = st.text_input(label="Video URL")
audio_file = None
else: # Audio File
audio_file = st.file_uploader("Upload audio file", type=["mp3", "wav"])
url = None
chunk_length = st.number_input(label="Chunk Length (seconds)", value=900, step=1)
return url, "default_output", chunk_length, audio_file
def download_and_split_video(url, output_file, chunk_length, transcripts):
download_audio(url, output_file)
audio_file_path = '.DownloadedAudio/' + f"{output_file}.mp3"
split_audio_file(audio_file_path, chunk_length, output_dir)
os.remove(audio_file_path)
return transcribe_audio_dir(output_dir)
#return transcribe_audio_dir(transcripts)
if __name__ == "__main__":
url, output_file, chunk_length, audio_file = main() # get the values here
if open_api_key: # Check if API key is provided
if st.button("Generate Summary"):
with st.spinner("Summary Generating..."):
if url: # If YouTube URL is provided
transcripts = download_and_split_video(url, output_file, chunk_length, transcripts)
elif audio_file: # If an audio file is uploaded
audio_file_path = os.path.join('.DownloadedAudio/', output_file)
with open(audio_file_path, "wb") as f:
f.write(audio_file.getvalue()) # Write the content of the uploaded file to a new file
split_audio_file(audio_file_path, chunk_length, output_dir)
transcripts = transcribe_audio_dir(output_dir)
os.remove(audio_file_path) # Delete saved audio file
st.subheader("Summary")
for transcript in transcripts: # Loop through the transcripts list
st.write(transcript)
else: # If API key is not provided
st.warning("Please Enter OpenAI API Key") # Display warning message