File size: 1,358 Bytes
ebcf092
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#based on:
#https://huggingface.co/spaces/Sarath2002/YouTube_Video_Summarizer
#https://huggingface.co/spaces/themanas021/Youtube-Video-Summarizer

from youtube_transcript_api import YouTubeTranscriptApi
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

# Maps the model names accepted by Summarizer to Hugging Face checkpoints.
_CHECKPOINTS = {
    "Pegasus": "google/pegasus-large",
    "mT5": "csebuetnlp/mT5_multilingual_XLSum",
    "BART": "sshleifer/distilbart-cnn-12-6",
}

# Message returned whenever the transcript cannot be obtained.
_TRANSCRIPT_ERROR = (
    "TranscriptsDisabled: Transcript is not available \nTry another video"
)


def _extract_video_id(link):
    """Return the video id found in *link*, or None when there is none.

    Recognised shapes: a ``v=<id>`` query parameter (in any position),
    ``youtu.be/<id>`` short links, and ``/embed/<id>``, ``/shorts/<id>``,
    ``/live/<id>`` and ``/v/<id>`` paths. As a last resort the text after
    the first ``=`` (up to the next ``&``) is used, which mirrors the
    previous ``link.split("=")[1]`` behaviour for unusual inputs.
    """
    # Local import keeps this block self-contained (no file-level edit).
    from urllib.parse import parse_qs, urlparse

    if not isinstance(link, str) or not link.strip():
        return None
    link = link.strip()

    try:
        # Without a scheme urlparse puts the host into the path, so add one.
        parsed = urlparse(link if "://" in link else "https://" + link)
        host = (parsed.hostname or "").lower()
    except ValueError:
        return None

    ids = parse_qs(parsed.query).get("v")
    if ids and ids[0]:
        return ids[0]

    segments = [part for part in parsed.path.split("/") if part]
    if host in ("youtu.be", "www.youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("embed", "shorts", "live", "v"):
        return segments[1]

    if "=" in link:
        candidate = link.split("=")[1].split("&")[0]
        return candidate or None
    return None


def Summarizer(link, model):
    """Summarize the transcript of a YouTube video.

    Args:
        link: URL of the video (``watch?v=``, ``youtu.be/``, ``/embed/``,
            ``/shorts/`` ... forms are accepted).
        model: One of ``"Pegasus"``, ``"mT5"`` or ``"BART"``; selects the
            checkpoint loaded through ``AutoModelForSeq2SeqLM``.

    Returns:
        The generated summary on success. On failure a human-readable
        message is returned instead of raising: one for a link without a
        video id, one for an unsupported model name, one when the
        transcript cannot be fetched (or is empty), and one when loading
        or running the model fails.
    """
    video_id = _extract_video_id(link)
    if video_id is None:
        return "Invalid link: could not find a YouTube video id \nTry another video"

    # Validate the model name before any network work is done.
    checkpoint = _CHECKPOINTS.get(model)
    if checkpoint is None:
        supported = ", ".join(_CHECKPOINTS)
        return f"Unknown model {model!r}: choose one of {supported}"

    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        transcript_text = " ".join(entry["text"] for entry in transcript)
    except Exception:
        # Best-effort boundary: any fetch failure is reported to the caller
        # as text, exactly as before.
        return _TRANSCRIPT_ERROR

    if not transcript_text.strip():
        return _TRANSCRIPT_ERROR

    try:
        tokenizer = AutoTokenizer.from_pretrained(checkpoint)
        # Distinct local name so the ``model`` argument is not shadowed.
        seq2seq_model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

        # Only the first 1024 tokens of the transcript are fed to the model.
        inputs = tokenizer(
            transcript_text,
            max_length=1024,
            truncation=True,
            return_tensors="pt",
        )
        summary_ids = seq2seq_model.generate(
            inputs["input_ids"],
            attention_mask=inputs["attention_mask"],
        )
        summaries = tokenizer.batch_decode(
            summary_ids,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=False,
        )
    except Exception as error:
        # Model download/inference problems are not transcript problems;
        # report them as such instead of blaming the video.
        return f"Summarization failed: {error}"

    return summaries[0]