mrsk1883 committed on
Commit
797680d
1 Parent(s): 2a10acb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -8
app.py CHANGED
@@ -1,14 +1,12 @@
1
  import gradio as gr
2
  from PyPDF2 import PdfReader
3
- from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
4
  from gtts import gTTS
5
  from io import BytesIO
6
  import re
7
  import os
8
 
9
- model_name = "pszemraj/led-base-book-summary"
10
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
- tokenizer = AutoTokenizer.from_pretrained(model_name)
12
 
13
  def extract_abstract_and_summarize(pdf_file):
14
  try:
@@ -30,9 +28,17 @@ def extract_abstract_and_summarize(pdf_file):
30
  break
31
 
32
  # Summarize the extracted abstract
33
- inputs = tokenizer(abstract_text, return_tensors="pt")
34
- outputs = model.generate(**inputs, max_length=50, min_length=30)
35
- summary = tokenizer.decode(outputs[0])
 
 
 
 
 
 
 
 
36
 
37
  # Generate audio
38
  speech = gTTS(text=summary, lang="en")
@@ -50,7 +56,8 @@ interface = gr.Interface(
50
  inputs=[gr.File(label="Upload PDF")],
51
  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
52
  title="PDF Summarization & Audio Tool",
53
- description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
54
  )
55
 
56
  interface.launch()
 
 
1
  import gradio as gr
2
  from PyPDF2 import PdfReader
3
+ from transformers import pipeline
4
  from gtts import gTTS
5
  from io import BytesIO
6
  import re
7
  import os
8
 
9
+ summarizer = pipeline("summarization")
 
 
10
 
11
  def extract_abstract_and_summarize(pdf_file):
12
  try:
 
28
  break
29
 
30
  # Summarize the extracted abstract
31
+ result = summarizer(
32
+ abstract_text,
33
+ min_length=16,
34
+ max_length=256,
35
+ no_repeat_ngram_size=3,
36
+ encoder_no_repeat_ngram_size=3,
37
+ repetition_penalty=3.5,
38
+ num_beams=4,
39
+ early_stopping=True,
40
+ )
41
+ summary = result[0]['summary']
42
 
43
  # Generate audio
44
  speech = gTTS(text=summary, lang="en")
 
56
  inputs=[gr.File(label="Upload PDF")],
57
  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
58
  title="PDF Summarization & Audio Tool",
59
+ description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'summarizer' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
60
  )
61
 
62
  interface.launch()
63
+