mrsk1883 committed on
Commit
cdb128e
1 Parent(s): 797680d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -16
app.py CHANGED
@@ -1,12 +1,13 @@
1
  import gradio as gr
2
  from PyPDF2 import PdfReader
3
- from transformers import pipeline
4
  from gtts import gTTS
5
  from io import BytesIO
6
  import re
7
- import os
8
 
9
- summarizer = pipeline("summarization")
 
 
10
 
11
  def extract_abstract_and_summarize(pdf_file):
12
  try:
@@ -27,17 +28,8 @@ def extract_abstract_and_summarize(pdf_file):
27
  abstract_text = text[start_index:end_index]
28
  break
29
 
30
- # Summarize the extracted abstract
31
- result = summarizer(
32
- abstract_text,
33
- min_length=16,
34
- max_length=256,
35
- no_repeat_ngram_size=3,
36
- encoder_no_repeat_ngram_size=3,
37
- repetition_penalty=3.5,
38
- num_beams=4,
39
- early_stopping=True,
40
- )
41
  summary = result[0]['summary']
42
 
43
  # Generate audio
@@ -56,8 +48,7 @@ interface = gr.Interface(
56
  inputs=[gr.File(label="Upload PDF")],
57
  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
58
  title="PDF Summarization & Audio Tool",
59
- description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'summarizer' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
60
  )
61
 
62
  interface.launch()
63
-
 
1
  import gradio as gr
2
  from PyPDF2 import PdfReader
3
+ from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
4
  from gtts import gTTS
5
  from io import BytesIO
6
  import re
 
7
 
8
+ # Load the LED-large model for summarization
9
+ model_name = "pszemraj/led-large-book-summary"
10
+ summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
11
 
12
  def extract_abstract_and_summarize(pdf_file):
13
  try:
 
28
  abstract_text = text[start_index:end_index]
29
  break
30
 
31
+ # Summarize the extracted abstract using the LED-large model
32
+ result = summarizer(abstract_text, max_length=256, min_length=16, length_penalty=2.0)
 
 
 
 
 
 
 
 
 
33
  summary = result[0]['summary']
34
 
35
  # Generate audio
 
48
  inputs=[gr.File(label="Upload PDF")],
49
  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
50
  title="PDF Summarization & Audio Tool",
51
+ description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
52
  )
53
 
54
  interface.launch()