mrsk1883 committed on
Commit
ac303b5
1 Parent(s): cdb128e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +45 -37
app.py CHANGED
@@ -10,45 +10,53 @@ model_name = "pszemraj/led-large-book-summary"
10
  summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
11
 
12
  def extract_abstract_and_summarize(pdf_file):
13
- try:
14
- with open(pdf_file, "rb") as file:
15
- pdf_reader = PdfReader(file)
16
- abstract_text = ""
17
- for page_num in range(len(pdf_reader.pages)):
18
- page = pdf_reader.pages[page_num]
19
- text = page.extract_text()
20
- abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
21
- if abstract_match:
22
- start_index = abstract_match.end()
23
- introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
24
- if introduction_match:
25
- end_index = start_index + introduction_match.start()
26
- else:
27
- end_index = None
28
- abstract_text = text[start_index:end_index]
29
- break
30
-
31
- # Summarize the extracted abstract using the LED-large model
32
- result = summarizer(abstract_text, max_length=256, min_length=16, length_penalty=2.0)
33
- summary = result[0]['summary']
34
-
35
- # Generate audio
36
- speech = gTTS(text=summary, lang="en")
37
- speech_bytes = BytesIO()
38
- speech.write_to_fp(speech_bytes)
39
-
40
- # Return individual output values
41
- return summary, speech_bytes.getvalue(), abstract_text.strip()
42
-
43
- except Exception as e:
44
- raise Exception(str(e))
 
 
 
 
 
 
 
 
45
 
46
  interface = gr.Interface(
47
- fn=extract_abstract_and_summarize,
48
- inputs=[gr.File(label="Upload PDF")],
49
- outputs=[gr.Textbox(label="Summary"), gr.Audio()],
50
- title="PDF Summarization & Audio Tool",
51
- description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
52
  )
53
 
54
  interface.launch()
 
10
  # Build the summarization pipeline once at module load; extract_abstract_and_summarize() reuses this object on every call.
  summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
11
 
12
  def extract_abstract_and_summarize(pdf_file):
13
+ try:
14
+ with open(pdf_file, "rb") as file:
15
+ pdf_reader = PdfReader(file)
16
+ abstract_text = ""
17
+ for page_num in range(len(pdf_reader.pages)):
18
+ page = pdf_reader.pages[page_num]
19
+ text = page.extract_text()
20
+ abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
21
+ if abstract_match:
22
+ start_index = abstract_match.end()
23
+ introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
24
+ if introduction_match:
25
+ end_index = start_index + introduction_match.start()
26
+ else:
27
+ end_index = None
28
+ abstract_text = text[start_index:end_index]
29
+ break
30
+
31
+ # Summarize the extracted abstract using the LED-large model
32
+ result = summarizer(abstract_text)
33
+
34
+ # Print the entire result for debugging
35
+ print("Result:", result)
36
+
37
+ # Check if 'summary' is present in the result
38
+ if result and isinstance(result, list) and len(result) > 0:
39
+ summary = result[0].get('summary', 'Summary not available.')
40
+ else:
41
+ summary = "Summary not available."
42
+
43
+ # Generate audio
44
+ speech = gTTS(text=summary, lang="en")
45
+ speech_bytes = BytesIO()
46
+ speech.write_to_fp(speech_bytes)
47
+
48
+ # Return individual output values
49
+ return summary, speech_bytes.getvalue(), abstract_text.strip()
50
+
51
+ except Exception as e:
52
+ raise Exception(str(e))
53
 
54
  interface = gr.Interface(
55
+ fn=extract_abstract_and_summarize,
56
+ inputs=[gr.File(label="Upload PDF")],
57
+ outputs=[gr.Textbox(label="Summary"), gr.Audio()],
58
+ title="PDF Summarization & Audio Tool",
59
+ description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
60
  )
61
 
62
  interface.launch()