mrsk1883 committed on
Commit
6416b3a
1 Parent(s): ac303b5

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +48 -45
app.py CHANGED
@@ -10,53 +10,56 @@ model_name = "pszemraj/led-large-book-summary"
10
  summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
11
 
12
  def extract_abstract_and_summarize(pdf_file):
13
- try:
14
- with open(pdf_file, "rb") as file:
15
- pdf_reader = PdfReader(file)
16
- abstract_text = ""
17
- for page_num in range(len(pdf_reader.pages)):
18
- page = pdf_reader.pages[page_num]
19
- text = page.extract_text()
20
- abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
21
- if abstract_match:
22
- start_index = abstract_match.end()
23
- introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
24
- if introduction_match:
25
- end_index = start_index + introduction_match.start()
26
- else:
27
- end_index = None
28
- abstract_text = text[start_index:end_index]
29
- break
30
-
31
- # Summarize the extracted abstract using the LED-large model
32
- result = summarizer(abstract_text)
33
-
34
- # Print the entire result for debugging
35
- print("Result:", result)
36
-
37
- # Check if 'summary' is present in the result
38
- if result and isinstance(result, list) and len(result) > 0:
39
- summary = result[0].get('summary', 'Summary not available.')
40
- else:
41
- summary = "Summary not available."
42
-
43
- # Generate audio
44
- speech = gTTS(text=summary, lang="en")
45
- speech_bytes = BytesIO()
46
- speech.write_to_fp(speech_bytes)
47
-
48
- # Return individual output values
49
- return summary, speech_bytes.getvalue(), abstract_text.strip()
50
-
51
- except Exception as e:
52
- raise Exception(str(e))
 
 
 
53
 
54
  interface = gr.Interface(
55
- fn=extract_abstract_and_summarize,
56
- inputs=[gr.File(label="Upload PDF")],
57
- outputs=[gr.Textbox(label="Summary"), gr.Audio()],
58
- title="PDF Summarization & Audio Tool",
59
- description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
60
  )
61
 
62
  interface.launch()
 
10
  summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
11
 
12
  def extract_abstract_and_summarize(pdf_file):
13
+ try:
14
+ if pdf_file is None:
15
+ raise ValueError("PDF file is not provided.")
16
+
17
+ with open(pdf_file, "rb") as file:
18
+ pdf_reader = PdfReader(file)
19
+ abstract_text = ""
20
+ for page_num in range(len(pdf_reader.pages)):
21
+ page = pdf_reader.pages[page_num]
22
+ text = page.extract_text()
23
+ abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
24
+ if abstract_match:
25
+ start_index = abstract_match.end()
26
+ introduction_match = re.search(r"\bIntroduction\b", text[start_index:], re.IGNORECASE)
27
+ if introduction_match:
28
+ end_index = start_index + introduction_match.start()
29
+ else:
30
+ end_index = None
31
+ abstract_text = text[start_index:end_index]
32
+ break
33
+
34
+ # Summarize the extracted abstract using the LED-large model
35
+ result = summarizer(abstract_text)
36
+
37
+ # Print the entire result for debugging
38
+ print("Result:", result)
39
+
40
+ # Check if 'summary' is present in the result
41
+ if result and isinstance(result, list) and len(result) > 0:
42
+ summary = result[0].get('summary', 'Summary not available.')
43
+ else:
44
+ summary = "Summary not available."
45
+
46
+ # Generate audio
47
+ speech = gTTS(text=summary, lang="en")
48
+ speech_bytes = BytesIO()
49
+ speech.write_to_fp(speech_bytes)
50
+
51
+ # Return individual output values
52
+ return summary, speech_bytes.getvalue(), abstract_text.strip()
53
+
54
+ except Exception as e:
55
+ raise Exception(str(e))
56
 
57
  interface = gr.Interface(
58
+ fn=extract_abstract_and_summarize,
59
+ inputs=[gr.File(label="Upload PDF")],
60
+ outputs=[gr.Textbox(label="Summary"), gr.Audio()],
61
+ title="PDF Summarization & Audio Tool",
62
+ description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
63
  )
64
 
65
  interface.launch()