mrsk1883 committed on
Commit
2a10acb
1 Parent(s): ad0dad6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -13
app.py CHANGED
@@ -10,11 +10,7 @@ model_name = "pszemraj/led-base-book-summary"
10
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
 
13
-
14
  def extract_abstract_and_summarize(pdf_file):
15
- """
16
- Extracts the abstract and summarizes it in one sentence with information till "Introduction".
17
- """
18
  try:
19
  with open(pdf_file, "rb") as file:
20
  pdf_reader = PdfReader(file)
@@ -31,28 +27,30 @@ def extract_abstract_and_summarize(pdf_file):
31
  else:
32
  end_index = None
33
  abstract_text = text[start_index:end_index]
34
- break # Exit loop once abstract is found
 
35
  # Summarize the extracted abstract
36
  inputs = tokenizer(abstract_text, return_tensors="pt")
37
- outputs = model.generate(**inputs)
38
  summary = tokenizer.decode(outputs[0])
39
- # Extract only the first sentence
40
- summary_sentence = extract_first_sentence(summary)
41
  # Generate audio
42
- speech = gTTS(text=summary_sentence, lang="en")
43
  speech_bytes = BytesIO()
44
  speech.write_to_fp(speech_bytes)
 
45
  # Return individual output values
46
- return summary_sentence, speech_bytes.getvalue(), abstract_text.strip()
 
47
  except Exception as e:
48
  raise Exception(str(e))
49
 
50
-
51
  interface = gr.Interface(
52
  fn=extract_abstract_and_summarize,
53
  inputs=[gr.File(label="Upload PDF")],
54
  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
55
  title="PDF Summarization & Audio Tool",
56
- description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs.""",
57
  )
58
- interface.launch(share=True)
 
 
10
  model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
11
  tokenizer = AutoTokenizer.from_pretrained(model_name)
12
 
 
13
  def extract_abstract_and_summarize(pdf_file):
 
 
 
14
  try:
15
  with open(pdf_file, "rb") as file:
16
  pdf_reader = PdfReader(file)
 
27
  else:
28
  end_index = None
29
  abstract_text = text[start_index:end_index]
30
+ break
31
+
32
  # Summarize the extracted abstract
33
  inputs = tokenizer(abstract_text, return_tensors="pt")
34
+ outputs = model.generate(**inputs, max_length=50, min_length=30)
35
  summary = tokenizer.decode(outputs[0])
36
+
 
37
  # Generate audio
38
+ speech = gTTS(text=summary, lang="en")
39
  speech_bytes = BytesIO()
40
  speech.write_to_fp(speech_bytes)
41
+
42
  # Return individual output values
43
+ return summary, speech_bytes.getvalue(), abstract_text.strip()
44
+
45
  except Exception as e:
46
  raise Exception(str(e))
47
 
 
48
  interface = gr.Interface(
49
  fn=extract_abstract_and_summarize,
50
  inputs=[gr.File(label="Upload PDF")],
51
  outputs=[gr.Textbox(label="Summary"), gr.Audio()],
52
  title="PDF Summarization & Audio Tool",
53
+ description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
54
  )
55
+
56
+ interface.launch()