Spaces:

mrsk1883
/

testingwspace

Sleeping

App Files Files Community

mrsk1883 committed on Dec 9, 2023

Commit

2a10acb

•

1 Parent(s): ad0dad6

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -13

app.py CHANGED Viewed

@@ -10,11 +10,7 @@ model_name = "pszemraj/led-base-book-summary"
 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def extract_abstract_and_summarize(pdf_file):
-    """
-    Extracts the abstract and summarizes it in one sentence with information till "Introduction".
-    """
     try:
         with open(pdf_file, "rb") as file:
             pdf_reader = PdfReader(file)
@@ -31,28 +27,30 @@ def extract_abstract_and_summarize(pdf_file):
                     else:
                         end_index = None
                     abstract_text = text[start_index:end_index]
-                    break  # Exit loop once abstract is found
             # Summarize the extracted abstract
             inputs = tokenizer(abstract_text, return_tensors="pt")
-            outputs = model.generate(**inputs)
             summary = tokenizer.decode(outputs[0])
-            # Extract only the first sentence
-            summary_sentence = extract_first_sentence(summary)
             # Generate audio
-            speech = gTTS(text=summary_sentence, lang="en")
             speech_bytes = BytesIO()
             speech.write_to_fp(speech_bytes)
             # Return individual output values
-            return summary_sentence, speech_bytes.getvalue(), abstract_text.strip()
     except Exception as e:
         raise Exception(str(e))
 interface = gr.Interface(
     fn=extract_abstract_and_summarize,
     inputs=[gr.File(label="Upload PDF")],
     outputs=[gr.Textbox(label="Summary"), gr.Audio()],
     title="PDF Summarization & Audio Tool",
-    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts.     Please read the README.MD for information about the app and sample PDFs.""",
 )
-interface.launch(share=True)

 model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
 tokenizer = AutoTokenizer.from_pretrained(model_name)
 def extract_abstract_and_summarize(pdf_file):
     try:
         with open(pdf_file, "rb") as file:
             pdf_reader = PdfReader(file)
                     else:
                         end_index = None
                     abstract_text = text[start_index:end_index]
+                    break
             # Summarize the extracted abstract
             inputs = tokenizer(abstract_text, return_tensors="pt")
+            outputs = model.generate(**inputs, max_length=50, min_length=30)
             summary = tokenizer.decode(outputs[0])
             # Generate audio
+            speech = gTTS(text=summary, lang="en")
             speech_bytes = BytesIO()
             speech.write_to_fp(speech_bytes)
             # Return individual output values
+            return summary, speech_bytes.getvalue(), abstract_text.strip()
     except Exception as e:
         raise Exception(str(e))
 interface = gr.Interface(
     fn=extract_abstract_and_summarize,
     inputs=[gr.File(label="Upload PDF")],
     outputs=[gr.Textbox(label="Summary"), gr.Audio()],
     title="PDF Summarization & Audio Tool",
+    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
 )
+interface.launch()