Spaces:

mrsk1883
/

testingwspace

Sleeping

mrsk1883 committed on Dec 9, 2023

Commit

797680d

•

1 Parent(s): 2a10acb

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,14 +1,12 @@
 import gradio as gr
 from PyPDF2 import PdfReader
-from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 from gtts import gTTS
 from io import BytesIO
 import re
 import os
-model_name = "pszemraj/led-base-book-summary"
-model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
 def extract_abstract_and_summarize(pdf_file):
     try:
@@ -30,9 +28,17 @@ def extract_abstract_and_summarize(pdf_file):
                     break
             # Summarize the extracted abstract
-            inputs = tokenizer(abstract_text, return_tensors="pt")
-            outputs = model.generate(**inputs, max_length=50, min_length=30)
-            summary = tokenizer.decode(outputs[0])
             # Generate audio
             speech = gTTS(text=summary, lang="en")
@@ -50,7 +56,8 @@ interface = gr.Interface(
     inputs=[gr.File(label="Upload PDF")],
     outputs=[gr.Textbox(label="Summary"), gr.Audio()],
     title="PDF Summarization & Audio Tool",
-    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
 )
 interface.launch()

 import gradio as gr
 from PyPDF2 import PdfReader
+from transformers import pipeline
 from gtts import gTTS
 from io import BytesIO
 import re
 import os
+summarizer = pipeline("summarization")
 def extract_abstract_and_summarize(pdf_file):
     try:
                     break
             # Summarize the extracted abstract
+            result = summarizer(
+                abstract_text,
+                min_length=16,
+                max_length=256,
+                no_repeat_ngram_size=3,
+                encoder_no_repeat_ngram_size=3,
+                repetition_penalty=3.5,
+                num_beams=4,
+                early_stopping=True,
+            )
+            summary = result[0]['summary']
             # Generate audio
             speech = gTTS(text=summary, lang="en")
     inputs=[gr.File(label="Upload PDF")],
     outputs=[gr.Textbox(label="Summary"), gr.Audio()],
     title="PDF Summarization & Audio Tool",
+    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'summarizer' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
 )
 interface.launch()