Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -1,12 +1,13 @@
|
|
1 |
import gradio as gr
|
2 |
from PyPDF2 import PdfReader
|
3 |
-
from transformers import pipeline
|
4 |
from gtts import gTTS
|
5 |
from io import BytesIO
|
6 |
import re
|
7 |
-
import os
|
8 |
|
9 |
-
|
|
|
|
|
10 |
|
11 |
def extract_abstract_and_summarize(pdf_file):
|
12 |
try:
|
@@ -27,17 +28,8 @@ def extract_abstract_and_summarize(pdf_file):
|
|
27 |
abstract_text = text[start_index:end_index]
|
28 |
break
|
29 |
|
30 |
-
# Summarize the extracted abstract
|
31 |
-
result = summarizer(
|
32 |
-
abstract_text,
|
33 |
-
min_length=16,
|
34 |
-
max_length=256,
|
35 |
-
no_repeat_ngram_size=3,
|
36 |
-
encoder_no_repeat_ngram_size=3,
|
37 |
-
repetition_penalty=3.5,
|
38 |
-
num_beams=4,
|
39 |
-
early_stopping=True,
|
40 |
-
)
|
41 |
summary = result[0]['summary']
|
42 |
|
43 |
# Generate audio
|
@@ -56,8 +48,7 @@ interface = gr.Interface(
|
|
56 |
inputs=[gr.File(label="Upload PDF")],
|
57 |
outputs=[gr.Textbox(label="Summary"), gr.Audio()],
|
58 |
title="PDF Summarization & Audio Tool",
|
59 |
-
description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the '
|
60 |
)
|
61 |
|
62 |
interface.launch()
|
63 |
-
|
|
|
1 |
import gradio as gr
|
2 |
from PyPDF2 import PdfReader
|
3 |
+
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
|
4 |
from gtts import gTTS
|
5 |
from io import BytesIO
|
6 |
import re
|
|
|
7 |
|
8 |
+
# Load the LED-large model for summarization
|
9 |
+
model_name = "pszemraj/led-large-book-summary"
|
10 |
+
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)
|
11 |
|
12 |
def extract_abstract_and_summarize(pdf_file):
|
13 |
try:
|
|
|
28 |
abstract_text = text[start_index:end_index]
|
29 |
break
|
30 |
|
31 |
+
# Summarize the extracted abstract using the LED-large model
|
32 |
+
result = summarizer(abstract_text, max_length=256, min_length=16, length_penalty=2.0)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
# The transformers summarization pipeline returns a list of dicts keyed by
# "summary_text"; indexing with "summary" raises KeyError on every call.
summary = result[0]['summary_text']
|
34 |
|
35 |
# Generate audio
|
|
|
48 |
inputs=[gr.File(label="Upload PDF")],
|
49 |
outputs=[gr.Textbox(label="Summary"), gr.Audio()],
|
50 |
title="PDF Summarization & Audio Tool",
|
51 |
+
description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
|
52 |
)
|
53 |
|
54 |
interface.launch()
|
|