Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -10,11 +10,7 @@ model_name = "pszemraj/led-base-book-summary"
|
|
10 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
12 |
|
13 |
-
|
14 |
def extract_abstract_and_summarize(pdf_file):
|
15 |
-
"""
|
16 |
-
Extracts the abstract and summarizes it in one sentence with information till "Introduction".
|
17 |
-
"""
|
18 |
try:
|
19 |
with open(pdf_file, "rb") as file:
|
20 |
pdf_reader = PdfReader(file)
|
@@ -31,28 +27,30 @@ def extract_abstract_and_summarize(pdf_file):
|
|
31 |
else:
|
32 |
end_index = None
|
33 |
abstract_text = text[start_index:end_index]
|
34 |
-
break
|
|
|
35 |
# Summarize the extracted abstract
|
36 |
inputs = tokenizer(abstract_text, return_tensors="pt")
|
37 |
-
outputs = model.generate(**inputs)
|
38 |
summary = tokenizer.decode(outputs[0])
|
39 |
-
|
40 |
-
summary_sentence = extract_first_sentence(summary)
|
41 |
# Generate audio
|
42 |
-
speech = gTTS(text=summary_sentence, lang="en")
|
43 |
speech_bytes = BytesIO()
|
44 |
speech.write_to_fp(speech_bytes)
|
|
|
45 |
# Return individual output values
|
46 |
-
return summary_sentence, speech_bytes.getvalue(), abstract_text.strip()
|
|
|
47 |
except Exception as e:
|
48 |
raise Exception(str(e))
|
49 |
|
50 |
-
|
51 |
interface = gr.Interface(
|
52 |
fn=extract_abstract_and_summarize,
|
53 |
inputs=[gr.File(label="Upload PDF")],
|
54 |
outputs=[gr.Textbox(label="Summary"), gr.Audio()],
|
55 |
title="PDF Summarization & Audio Tool",
|
56 |
-
description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts."""
|
57 |
)
|
58 |
-
|
|
|
|
10 |
model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
|
11 |
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
12 |
|
|
|
13 |
def extract_abstract_and_summarize(pdf_file):
|
|
|
|
|
|
|
14 |
try:
|
15 |
with open(pdf_file, "rb") as file:
|
16 |
pdf_reader = PdfReader(file)
|
|
|
27 |
else:
|
28 |
end_index = None
|
29 |
abstract_text = text[start_index:end_index]
|
30 |
+
break
|
31 |
+
|
32 |
# Summarize the extracted abstract
|
33 |
inputs = tokenizer(abstract_text, return_tensors="pt")
|
34 |
+
outputs = model.generate(**inputs, max_length=50, min_length=30)
|
35 |
summary = tokenizer.decode(outputs[0])
|
36 |
+
|
|
|
37 |
# Generate audio
|
38 |
+
speech = gTTS(text=summary, lang="en")
|
39 |
speech_bytes = BytesIO()
|
40 |
speech.write_to_fp(speech_bytes)
|
41 |
+
|
42 |
# Return individual output values
|
43 |
+
return summary, speech_bytes.getvalue(), abstract_text.strip()
|
44 |
+
|
45 |
except Exception as e:
|
46 |
raise Exception(str(e))
|
47 |
|
|
|
48 |
interface = gr.Interface(
|
49 |
fn=extract_abstract_and_summarize,
|
50 |
inputs=[gr.File(label="Upload PDF")],
|
51 |
outputs=[gr.Textbox(label="Summary"), gr.Audio()],
|
52 |
title="PDF Summarization & Audio Tool",
|
53 |
+
description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it in one sentence with information till "Introduction", and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs."""
|
54 |
)
|
55 |
+
|
56 |
+
interface.launch()
|