Spaces:

mrsk1883
/

testingwspace

Sleeping

App Files Files Community

testingwspace / app.py

mrsk1883

Update app.py

cdb128e 10 months ago

raw

history blame

No virus

2.3 kB

	import gradio as gr
	from PyPDF2 import PdfReader
	from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
	from gtts import gTTS
	from io import BytesIO
	import re

	# Summarization backend: one Hugging Face pipeline, built once at import
	# time, with the model and the tokenizer both taken from the same LED-large
	# checkpoint.
	model_name = "pszemraj/led-large-book-summary"
	summarizer = pipeline(
	    "summarization",
	    model=model_name,
	    tokenizer=model_name,
	)

	def extract_abstract_and_summarize(pdf_file):
	    """Extract a PDF's abstract, summarize it, and synthesize speech for the summary.

	    The abstract is taken from the first page whose text contains the word
	    "Abstract" (case-insensitive): everything after that word up to the next
	    "Introduction" on the same page, or to the end of that page if no
	    "Introduction" follows.

	    Args:
	        pdf_file: Path to the PDF file, or an object exposing the path as a
	            ``name`` attribute.

	    Returns:
	        A ``(summary, audio_bytes, abstract)`` tuple: the generated summary
	        text, the bytes gTTS wrote for the spoken summary, and the stripped
	        abstract text that was summarized.

	    Raises:
	        Exception: On any failure (no file, unreadable PDF, no abstract
	            found, model or text-to-speech error). The message is the
	            message of the underlying error, which is chained as the cause.
	    """
	    try:
	        if pdf_file is None:
	            raise ValueError("No PDF file was uploaded.")

	        # NOTE(review): some Gradio versions appear to pass a temp-file
	        # wrapper instead of a plain path; fall back to its ``name`` so both
	        # forms work -- confirm against the installed Gradio version.
	        pdf_path = getattr(pdf_file, "name", pdf_file)

	        abstract_text = ""
	        with open(pdf_path, "rb") as file:
	            for page in PdfReader(file).pages:
	                # Guard against pages with no extractable text (e.g. scans).
	                text = page.extract_text() or ""
	                abstract_match = re.search(r"\bAbstract\b", text, re.IGNORECASE)
	                if not abstract_match:
	                    continue

	                start_index = abstract_match.end()
	                introduction_match = re.search(
	                    r"\bIntroduction\b", text[start_index:], re.IGNORECASE
	                )
	                # The second search ran on a slice, so its offset is
	                # relative to start_index; None slices to the page end.
	                end_index = (
	                    start_index + introduction_match.start()
	                    if introduction_match
	                    else None
	                )
	                abstract_text = text[start_index:end_index]
	                break

	        abstract_text = abstract_text.strip()
	        if not abstract_text:
	            # Fail clearly instead of handing an empty string to the model.
	            raise ValueError("No abstract found in the PDF.")

	        # Summarize the extracted abstract using the LED-large model
	        result = summarizer(
	            abstract_text, max_length=256, min_length=16, length_penalty=2.0
	        )
	        # The summarization pipeline stores its output under "summary_text".
	        summary = result[0]["summary_text"]

	        # Generate audio
	        speech = gTTS(text=summary, lang="en")
	        speech_bytes = BytesIO()
	        speech.write_to_fp(speech_bytes)

	        # Return individual output values
	        return summary, speech_bytes.getvalue(), abstract_text

	    except Exception as e:
	        # Keep the original contract (a plain Exception carrying the
	        # message) but chain the cause so the real traceback is preserved.
	        raise Exception(str(e)) from e

	# Build the web UI. extract_abstract_and_summarize returns three values
	# (summary text, audio bytes, abstract text), so three output components
	# are declared, in that order, to make each returned value visible.
	interface = gr.Interface(
	    fn=extract_abstract_and_summarize,
	    inputs=[gr.File(label="Upload PDF")],
	    outputs=[
	        gr.Textbox(label="Summary"),
	        gr.Audio(),
	        gr.Textbox(label="Abstract"),
	    ],
	    title="PDF Summarization & Audio Tool",
	    description="""PDF Summarization App. This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs.""",
	)

	interface.launch()