"""Gradio app: extract a PDF's abstract, summarize it, and read the summary aloud."""

import logging
import re
import tempfile
from io import BytesIO

import gradio as gr
from gtts import gTTS
from PyPDF2 import PdfReader
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer

logger = logging.getLogger(__name__)

# Load the LED-large model for summarization
model_name = "pszemraj/led-large-book-summary"
summarizer = pipeline("summarization", model=model_name, tokenizer=model_name)

# Compiled once: the same two headings are searched for on every page.
_ABSTRACT_RE = re.compile(r"\bAbstract\b", re.IGNORECASE)
_INTRODUCTION_RE = re.compile(r"\bIntroduction\b", re.IGNORECASE)

_NO_SUMMARY = "Summary not available."


def _find_abstract(pdf_path):
    """Return the abstract text of the PDF at *pdf_path*, or "" if none is found.

    The abstract is taken from the first page containing the word "Abstract":
    everything after that word up to the next "Introduction" on the same page,
    or to the end of the page when no "Introduction" follows.
    """
    with open(pdf_path, "rb") as file:
        for page in PdfReader(file).pages:
            # extract_text() may yield None/"" for image-only pages.
            text = page.extract_text() or ""
            abstract_match = _ABSTRACT_RE.search(text)
            if abstract_match is None:
                continue
            start = abstract_match.end()
            remainder = text[start:]
            introduction_match = _INTRODUCTION_RE.search(remainder)
            if introduction_match is not None:
                remainder = remainder[:introduction_match.start()]
            return remainder.strip()
    return ""


def extract_abstract_and_summarize(pdf_file):
    """Summarize the abstract of an uploaded PDF and synthesize it as speech.

    Args:
        pdf_file: The upload handed over by ``gr.File`` -- either a filesystem
            path or an object exposing the path as ``.name``.

    Returns:
        A ``(summary, audio_path, abstract_text)`` tuple: the generated
        summary, the path of an MP3 file with the spoken summary, and the
        stripped abstract text the summary was produced from.

    Raises:
        gr.Error: If no file was provided, no abstract could be located, or
            any processing step fails. The original exception is chained.
    """
    try:
        if pdf_file is None:
            raise ValueError("PDF file is not provided.")

        # Depending on the Gradio version the upload is a path string or a
        # tempfile wrapper; a plain string has no ``name`` attribute.
        pdf_path = getattr(pdf_file, "name", pdf_file)

        abstract_text = _find_abstract(pdf_path)
        if not abstract_text:
            raise ValueError("No abstract could be found in the PDF.")

        # Summarize the extracted abstract using the LED-large model
        result = summarizer(abstract_text)
        logger.debug("Summarizer result: %s", result)

        # The summarization pipeline returns [{"summary_text": "..."}].
        if result and isinstance(result, list):
            summary = result[0].get("summary_text") or _NO_SUMMARY
        else:
            summary = _NO_SUMMARY

        # Write the speech to a file: a file path is the output form that
        # gr.Audio accepts regardless of Gradio version. delete=False keeps
        # the file on disk so Gradio can still read it after this returns.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            gTTS(text=summary, lang="en").write_to_fp(audio_file)

        return summary, audio_file.name, abstract_text
    except gr.Error:
        raise
    except Exception as e:
        logger.exception("Failed to process PDF")
        # gr.Error surfaces the message in the UI; chaining keeps the cause.
        raise gr.Error(str(e)) from e


interface = gr.Interface(
    fn=extract_abstract_and_summarize,
    inputs=[gr.File(label="Upload PDF")],
    # One output component per value returned by the function.
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Audio(),
        gr.Textbox(label="Extracted Abstract"),
    ],
    title="PDF Summarization & Audio Tool",
    description="""PDF Summarization App. 
This app extracts the abstract from a PDF, summarizes it using the 'pszemraj/led-large-book-summary' model, and generates an audio of it. Only upload PDFs with abstracts. Please read the README.MD for information about the app and sample PDFs.""",
)

interface.launch()