import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

import fitz
import gradio as gr
from pathlib import Path

from langchain.prompts import PromptTemplate
from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
from langdetect import detect
from transformers import BitsAndBytesConfig

# Maximum number of characters sent to the model per request.
CONTEXT_WINDOW = 50_000

# 4-bit NF4 quantization settings (these only take effect when the model is loaded locally).
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
    bnb_4bit_use_double_quant=True,
)

llm = HuggingFaceEndpoint(
    repo_id="mistralai/Mistral-Nemo-Instruct-2407",  # alternative: "mistralai/Mistral-7B-Instruct-v0.3"
    task="text-generation",
    model_kwargs={"quantization_config": quantization_config},
    max_new_tokens=4096,
    temperature=0.5,
    do_sample=False,
)
# llm_engine_hf = ChatHuggingFace(llm=llm)


def read_pdf(file_path):
    """Extract the text of every page of a PDF with PyMuPDF."""
    logger.info("Reading a PDF file")
    try:
        pdf_document = fitz.open(file_path)
        text = ""
        for page_num in range(len(pdf_document)):
            page = pdf_document[page_num]
            text += page.get_text()
        if not text.strip():
            message = (
                "PDF contains no text. The file may be password-protected, "
                "corrupted, or made up of images only."
            )
            logger.info(message)
            return message
        return text
    except Exception as e:
        error_message = f"Error reading PDF file: {e}"
        logger.error(error_message)
        return error_message


def read_txt(file_path):
    """Read a plain-text file as UTF-8."""
    logger.info("Reading a TXT file")
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            text = f.read()
        return text
    except Exception as e:
        error_message = f"Error reading TXT file: {e}"
        logger.error(error_message)
        return error_message


def summarize(file):
    """Read the uploaded file, detect its language, and summarize it chunk by chunk."""
    # Read the content of the uploaded file
    file_path = file.name
    if file_path.endswith('.pdf'):
        text = read_pdf(file_path)
    else:
        text = read_txt(file_path)
    logger.info("Length of text is %d", len(text))

    # Detect the language from the first chunk so the summary is written in the same language.
    lang = detect(text[:CONTEXT_WINDOW])

    template_translate = '''
Please carefully read the following document:

{TEXT}

After reading through the document, pinpoint the key points and main ideas covered in the text. Organize these key points into a concise bulleted list that summarizes the essential information from the document. The summary should be in {LANG} language.
'''

    prompt_summarize = PromptTemplate(
        template=template_translate,
        input_variables=["TEXT", "LANG"],
    )

    # Split the text into CONTEXT_WINDOW-sized chunks and summarize each one separately.
    summaries = []
    for i in range(0, len(text), CONTEXT_WINDOW):
        chunk = text[i:i + CONTEXT_WINDOW]
        formatted_prompt = prompt_summarize.format(TEXT=chunk, LANG=lang)
        summary = llm.invoke(formatted_prompt)
        summaries.append(summary)
    logger.info("Split the text into %d chunks.", len(summaries))

    final_summary = "\n\n".join(summaries)
    return final_summary


def download_summary(output_text):
    """Write the summary to summary.txt and return its path, or None if there is no text."""
    if output_text:
        file_path = Path('summary.txt')
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(output_text)
        return file_path
    return None


def create_download_file(summary_text):
    file_path = download_summary(summary_text)
    return str(file_path) if file_path else None


# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Document Summarizer")

    with gr.Row():
        with gr.Column():
            file = gr.File(label="Submit a file")
        with gr.Column():
            output_text = gr.Textbox(label="Summary", lines=20)

    submit_button = gr.Button("Summarize")
    submit_button.click(summarize, inputs=[file], outputs=output_text)

    download_button = gr.Button("Download Summary")
    download_button.click(
        fn=create_download_file,
        inputs=[output_text],
        outputs=gr.File(),
    )

# Run the Gradio app
demo.launch(share=True)
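
# A minimal sketch of calling summarize() programmatically, without the Gradio UI.
# Illustrative only: "report.pdf" is a hypothetical local file, and any object
# exposing a .name attribute (as Gradio's file upload does) would work here.
#
#   from types import SimpleNamespace
#   print(summarize(SimpleNamespace(name="report.pdf")))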