"""Gradio app that summarizes an arXiv paper with a fine-tuned Qwen2 model."""

import gradio as gr
import torch
from arxiv2text import arxiv_to_text
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Upper bound on the characters of extracted paper text placed in the prompt.
MAX_INPUT_CHARS = 71000
# Upper bound on the number of tokens generated for one reply.
MAX_NEW_TOKENS = 512


def get_model(model_url="thepowerfuldeez/Qwen2-1.5B-Summarize", use_cpu=False):
    """Load the summarization model together with its tokenizer.

    Args:
        model_url: Hugging Face Hub id (or local path) of the causal LM.
        use_cpu: When true, load the unquantized model on the CPU. Otherwise
            load it 4-bit quantized with FlashAttention-2.

    Returns:
        A ``(model, tokenizer)`` tuple. The tokenizer is always loaded from
        ``Qwen/Qwen2-1.5B-Instruct``, regardless of ``model_url``.
    """
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-1.5B-Instruct")
    if use_cpu:
        # FlashAttention-2 needs a CUDA GPU, so no attention implementation
        # is forced here; transformers picks one that works on the CPU.
        model = AutoModelForCausalLM.from_pretrained(model_url, device_map="cpu")
    else:
        # Quantization options go through BitsAndBytesConfig rather than
        # loose from_pretrained keyword arguments, which are deprecated.
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_url,
            quantization_config=quantization_config,
            attn_implementation="flash_attention_2",
        )
    return model, tokenizer


def call_llm(model, tokenizer, text):
    """Send ``text`` to the model as a single user turn and return the reply.

    Args:
        model: Causal LM as returned by ``get_model``.
        tokenizer: Tokenizer whose chat template is used to build the prompt.
        text: Content of the user message.

    Returns:
        The decoded reply, without the prompt and without special tokens.
    """
    messages = [
        {"role": "system", "content": "You are helpful AI assistant."},
        {"role": "user", "content": text},
    ]
    # return_dict=True yields the attention mask alongside the ids; both are
    # moved to the model's device so generation also works off the CPU.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_dict=True,
        return_tensors="pt",
    ).to(model.device)
    generated = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)
    # generate() returns prompt + completion; keep only the completion.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = generated[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)


model, tokenizer = get_model(use_cpu=True)


def summarize_pdf(pdf_url):
    """Fetch the paper at ``pdf_url`` and return a model-written summary.

    Args:
        pdf_url: URL of an arXiv PDF, as typed into the Gradio text box.

    Returns:
        The summary text produced by the model.

    Raises:
        gr.Error: If ``pdf_url`` is empty or only whitespace.
    """
    pdf_url = (pdf_url or "").strip()
    if not pdf_url:
        raise gr.Error("Please enter the URL of an Arxiv PDF.")
    extracted_text = arxiv_to_text(pdf_url)
    # Only the leading MAX_INPUT_CHARS characters go into the prompt.
    prompt = f"Summarize following text: {extracted_text[:MAX_INPUT_CHARS]}"
    return call_llm(model, tokenizer, prompt)


interface = gr.Interface(
    fn=summarize_pdf,
    inputs="text",
    outputs="text",
    title="Arxiv PDF Summarizer",
    description="Enter the URL of an Arxiv PDF to get a summary.",
)

interface.launch()