# Streamlit demo: ask TinyLlama for a one-sentence "review" of a paper title.
# Run with: streamlit run app.py
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

st.set_page_config(page_title="Q&A Demo")
st.header("Langchain Application")

# Fall back to CPU when no GPU is available (the original hard-coded .cuda()).
device = "cuda" if torch.cuda.is_available() else "cpu"

@st.cache_resource  # load the model once, not on every Streamlit rerun
def load_model():
    model = AutoModelForCausalLM.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0").to(device)
    tokenizer = AutoTokenizer.from_pretrained("TinyLlama/TinyLlama-1.1B-Chat-v1.0")
    return model, tokenizer

model, tokenizer = load_model()

title = st.text_input("Title : ")
submit = st.button("Submit")

# Generate only after the user submits a non-empty title; the original called
# model.generate() on every script rerun, before the button was pressed.
if submit and title:
    prompt = tokenizer.apply_chat_template(
        [
            {"role": "system", "content": "You are an experienced researcher and a reviewer of scientific papers. Given a title of the paper, write a review about it in one sentence."},
            {"role": "user", "content": title},
        ],
        tokenize=False,
        add_generation_prompt=True,
    )
    inputs = tokenizer(prompt, return_tensors="pt").to(device)
    generate_ids = model.generate(
        **inputs,
        max_new_tokens=50,
        do_sample=True,
        temperature=0.5,
        top_k=50,
        top_p=0.95,
    )
    # Decode only the newly generated tokens so the prompt is not echoed back.
    response = tokenizer.batch_decode(
        generate_ids[:, inputs["input_ids"].shape[1]:],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=False,
    )[0]
    st.subheader("Reviewer #2:")
    st.write(response)
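
# The original script also imported PeftModel from peft (and accelerate) without
# using either, so those imports are dropped above. If the intent was to serve a
# LoRA-fine-tuned variant of the base model, it could be wrapped after loading,
# as in this minimal sketch ("path/to/lora-adapter" is a hypothetical adapter
# directory, not something from the original code):
#
#     from peft import PeftModel
#     model = PeftModel.from_pretrained(model, "path/to/lora-adapter")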