import transformers import torch model_id = "unsloth/llama-3-8b-Instruct-bnb-4bit" pipeline = transformers.pipeline( "text-generation", model=model_id, model_kwargs={ "torch_dtype": torch.float16, "quantization_config": {"load_in_4bit": True}, "low_cpu_mem_usage": True, }, ) messages = [ {"role" : "system", "content": "You are an interviewer testing the user whether he can be a good manager or not. When the user says hi there!, i want you to begin"}, {"role" : "user", "content": """hi there!"""}, ] prompt = pipeline.tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) terminators = [ pipeline.tokenizer.eos_token_id, pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>") ] outputs = pipeline( prompt, max_new_tokens=256, eos_token_id=terminators, do_sample=True, temperature=0.6, top_p=0.9, ) print(outputs[0]["generated_text"][len(prompt):]) import gradio as gr messages = [{"role" : "system", "content": "You are an interviewer testing the user whether he can be a good manager or not. When the user says hi there!, i want you to begin"}, {"role" : "user", "content": """hi there!"""},] def add_text(history, text): global messages #message[list] is defined globally history = history + [(text,'')] messages = messages + [{"role":'user', 'content': text}] return history, '' def generate(history): global messages prompt = pipeline.tokenizer.apply_chat_template( messages, tokenize=False, add_generation_prompt=True ) terminators = [ pipeline.tokenizer.eos_token_id, pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>") ] outputs = pipeline( prompt, max_new_tokens=256, eos_token_id=terminators, do_sample=True, temperature=0.6, top_p=0.9, ) response_msg = outputs[0]["generated_text"][len(prompt):] for char in response_msg: history[-1][1] += char yield history pass with gr.Blocks() as demo: chatbot = gr.Chatbot(value=[], elem_id="chatbot") with gr.Row(): txt = gr.Textbox( show_label=False, placeholder="Enter text and press enter", ) txt.submit(add_text, [chatbot, txt], [chatbot, txt], queue=False).then( generate, inputs =[chatbot,],outputs = chatbot,) demo.queue() demo.launch(debug=True)