import gradio as gr
import spaces
import torch
import transformers
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/Phi-3-mini-128k-instruct"

# Phi-3 ships custom modeling code, so trust_remote_code is needed at load time.
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",  # place weights on the available device (requires accelerate)
    trust_remote_code=True,
)
pipe = transformers.pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
)


# On Hugging Face ZeroGPU Spaces, @spaces.GPU attaches a GPU for the duration
# of each call to this function.
@spaces.GPU
def chat_function(message, history, system_prompt, max_new_tokens, temperature):
    # Build a messages list and let the tokenizer render it with Phi-3's own
    # chat template instead of hand-concatenating "role: content" strings.
    messages = [{"role": "system", "content": system_prompt}]
    messages.extend(history)  # history is already role/content dicts (type="messages")
    messages.append({"role": "user", "content": message})
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    # Keep temperature strictly positive: sampling with temperature=0 errors out.
    temp = max(temperature, 0.1)
    outputs = pipe(
        prompt,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        temperature=temp,
        top_p=0.9,
        return_full_text=False,  # return only the newly generated text
    )
    return outputs[0]["generated_text"]


gr.ChatInterface(
    chat_function,
    type="messages",  # pass history as a list of {"role", "content"} dicts
    chatbot=gr.Chatbot(height=400, type="messages"),
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
    title="microsoft/Phi-3-mini-128k-instruct",
    description="""
    This Space is dedicated to chatting with Microsoft's Phi-3-mini-128k-instruct.
    Find the model here: https://huggingface.co/microsoft/Phi-3-mini-128k-instruct
    Feel free to play with the customization in "Additional Inputs".
    """,
    theme="soft",
    additional_inputs=[
        gr.Textbox("You are a helpful AI.", label="System Prompt"),
        gr.Slider(512, 4096, value=1024, label="Max New Tokens"),
        gr.Slider(0, 1, value=0.7, label="Temperature"),
    ],
).launch()
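
# ---------------------------------------------------------------------------
# Optional: a minimal sketch of exercising chat_function directly, useful for
# debugging outside the UI. The inputs below (message text, empty history,
# token budget, temperature) are illustrative values, not part of the Space;
# uncomment and run in place of .launch() to try it.
#
#   history = []  # messages-format history: list of {"role", "content"} dicts
#   reply = chat_function("Hello!", history, "You are a helpful AI.", 256, 0.7)
#   print(reply)
# ---------------------------------------------------------------------------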