gemma-2b-it / app.py
jordigonzm's picture
Update app.py
2398722 verified
raw
history blame
No virus
1.65 kB
import gradio as gr
import spaces
import llama_cpp
import llama_cpp.llama_tokenizer
import gradio as gr
# Tokenizer obtained through llama_cpp's LlamaHFTokenizer wrapper from the
# "Qwen/Qwen1.5-0.5B" repository; it is handed to the model loader below.
hf_tokenizer = llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
    "Qwen/Qwen1.5-0.5B"
)

# Module-level model instance shared by every chat request. The weights file
# is selected from the GGUF repository by the "*q8_0.gguf" filename pattern.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    tokenizer=hf_tokenizer,
    verbose=False,
)

# Value passed as the `model` argument of the chat-completion call.
# NOTE(review): presumably only a label echoed back in the OpenAI-style
# response, since the weights actually loaded are the Qwen GGUF above - confirm.
model = "gpt-3.5-turbo"
def predict(message, history, system_prompt=None, max_new_tokens=None, temperature=None):
    """Stream the assistant's reply to ``message``.

    The three optional parameters correspond, in order, to the extra UI
    controls declared in ``additional_inputs`` of the chat interface; Gradio
    passes their values positionally after ``message`` and ``history``.
    Leaving them at ``None`` reproduces the plain two-argument behaviour.

    Args:
        message: Text the user just submitted.
        history: Iterable of ``(user_message, assistant_message)`` pairs
            from earlier turns.
        system_prompt: Optional system instruction placed before the
            conversation. Skipped when ``None`` or empty.
        max_new_tokens: Optional cap on generated tokens, forwarded as
            ``max_tokens``. Not forwarded when ``None``.
        temperature: Optional sampling temperature. Not forwarded when
            ``None``.

    Yields:
        The reply accumulated so far, once per streamed chunk that carries
        content, so the UI can render the answer incrementally.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    for user_message, assistant_message in history:
        # Either side of a turn can be None (e.g. an unanswered message);
        # a None content would not be a valid chat message, so skip it.
        if user_message is not None:
            messages.append({"role": "user", "content": user_message})
        if assistant_message is not None:
            messages.append({"role": "assistant", "content": assistant_message})
    messages.append({"role": "user", "content": message})

    # Only forward the sampling options that were actually supplied so the
    # backend's own defaults stay in effect otherwise.
    sampling_options = {}
    if max_new_tokens is not None:
        # UI sliders may deliver floats; the token limit must be an integer.
        sampling_options["max_tokens"] = int(max_new_tokens)
    if temperature is not None:
        sampling_options["temperature"] = float(temperature)

    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
        **sampling_options,
    )

    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        # Some chunks carry no content; only emit when there is new text.
        if content:
            text += content
            yield text
# Chat UI wired to `predict`. The values of the `additional_inputs` controls
# are passed by Gradio to `fn` as extra positional arguments after
# (message, history), in the order they are listed here.
chat_interface = gr.ChatInterface(
    fn=predict,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
    title="Chat with AI Model",
    description="""
Custom description based on the new GGUF model capabilities and features.
""",
    theme="soft",
    additional_inputs=[
        gr.Textbox(value="Hello!", label="System Prompt", placeholder="Enter a system prompt"),
        gr.Slider(minimum=50, maximum=1000, step=50, value=150, label="Max New Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature"),
    ],
    # NOTE: `allow_flagging` is a gr.Interface argument; gr.ChatInterface does
    # not document it, so passing it fails as an unexpected keyword. It is
    # omitted here; no flagging is already the ChatInterface default.
)

# Start the web server for the chat application.
chat_interface.launch()