Spaces:
Build error
Build error
jordigonzm
committed on
Commit
•
ddef98f
1
Parent(s):
e584b28
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
import spaces
|
3 |
+
import torch
|
4 |
+
import llama_cpp
|
5 |
+
import llama_cpp.llama_tokenizer
|
6 |
+
|
7 |
+
import gradio as gr
|
8 |
+
|
9 |
+
# Build the tokenizer first so the model-loading call below stays short.
# It is loaded from the "Qwen/Qwen1.5-0.5B" Hugging Face repo.
_hf_tokenizer = llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
    "Qwen/Qwen1.5-0.5B"
)

# Load the GGUF file matching "*q8_0.gguf" from the Qwen chat repo and pair
# it with the tokenizer created above.
llama = llama_cpp.Llama.from_pretrained(
    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
    filename="*q8_0.gguf",
    tokenizer=_hf_tokenizer,
    verbose=False,
)

# Value passed as the ``model`` argument of the chat-completion call in
# predict().
model = "gpt-3.5-turbo"
|
17 |
+
|
18 |
+
def predict(message, history, system_prompt="", max_new_tokens=None, temperature=None):
    """Stream a chat reply from the module-level ``llama`` model.

    Generator intended as a Gradio ``ChatInterface`` callback. Gradio calls
    the callback with ``(message, history)`` followed by the current value of
    every component listed in ``additional_inputs``, so the three optional
    parameters let this function be called either way.

    Args:
        message: The latest user message.
        history: Iterable of ``(user_message, assistant_message)`` pairs
            describing the earlier turns of the conversation.
        system_prompt: Optional text sent as a leading ``system`` message.
            Skipped when empty.
        max_new_tokens: Optional cap on generated tokens, forwarded as
            ``max_tokens``. ``None`` keeps the library default.
        temperature: Optional sampling temperature. ``None`` keeps the
            library default.

    Yields:
        The reply accumulated so far, once per non-empty streamed chunk.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})

    messages.append({"role": "user", "content": message})

    # Only forward sampling options that were actually supplied, so a plain
    # predict(message, history) call behaves exactly as before.
    sampling = {}
    if max_new_tokens is not None:
        # UI sliders may deliver floats; the token limit must be an integer.
        sampling["max_tokens"] = int(max_new_tokens)
    if temperature is not None:
        sampling["temperature"] = float(temperature)

    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
        **sampling,
    )

    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        # Some streamed chunks carry no text (content is None or empty).
        if content:
            text += content
            yield text
|
39 |
+
|
40 |
+
# Chat UI wired to the streaming predict() callback defined in this file.
# The original referenced an undefined name ``chat_function``, which raised
# NameError as soon as the module was executed.
chat_interface = gr.ChatInterface(
    fn=predict,
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
    title="Chat with AI Model",
    description="""
Custom description based on the new GGUF model capabilities and features.
""",
    theme="soft",
    # Gradio passes the values of these components to ``fn`` as extra
    # positional arguments after (message, history), in this order.
    additional_inputs=[
        gr.Textbox(value="Hello!", label="System Prompt", placeholder="Enter a system prompt"),
        gr.Slider(minimum=50, maximum=1000, step=50, value=150, label="Max New Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature"),
    ],
    # NOTE(review): ``allow_flagging="never"`` was dropped here. It looks like
    # a ``gr.Interface`` option that ``gr.ChatInterface`` does not accept;
    # confirm against the installed Gradio version.
)

chat_interface.launch()
|