jordigonzm committed on
Commit
ddef98f
1 Parent(s): e584b28

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import spaces
3
+ import torch
4
+ import llama_cpp
5
+ import llama_cpp.llama_tokenizer
6
+
7
+ import gradio as gr
8
+
9
+ llama = llama_cpp.Llama.from_pretrained(
10
+ repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
11
+ filename="*q8_0.gguf",
12
+ tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
13
+ verbose=False
14
+ )
15
+
16
+ model = "gpt-3.5-turbo"
17
+
18
def predict(message, history, system_prompt=None, max_new_tokens=150, temperature=0.7):
    """Stream a chat completion for *message* given the conversation *history*.

    Args:
        message: The new user message.
        history: List of (user_message, assistant_message) tuples from Gradio.
        system_prompt: Optional system message prepended to the conversation.
            Defaults to None (no system message) for backward compatibility.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Sampling temperature for generation.

    Yields:
        The accumulated response text after each streamed chunk.

    NOTE(fix): gr.ChatInterface below declares three additional_inputs which
    Gradio passes as extra positional arguments; the original two-parameter
    signature would raise TypeError when the UI invoked it.
    """
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})

    # Rebuild the OpenAI-style message list from Gradio's tuple history.
    for user_message, assistant_message in history:
        messages.append({"role": "user", "content": user_message})
        messages.append({"role": "assistant", "content": assistant_message})

    messages.append({"role": "user", "content": message})

    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        max_tokens=int(max_new_tokens),  # sliders may deliver floats
        temperature=temperature,
        stream=True,
    )

    # Accumulate streamed deltas and yield the running text so the UI
    # renders a progressively growing reply.
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text
39
+
40
# Build and launch the chat UI. The streaming callback is `predict`
# (defined above); the original code referenced an undefined name
# `chat_function`, which raised NameError at import time.
chat_interface = gr.ChatInterface(
    fn=predict,  # BUG FIX: was `chat_function` (never defined)
    chatbot=gr.Chatbot(height=400),
    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
    title="Chat with AI Model",
    description="""
    Custom description based on the new GGUF model capabilities and features.
    """,
    theme="soft",
    additional_inputs=[
        gr.Textbox(value="Hello!", label="System Prompt", placeholder="Enter a system prompt"),
        gr.Slider(minimum=50, maximum=1000, step=50, value=150, label="Max New Tokens"),
        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature"),
    ],
    allow_flagging="never",
)

chat_interface.launch()