Spaces:

jordigonzm
/

gemma-2b-it

Build error

App Files Files Community

jordigonzm committed on Apr 25

Commit

ddef98f

•

1 Parent(s): e584b28

Create app.py

Browse files

Files changed (1) hide show

app.py +57 -0

app.py ADDED Viewed

	@@ -0,0 +1,57 @@

+import gradio as gr
+import spaces
+import torch
+import llama_cpp
+import llama_cpp.llama_tokenizer
+import gradio as gr
+# Tokenizer loaded by name through the Hugging Face wrapper; it is handed to
+# the llama.cpp model below instead of relying on the one inside the GGUF file.
+_hf_tokenizer = llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
+    "Qwen/Qwen1.5-0.5B"
+)
+# Model weights: the file in the repo whose name matches the q8_0 glob.
+llama = llama_cpp.Llama.from_pretrained(
+    repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
+    filename="*q8_0.gguf",
+    tokenizer=_hf_tokenizer,
+    verbose=False,
+)
+# Sent as the `model=` field of every chat-completion request. Note that the
+# value does not name the Qwen weights loaded above.
+model = "gpt-3.5-turbo"
+def predict(message, history, system_prompt="", max_tokens=None, temperature=None):
+    """Stream a chat reply from the module-level ``llama`` model.
+
+    Args:
+        message: The user's newest message.
+        history: Earlier turns as ``(user_message, assistant_message)`` pairs.
+        system_prompt: Optional system instruction placed ahead of the
+            conversation; skipped when empty.
+        max_tokens: Optional cap on generated tokens. When ``None`` the
+            argument is not forwarded, so the library default applies.
+        temperature: Optional sampling temperature, handled like
+            ``max_tokens``.
+
+    Yields:
+        The reply text accumulated so far, once per non-empty streamed chunk.
+    """
+    messages = []
+    if system_prompt:
+        messages.append({"role": "system", "content": system_prompt})
+    for user_message, assistant_message in history:
+        messages.append({"role": "user", "content": user_message})
+        messages.append({"role": "assistant", "content": assistant_message})
+    messages.append({"role": "user", "content": message})
+    # Forward only the settings that were actually supplied, so a plain
+    # predict(message, history) call behaves exactly as it did before.
+    sampling = {}
+    if max_tokens is not None:
+        sampling["max_tokens"] = int(max_tokens)
+    if temperature is not None:
+        sampling["temperature"] = float(temperature)
+    response = llama.create_chat_completion_openai_v1(
+        model=model,
+        messages=messages,
+        stream=True,
+        **sampling,
+    )
+    text = ""
+    for chunk in response:
+        content = chunk.choices[0].delta.content
+        # Some chunks carry no text (content is None or empty); skip those.
+        if content:
+            text += content
+            yield text
+chat_interface = gr.ChatInterface(
+    # The handler defined above; the previous value named a function that
+    # does not exist in this file.
+    fn=predict,
+    chatbot=gr.Chatbot(height=400),
+    textbox=gr.Textbox(placeholder="Enter message here", container=False, scale=7),
+    title="Chat with AI Model",
+    description="""
+    Custom description based on the new GGUF model capabilities and features.
+    """,
+    theme="soft",
+    # Values of these components are passed to predict after
+    # (message, history), in this order: system_prompt, max_tokens, temperature.
+    additional_inputs=[
+        gr.Textbox(value="Hello!", label="System Prompt", placeholder="Enter a system prompt"),
+        gr.Slider(minimum=50, maximum=1000, step=50, value=150, label="Max New Tokens"),
+        gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.7, label="Temperature"),
+    ],
+    # NOTE(review): the allow_flagging keyword was dropped; it belongs to
+    # gr.Interface and ChatInterface appears not to accept it.
+)
+chat_interface.launch()