Spaces:
Build error
Build error
File size: 1,409 Bytes
ddef98f e5173b6 74124d3 e5173b6 ddef98f e5173b6 ddef98f 01a3834 57e69ab ddef98f 26e9ade 57e69ab ddef98f 57e69ab ddef98f b8c5b39 2398722 b8c5b39 4dde6bd b8c5b39 e5173b6 4dde6bd e5173b6 ddef98f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
import gradio as gr
import spaces
import llama_cpp
import llama_cpp.llama_tokenizer
import gradio as gr
# Hugging Face repository and GGUF weight file the assistant is loaded from.
REPO_ID = "jordigonzm/gemma-2b-it"
MODEL_FILENAME = "gemma-2b-it_v1p1-Q4_K_M.gguf"

# Value passed as the `model` argument of every chat-completion request.
model = "gpt-3.5-turbo"

# The tokenizer is taken from the same repository as the weights and is
# built first so the model constructor call below stays short.
_hf_tokenizer = llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(REPO_ID)

# Module-level model instance shared by all requests.
llama = llama_cpp.Llama.from_pretrained(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
    tokenizer=_hf_tokenizer,
    verbose=False,
)
def predict(message, history=None):
    """Stream the model's reply to a single user message.

    Args:
        message: Text entered by the user.
        history: Optional previous conversation. It is accepted so the
            function can also serve as a ``(message, history)`` chat
            callback, but it is not forwarded to the model: each request
            is answered as a fresh single-turn prompt. It defaults to
            ``None`` because ``gr.Interface`` with one input component
            calls this function with one positional argument only.

    Yields:
        The reply text accumulated so far, once for every streamed chunk
        that carries non-empty content.
    """
    # The prompt consists solely of the new user message.
    messages = [{"role": "user", "content": message}]

    # Request a streamed completion so partial output can be shown early.
    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
    )

    # Accumulate the deltas and re-yield the full text each time, since the
    # consumer replaces the displayed output with every yielded value.
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        # Chunks whose delta has no text (None or empty) are skipped.
        if content:
            text += content
            yield text
# Multi-line text box in which the user types the question.
question_box = gr.Textbox(lines=6, placeholder="Ask")

# Introductory blurb shown under the title, followed by the name of the
# model file that is being served.
app_description = (
    " Welcome to the Gemma-2B-IT LlamaCPP Assistant, an interactive platform powered by the state-of-the-art "
    "Gemma-2B-it language model, seamlessly integrated with Llama-CPP-Python.<br>"
    + MODEL_FILENAME
)

# Text-in / text-out UI wired to the streaming `predict` callback.
chat_interface = gr.Interface(
    fn=predict,
    inputs=[question_box],
    outputs="text",
    title="Chat with Gemma-2B-it Model, LlamaCPP",
    description=app_description,
    theme="soft",
)

# Start the web server as soon as the script is executed.
chat_interface.launch()
|