# app.py — Gradio chat app for jordigonzm/gemma-2b-it (Hugging Face Space, rev 74124d3)
import gradio as gr
import spaces
import llama_cpp
import llama_cpp.llama_tokenizer
import gradio as gr
# Hugging Face Hub repo that hosts both the tokenizer and the GGUF weights.
REPO_ID = "jordigonzm/gemma-2b-it"
# Quantized GGUF weights file (Q4_K_M) inside that repo.
MODEL_FILENAME = "gemma-2b-it_v1p1-Q4_K_M.gguf"
# Fetch the GGUF file from the Hub and load it with llama.cpp, using the
# repo's Hugging Face tokenizer so chat prompts are tokenized to match the
# original model. verbose=False silences llama.cpp's load-time logging.
llama = llama_cpp.Llama.from_pretrained(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(REPO_ID),
    verbose=False
)
# Model name forwarded to the OpenAI-compatible completion call below.
# NOTE(review): misleading label — the served model is Gemma, not GPT-3.5;
# presumably llama-cpp-python treats this as a cosmetic field (verify).
model = "gpt-3.5-turbo"
def predict(message, history=None):
    """Stream a chat completion for *message* from the loaded Gemma model.

    Args:
        message: The user's new message text.
        history: Optional prior conversation as Gradio chat history —
            a list of ``(user, assistant)`` string pairs. Defaults to
            ``None`` so the function also works when called with a single
            argument (e.g. by a plain ``gr.Interface``).

    Yields:
        str: The accumulated response text after each streamed chunk.
    """
    messages = []
    # Replay prior turns so the model keeps conversational context.
    # (The original implementation dropped `history` entirely.)
    if history:
        for user_turn, assistant_turn in history:
            messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})
    # Append the new user message.
    messages.append({"role": "user", "content": message})
    # Request a streamed, OpenAI-style chat completion from llama.cpp.
    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True,
    )
    # Accumulate streamed deltas and yield the running text so the UI
    # updates incrementally.
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
        yield text
# Use gr.ChatInterface rather than gr.Interface: predict takes
# (message, history), which is exactly the signature ChatInterface calls
# with — the original Interface supplied only one input and would raise a
# TypeError on submit. ChatInterface also renders the streamed yields as
# an incrementally-updating chat transcript.
chat_interface = gr.ChatInterface(
    fn=predict,
    title="Chat with Gemma-2B-it Model, LlamaCPP",
    description=" \
Welcome to the Gemma-2B-IT LlamaCPP Assistant, an interactive platform powered by the state-of-the-art \
Gemma-2B-it language model, seamlessly integrated with Llama-CPP-Python.<br>" + MODEL_FILENAME,
    theme="soft",
)
# Start the Gradio server (blocking call).
chat_interface.launch()