import gradio as gr
import spaces  # Hugging Face Spaces runtime helper (unused directly, kept for Spaces deployment)
import llama_cpp
import llama_cpp.llama_tokenizer

REPO_ID = "jordigonzm/gemma-2b-it"
MODEL_FILENAME = "gemma-2b-it_v1p1-Q4_K_M.gguf"

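# Download the quantized GGUF weights from the Hugging Face Hub and pair them
# with the repo's Hugging Face tokenizer so tokenization matches the original
# model. Q4_K_M is a 4-bit K-quant (medium) GGUF quantization.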
llama = llama_cpp.Llama.from_pretrained(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(REPO_ID),
    verbose=False
)

# Passed through as the "model" field of the OpenAI-style response;
# llama-cpp-python runs the loaded GGUF weights regardless of this label.
model = "gpt-3.5-turbo"

def predict(message, history=None):
    # gr.Interface passes one value per input component, so `history` gets a
    # default to keep the signature compatible with the single Textbox input.
    messages = []

    # Append the new user message.
    messages.append({"role": "user", "content": message})

    # Request a streamed, OpenAI-style chat completion from the model.
    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True
    )

    # Accumulate streamed chunks and yield the growing reply.
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text

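# gr.Interface treats a generator function as a streaming endpoint: each
# yielded string replaces the displayed output, so the reply grows in place
# as chunks arrive.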
chat_interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(lines=6, placeholder="Ask"),
    ],
    outputs="text",
    title="Chat with Gemma-2B-it Model, LlamaCPP",
    description=(
        "Welcome to the Gemma-2B-IT LlamaCPP Assistant, an interactive platform "
        "powered by the state-of-the-art Gemma-2B-it language model, seamlessly "
        "integrated with Llama-CPP-Python.<br>" + MODEL_FILENAME
    ),
    theme="soft",
)

chat_interface.launch()
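
# launch() serves the app on a local URL (http://127.0.0.1:7860 by default);
# on Hugging Face Spaces the container exposes it automatically.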