import gradio as gr
import spaces  # Hugging Face Spaces runtime helper (unused directly, kept for Spaces deployment)
import llama_cpp
import llama_cpp.llama_tokenizer

REPO_ID = "jordigonzm/gemma-2b-it"
MODEL_FILENAME = "gemma-2b-it_v1p1-Q4_K_M.gguf"

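# Download the quantized GGUF weights from the Hugging Face Hub and pair them
# with the repo's Hugging Face tokenizer so tokenization matches the original
# model. Q4_K_M is a 4-bit K-quant (medium) GGUF quantization.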
llama = llama_cpp.Llama.from_pretrained(
    repo_id=REPO_ID,
    filename=MODEL_FILENAME,
    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(REPO_ID),
    verbose=False
)

# Passed through as the "model" field of the OpenAI-style response;
# llama-cpp-python runs the loaded GGUF weights regardless of this label.
model = "gpt-3.5-turbo"

def predict(message, history=None):
    # gr.Interface passes one value per input component, so `history` gets a
    # default to keep the signature compatible with the single Textbox input.
    messages = []

    # Append the new user message.
    messages.append({"role": "user", "content": message})

    # Request a streamed, OpenAI-style chat completion from the model.
    response = llama.create_chat_completion_openai_v1(
        model=model,
        messages=messages,
        stream=True
    )

    # Accumulate streamed chunks and yield the growing reply.
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text

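# gr.Interface treats a generator function as a streaming endpoint: each
# yielded string replaces the displayed output, so the reply grows in place
# as chunks arrive.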
chat_interface = gr.Interface(
    fn=predict,
    inputs=[
        gr.Textbox(lines=6, placeholder="Ask"),
    ],
    outputs="text",
    title="Chat with Gemma-2B-it Model, LlamaCPP",
    description=(
        "Welcome to the Gemma-2B-IT LlamaCPP Assistant, an interactive platform "
        "powered by the state-of-the-art Gemma-2B-it language model, seamlessly "
        "integrated with Llama-CPP-Python.<br>" + MODEL_FILENAME
    ),
    theme="soft",
)

chat_interface.launch()
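
# launch() serves the app on a local URL (http://127.0.0.1:7860 by default);
# on Hugging Face Spaces the container exposes it automatically.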