import os

import gradio as gr
from ctransformers import AutoModelForCausalLM


# Load the quantized Mistral 7B Instruct model (GGUF) via ctransformers.
llm = AutoModelForCausalLM.from_pretrained(
    "TheBloke/Mistral-7B-Instruct-v0.1-GGUF",
    model_file="mistral-7b-instruct-v0.1.Q4_K_M.gguf",
    model_type="mistral",
    temperature=0.7,
    gpu_layers=0,                 # 0 = run entirely on CPU
    stream=True,                  # yield tokens incrementally instead of one string
    threads=os.cpu_count() or 1,  # use all CPU cores; fall back to 1 if unknown
    max_new_tokens=10000,
)
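
# Note: on first run, from_pretrained() downloads the GGUF weights from the
# Hugging Face Hub, so startup can take a while. A quick smoke test of the raw
# model (a minimal sketch; with stream=True the call returns a generator of
# text chunks, so join them into one string):
#   print("".join(llm("[INST] Say hello. [/INST]")))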


# Generate streaming model predictions for the chat interface.
def predict(message, history):
    # history arrives as a list of [user, assistant] pairs; add the new user turn.
    history_transformer_format = history + [[message, ""]]

    # Flatten the conversation into the turn-marker format used by this demo.
    messages = "</s>".join(
        "\n<|user|>:" + user + "</s>\n<|assistant|>:" + assistant
        for user, assistant in history_transformer_format
    )

    prompt = f"[INST]{messages}[/INST]"
    message_out = ""
    for text in llm(prompt=prompt):  # stream=True makes this a generator of chunks
        message_out += text
        yield message_out  # Gradio re-renders the partial response as it grows
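
# Sanity-check the streaming generator without the UI (a sketch; the empty
# list stands in for Gradio's history of [user, assistant] pairs):
#   chunks = list(predict("What is the capital of France?", []))
#   print(chunks[-1])  # the last yield holds the full accumulated reply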

# Set up and launch the Gradio chat interface.
gr.ChatInterface(
    predict,
    title="Test Mistral 7B",
    description="Ask Mistral any question",
    examples=["How do I cook a fish?", "Who is the president of the US now?"],
).launch()
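
# To try it out (a sketch of the expected setup; the script name app.py is an
# assumption, package names are as published on PyPI):
#   pip install gradio ctransformers
#   python app.py
# Then open the local URL Gradio prints (http://127.0.0.1:7860 by default).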