import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Download the model file into the working directory and load it from the path
# that hf_hub_download reports, so the download location and the load path
# cannot drift apart.
model_path = hf_hub_download(
    repo_id="LLukas22/gpt4all-lora-quantized-ggjt",
    filename="ggjt-model.bin",
    local_dir=".",
)
llm = Llama(model_path=model_path)

# Prompt template: `{}` is replaced with the user's text via str.format, and
# the prompt ends right after the response header.
ins = (
    "### Instruction:\n"
    "{}\n"
    "### Response:\n"
)

# UI theme: Monochrome base with indigo/blue/slate hues, the small corner
# radius preset, and Open Sans with generic sans-serif fallbacks.
_font_stack = [
    gr.themes.GoogleFont("Open Sans"),
    "ui-sans-serif",
    "system-ui",
    "sans-serif",
]
theme = gr.themes.Monochrome(
    font=_font_stack,
    radius_size=gr.themes.sizes.radius_sm,
    neutral_hue="slate",
    secondary_hue="blue",
    primary_hue="indigo",
)

def generate(instruction, history=None):
    """Stream the model's reply to *instruction* as a growing string.

    Args:
        instruction: The user's message; it is substituted into the ``ins``
            prompt template.
        history: Previous chat turns. ``gr.ChatInterface`` invokes its ``fn``
            as ``fn(message, history)``, so this parameter must be accepted
            even though the prompt is built from the current message only.
            Defaults to ``None`` so single-argument callers keep working.

    Yields:
        The response text accumulated so far, once per streamed chunk.
    """
    prompt = ins.format(instruction)
    result = ""
    # NOTE(review): max_tokens is left at the llama_cpp default; confirm that
    # default is long enough for full answers.
    for chunk in llm(prompt, stop=['### Instruction:', '### End'], stream=True):
        result += chunk['choices'][0]['text']
        yield result

# Build the chat UI around `generate` and serve it.
demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question"),
    title="Healthcare Bot",
    description="Ask the Healthcare Bot any question",
    examples=[
        "Give me treatments for heart disease",
        "I hate exercise, what else can I do to treat my high blood pressure",
        "How can I avoid lung disease",
    ],
    theme=theme,
)

# `generate` is a generator (it streams partial replies). Gradio 3.x requires
# the queue to be enabled for generator callbacks; on newer releases the queue
# is already on by default, so this call is harmless there.
demo.queue().launch()