"""Gradio chat front-end for a local GPT4All-LoRA model served by llama.cpp."""

from typing import Iterator

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO_ID = "LLukas22/gpt4all-lora-quantized-ggjt"
MODEL_FILENAME = "ggjt-model.bin"

# Sequences at which generation is cut off.
STOP_SEQUENCES = ['### Instruction:', '### End']

# Fetch the weights into the working directory, then load them from there.
hf_hub_download(repo_id=MODEL_REPO_ID, filename=MODEL_FILENAME, local_dir=".")
llm = Llama(model_path="./ggjt-model.bin")

# Prompt template; `{}` is replaced by the user's message.
# NOTE(review): the separators here are single spaces; if the template was
# originally written across several lines, they should be newlines - confirm.
ins = '''### Instruction: {} ### Response: '''

theme = gr.themes.Monochrome(
    primary_hue="indigo",
    secondary_hue="blue",
    neutral_hue="slate",
    radius_size=gr.themes.sizes.radius_sm,
    font=[
        gr.themes.GoogleFont("Open Sans"),
        "ui-sans-serif",
        "system-ui",
        "sans-serif",
    ],
)


def generate(instruction: str, history=None) -> Iterator[str]:
    """Stream the model's answer to *instruction*.

    Args:
        instruction: The user's message; it is inserted into the ``ins``
            prompt template.
        history: Prior conversation turns. ``gr.ChatInterface`` passes this
            as a second positional argument, so it must be accepted here;
            it is not included in the prompt. Optional so that existing
            one-argument callers keep working.

    Yields:
        The response text accumulated so far, once per streamed chunk, so
        the UI can render the answer as it grows.
    """
    prompt = ins.format(instruction)
    result = ""
    for chunk in llm(prompt, stop=STOP_SEQUENCES, stream=True):
        result += chunk['choices'][0]['text']
        yield result


demo = gr.ChatInterface(
    fn=generate,
    chatbot=gr.Chatbot(height=300),
    textbox=gr.Textbox(placeholder="Ask me a question"),
    title="Healthcare Bot",
    description="Ask the Healthcare Bot any question",
    examples=[
        "Give me treatments for heart disease",
        "I hate exercise, what else can I do to treat my high blood pressure",
        "How can I avoid lung disease",
    ],
    theme=theme,
)
demo.launch()