"""Streamlit chat UI for a healthcare-advice bot backed by a local llama.cpp model.

The script downloads a quantized GPT4All-LoRA checkpoint from the Hugging Face
Hub, loads it with ``llama_cpp.Llama`` and serves a simple chat interface whose
history is kept in ``st.session_state``.
"""

import streamlit as st
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

MODEL_REPO_ID = "LLukas22/gpt4all-lora-quantized-ggjt"
MODEL_FILENAME = "ggjt-model.bin"


@st.cache_resource
def _load_model() -> Llama:
    """Download the model file (if needed) and load it once per server process.

    Streamlit re-executes this script on every user interaction; without the
    cache decorator the model would be re-checked against the Hub and re-loaded
    into memory for every single chat message.

    Returns:
        The loaded ``Llama`` instance, shared across reruns and sessions.
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        local_dir=".",
    )
    # Use the path reported by the downloader rather than a second hard-coded
    # copy of the filename, so the two can never drift apart.
    return Llama(model_path=model_path)


llm = _load_model()

# Prompt template; ``{}`` is replaced by the full instruction text.
# NOTE(review): whitespace is reproduced exactly as found in this file.
# Templates of this kind are usually multi-line - confirm against the
# format the model was fine-tuned on.
ins = '''### Instruction: {} ### Response: '''

# System-style preamble prepended to every user message.
fixed_instruction = "You are a healthcare bot designed to give advice for the prevention and treatment of various illnesses."


def respond(message: str) -> str:
    """Generate the bot's reply to a single user message.

    The fixed instruction and the user message are joined with a space,
    inserted into the prompt template and sent to the model. Generation stops
    at the next ``### Instruction:`` or ``### End`` marker.

    Args:
        message: The text the user typed into the chat box.

    Returns:
        The text of the first completion choice returned by the model.
    """
    full_instruction = fixed_instruction + " " + message
    formatted_instruction = ins.format(full_instruction)
    completion = llm(formatted_instruction, stop=['### Instruction:', '### End'])
    return completion['choices'][0]['text']


st.title("Healthcare Bot")

# Initialize chat history
if "messages" not in st.session_state:
    st.session_state.messages = []

# Display chat messages from history on app rerun
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

# React to user input
if prompt := st.chat_input("What is your question?"):
    # Display user message in chat message container
    st.chat_message("user").markdown(prompt)
    # Add user message to chat history
    st.session_state.messages.append({"role": "user", "content": prompt})

    response = respond(prompt)

    # Display assistant response in chat message container
    with st.chat_message("assistant"):
        st.markdown(response)
    # Add assistant response to chat history
    st.session_state.messages.append({"role": "assistant", "content": response})