import os

import langchain.globals
import streamlit as st
from langchain.chains import ConversationChain
from langchain.memory import ConversationTokenBufferMemory
# Recent LangChain releases ship HuggingFacePipeline in langchain_community;
# older versions expose it as langchain.llms.HuggingFacePipeline.
from langchain_community.llms import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Model repo and Hub token come from the environment; `token` is currently
# unused here but kept for Hugging Face Hub authentication.
my_model_id = os.getenv('MODEL_REPO_ID', 'Default Value')
token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

@st.cache_resource
def load_model():
    # 8-bit quantization config; defined but not passed to from_pretrained below.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        # bnb_4bit_compute_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(my_model_id)
    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto")  # , quantization_config=quantization_config
    return tokenizer, model
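
# A minimal sketch (assumption, not something this file does): the tokenizer and
# model returned by load_model() could back a text-generation pipeline directly:
#
#     tokenizer, model = load_model()
#     pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)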

@st.cache_resource
def load_pipeline():
    # Build a Transformers text-generation pipeline and wrap it as a LangChain LLM.
    pipe = pipeline(
        "text-generation",
        model="KvrParaskevi/Llama-2-7b-Hotel-Booking-Model",
        max_new_tokens=50,
        top_k=30,
        temperature=0.1,
        repetition_penalty=1.03,
    )
    hf = HuggingFacePipeline(pipeline=pipe)
    return hf

def generate_from_pipeline(text, pipe):
    # Pass raw text straight through the wrapped pipeline.
    return pipe(text)

def demo_miny_memory(model):
    # llm_data = get_Model(hugging_face_key)
    # Token-limited conversation memory; ConversationTokenBufferMemory is the
    # buffer variant that actually accepts llm and max_token_limit.
    memory = ConversationTokenBufferMemory(llm=model, max_token_limit=512)
    return memory

def demo_chain(input_text, memory, model):
    # llm_data = get_Model(hugging_face_key)
    llm_conversation = ConversationChain(
        llm=model,
        memory=memory,
        verbose=langchain.globals.get_verbose(),
    )
    chat_reply = llm_conversation.predict(input=input_text)
    return chat_reply
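
# A minimal usage sketch (assumption: how a Streamlit front end might wire these
# helpers together; st.chat_input/st.chat_message need Streamlit >= 1.24):
#
#     llm = load_pipeline()
#     if "memory" not in st.session_state:
#         st.session_state.memory = demo_miny_memory(llm)
#     if user_text := st.chat_input("Ask about your booking"):
#         reply = demo_chain(user_text, st.session_state.memory, llm)
#         st.chat_message("assistant").write(reply)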