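"""Streamlit backend for a hotel-booking chatbot: loads a Llama-2 model via
transformers, wraps it as a LangChain LLM, and answers user turns through a
memory-backed ConversationChain."""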
import os

import langchain.globals
import streamlit as st
from langchain.chains import ConversationChain
from langchain.llms import HuggingFacePipeline
from langchain.memory import ConversationTokenBufferMemory
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline

# Model repo to load; the Hub token is passed to from_pretrained for gated/private repos.
my_model_id = os.getenv('MODEL_REPO_ID', 'Default Value')
token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
@st.cache_resource
def load_model():
    # 8-bit quantization config, defined but not applied (see the commented kwarg below).
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        # bnb_4bit_compute_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(my_model_id, token=token)
    model = AutoModelForCausalLM.from_pretrained(
        my_model_id,
        device_map="auto",
        token=token,
        # quantization_config=quantization_config,
    )
    return tokenizer, model
@st.cache_resource
def load_pipeline():
    # Wrap a transformers text-generation pipeline as a LangChain LLM.
    pipe = pipeline(
        "text-generation",
        model="KvrParaskevi/Llama-2-7b-Hotel-Booking-Model",
        max_new_tokens=50,  # the transformers kwarg is max_new_tokens, not max_tokens
        do_sample=True,     # required for top_k/temperature to take effect
        top_k=30, temperature=0.1, repetition_penalty=1.03,
    )
    return HuggingFacePipeline(pipeline=pipe)
def generate_from_pipeline(text, pipe):
    # Run a single prompt through the wrapped pipeline and return its output.
    return pipe(text)
def demo_miny_memory(model):
    # The original ConversationBufferMemory silently ignores llm/max_token_limit;
    # the token-limited variant makes the intended 512-token cap actually apply.
    memory = ConversationTokenBufferMemory(llm=model, max_token_limit=512)
    return memory
def demo_chain(input_text, memory, model):
    # One conversational turn; the shared memory keeps earlier turns in context.
    llm_conversation = ConversationChain(
        llm=model, memory=memory, verbose=langchain.globals.get_verbose()
    )
    chat_reply = llm_conversation.predict(input=input_text)
    return chat_reply