import os
import spaces
from langchain.memory import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
import langchain.globals
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
#import streamlit as st

# Model repo id and HF token are read from the environment (e.g. Space settings / secrets).
my_model_id = os.getenv('MODEL_REPO_ID', 'Default Value')
token = os.getenv('HUGGINGFACEHUB_API_TOKEN')

template = """You are an AI having conversation with a human. Below is an instruction that describes a task. 
Write a response that appropriately completes the request.
Reply with the most helpful and logic answer. During the conversation you need to ask the user 
the following questions to complete the hotel booking task.
1) Where would you like to stay and when?
2) How many people are staying in the room?
3) Do you prefer any ammenities like breakfast included or gym?
4) What is your name, your email address and phone number? 
Make sure you receive a logical answer from the user from every question to complete the hotel 
booking process.

Relevant Information:


{history}

Current Conversation:

Human: {input}
AI:"""

#@st.cache_resource
@spaces.GPU
def load_model():
    # Load the tokenizer and the causal LM, quantized to 8-bit so it fits on a single GPU.
    quantization_config = BitsAndBytesConfig(
        load_in_8bit=True,
        # bnb_4bit_compute_dtype=torch.bfloat16
    )
    tokenizer = AutoTokenizer.from_pretrained(my_model_id)
    model = AutoModelForCausalLM.from_pretrained(
        my_model_id,
        device_map="auto",
        quantization_config=quantization_config,
    )

    return tokenizer, model
    
#@st.cache_resource
@spaces.GPU
def load_pipeline():
    tokenizer, model = load_model()
    # Wrap the model in a text-generation pipeline with conservative decoding settings
    # (low temperature, beam search) to keep the booking dialogue on track.
    pipe = pipeline("text-generation",
                    model=model,
                    tokenizer=tokenizer,
                    #max_new_tokens = 50,
                    top_k=30,
                    top_p=0.7,
                    early_stopping=True,
                    num_beams=2,
                    temperature=0.05,
                    repetition_penalty=1.05)

    llm = HuggingFacePipeline(pipeline=pipe)
    return llm

# def generate_from_pipeline(text, pipe):
#     return pipe(text)

# Build the HuggingFacePipeline-backed LLM once at import time so it is shared
# by the memory and the conversation chain.
llm = load_pipeline()

def demo_miny_memory():
    #prompt = ChatPromptTemplate.from_template(template)
    # Summarising buffer memory tracks the running conversation under the "history" key.
    memory = ConversationSummaryBufferMemory(llm=llm, memory_key="history")
    return memory

@spaces.GPU
def demo_chain(input_text, history):
    #PROMPT = ChatPromptTemplate.from_template(template)
    PROMPT = PromptTemplate(template=template, input_variables=["history", "input"])
    conversation = ConversationChain(
        llm=llm,
        prompt=PROMPT,
        #verbose=langchain.globals.get_verbose(),
        verbose=True,
        memory=demo_miny_memory()
    )

    # Note: the memory created above supplies {history}; when the chain merges memory
    # variables into the inputs, the "history" value passed here is overridden.
    chat_reply = conversation.invoke({
        "input": input_text,
        "history": history
    }, return_only_outputs=True)
    return chat_reply #['response'].split('AI:')[-1]
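
# --- Usage sketch (assumption, not in the original file) ---
# The (input_text, history) signature of demo_chain matches what gradio.ChatInterface
# passes to its chat function, and the `spaces` decorator suggests this module runs in
# a Hugging Face Space. A minimal, hypothetical wiring could look like the block below;
# it is guarded so it only runs when the file is executed directly, and `gradio` is
# assumed to be installed.
if __name__ == "__main__":
    import gradio as gr

    def respond(message, chat_history):
        # ConversationChain.invoke(..., return_only_outputs=True) returns a dict keyed
        # by "response"; extract the text for the chat UI.
        reply = demo_chain(message, chat_history)
        return reply["response"]

    gr.ChatInterface(fn=respond).launch()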