KvrParaskevi committed on
Commit 63159de
1 Parent(s): 6afafbb

Update chatbot.py

Files changed (1)
  1. chatbot.py +51 -18
chatbot.py CHANGED
@@ -2,41 +2,74 @@ import os
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationChain
 import langchain.globals
+from langchain.prompts import PromptTemplate
 from langchain_community.llms.huggingface_pipeline import HuggingFacePipeline
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline
-import streamlit as st
+#import streamlit as st
 
-my_model_id = os.getenv('MODEL_REPO_ID', 'Default Value')
+my_model_id = "KvrParaskevi/Llama-2-7b-Hotel-Booking-Model" #os.getenv('MODEL_REPO_ID', 'Default Value')
 token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
 
-@st.cache_resource
+template = """<<SYS>>
+You are an AI having conversation with a human. Below is an instruction that describes a task.
+Write a response that appropriately completes the request.
+Reply with the most helpful and logic answer. During the conversation you need to ask the user
+the following questions to complete the hotel booking task.
+
+1) Where would you like to stay and when?
+2) How many people are staying in the room?
+3) Do you prefer any ammenities like breakfast included or gym?
+4) What is your name, your email address and phone number?
+
+Make sure you receive a logical answer from the user from every question to complete the hotel
+booking process.
+<</SYS>>
+
+Previous conversation:
+{chat_history}
+
+Human: {question}
+AI:"""
+
+#@st.cache_resource
 def load_model():
     quantization_config = BitsAndBytesConfig(
         load_in_8bit=True,
         # bnb_4bit_compute_dtype=torch.bfloat16
     )
     tokenizer = AutoTokenizer.from_pretrained(my_model_id)
-    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto") #,quantization_config=quantization_config
+    model = AutoModelForCausalLM.from_pretrained(my_model_id, device_map="auto",quantization_config=quantization_config) #
 
     return tokenizer,model
 
-@st.cache_resource
+#@st.cache_resource
 def load_pipeline():
-    pipe = pipeline("text-generation", model="KvrParaskevi/Llama-2-7b-Hotel-Booking-Model", max_tokens = 50,top_k = 30, temperature = 0.1,repetition_penalty = 1.03)
-    hf = HuggingFacePipeline(pipeline=pipe)
-    return hf
+    tokenizer, model = load_model()
+    pipe = pipeline("text-generation",
+                    model= model,
+                    tokenizer = tokenizer,
+                    max_tokens = 50,top_k = 30, early_stopping=True,
+                    temperature = 0.1,repetition_penalty = 1.03)
+
+    llm = HuggingFacePipeline(pipeline=pipe)
+    return llm
 
-def generate_from_pipeline(text, pipe):
-    return pipe(text)
+# def generate_from_pipeline(text, pipe):
+#     return pipe(text)
 
-def demo_miny_memory(model):
-    # llm_data = get_Model(hugging_face_key)
-    memory = ConversationBufferMemory(llm = model,max_token_limit = 512)
+llm = load_pipeline()
+
+def demo_miny_memory():
+    prompt = PromptTemplate.from_template(template)
+    memory = ConversationBufferMemory(memory_key="chat_history", llm = llm)
     return memory
 
-def demo_chain(input_text, memory,model):
-    # llm_data = get_Model(hugging_face_key)
-    llm_conversation = ConversationChain(llm=model,memory=memory,verbose=langchain.globals.get_verbose())
+def demo_chain(input_text):
+    conversation = ConversationChain(
+        llm=llm,
+        verbose=True,
+        memory=demo_miny_memory()
+    )
 
-    chat_reply = llm_conversation.predict(input=input_text)
-    return chat_reply
+    chat_reply = conversation(input=input_text)
+    return chat_reply['response']
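
For reference, a short usage sketch of the reworked entry point follows. It is illustrative only and not part of this commit; the sample question, the print-based driver, and the assumption that chatbot.py and the model weights load on the local machine are all hypothetical.

# Illustrative sketch, not part of the commit: exercising the reworked chatbot.py.
# Assumes the module is importable and the KvrParaskevi/Llama-2-7b-Hotel-Booking-Model
# weights can be downloaded and loaded on this machine.
from chatbot import demo_chain   # importing chatbot runs llm = load_pipeline() once

# demo_chain builds a fresh ConversationChain (and fresh memory) on every call
# and returns only the 'response' field of the chain output.
reply = demo_chain("I would like to book a double room in Athens for next weekend.")
print(reply)

Note that because demo_miny_memory() is invoked inside demo_chain, each call starts with an empty chat history in this version; a multi-turn booking flow would need to keep one memory object alive across calls.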