dinhquangson committed
Commit 7dbf133
1 Parent(s): 6fb7c86

Update app.py

Files changed (1)
  1. app.py +21 -2
app.py CHANGED
@@ -17,7 +17,9 @@ from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-from langchain.llms import HuggingFaceHub
+# from langchain.llms import HuggingFaceHub
+from llama_index.llms import LlamaCPP
+
 
 
 def get_pdf_pages(pdf_docs):
@@ -108,11 +110,28 @@ def get_conversation_chain(vectorstore):
     ConversationalRetrievalChain
         A conversational retrieval chain for generating responses.
 
-    """
+
     llm = HuggingFaceHub(
         repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
         model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
     )
+    """
+    llm = LlamaCPP(
+        model_url=None,  # we'll load locally from model_path
+        # trying a small quantized version of an already small model
+        model_path='phi-2.Q4_K_M.gguf',
+        temperature=0.1,
+        max_new_tokens=512,
+        context_window=2048,  # Phi-2's 2K context window; retrieved content must fit here, which can be limiting for RAG
+        generate_kwargs={},
+        # set to at least 1 to use the GPU
+        # this is a small model, so 32 layers offloads effectively all of it
+        model_kwargs={"n_gpu_layers": 32},
+        messages_to_prompt=messages_to_prompt,
+        completion_to_prompt=completion_to_prompt,
+        verbose=True
+    )
+
     # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
 
     memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
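
The new code passes `messages_to_prompt` and `completion_to_prompt` to `LlamaCPP`, but neither helper is defined or imported in the visible hunks; llama_index's `LlamaCPP` accepts them as callables that turn a list of chat messages, or a bare completion request, into the model's expected prompt string. A minimal sketch of what they might look like, assuming Phi-2's common "Instruct:/Output:" prompt convention (both function bodies are illustrative, not taken from this commit):

def completion_to_prompt(completion: str) -> str:
    # Hypothetical: wrap a plain completion request in Phi-2's instruct format.
    return f"Instruct: {completion}\nOutput:"

def messages_to_prompt(messages) -> str:
    # Hypothetical: flatten chat messages (objects with .role and .content)
    # into a single Phi-2-style prompt string.
    turns = "\n".join(f"{m.role}: {m.content}" for m in messages)
    return f"Instruct: {turns}\nOutput:"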
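
`model_path='phi-2.Q4_K_M.gguf'` also assumes the quantized weights already sit next to app.py. One way to fetch them, assuming the community TheBloke/phi-2-GGUF repo hosts this quantization (repo id and filename are assumptions, not part of the commit):

from huggingface_hub import hf_hub_download

# Download the GGUF file once; hf_hub_download returns the cached local path.
model_path = hf_hub_download(
    repo_id="TheBloke/phi-2-GGUF",     # assumed community GGUF build of microsoft/phi-2
    filename="phi-2.Q4_K_M.gguf",
)
# Pass this path to LlamaCPP(model_path=model_path, ...).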
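
One design note: `LlamaCPP` here comes from llama_index, while the chain built below is LangChain's `ConversationalRetrievalChain`, which expects a LangChain LLM, so the two interfaces don't plug together directly. If the goal is a local llama.cpp model inside this LangChain chain, LangChain's own wrapper is the closer fit. A sketch using `langchain.llms.LlamaCpp` — a different wrapper than the one this commit imports, with parameter names following llama-cpp-python:

from langchain.llms import LlamaCpp

llm = LlamaCpp(
    model_path="phi-2.Q4_K_M.gguf",
    temperature=0.1,
    max_tokens=512,    # counterpart of max_new_tokens above
    n_ctx=2048,        # context window
    n_gpu_layers=32,   # layers to offload to the GPU
    verbose=True,
)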