dinhquangson committed on
Commit 0c7d70e
1 Parent(s): 4f0218b

Update app.py

Files changed (1)
app.py +4 -20
app.py CHANGED
@@ -17,8 +17,8 @@ from langchain.chat_models import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-#from langchain.llms import HuggingFaceHub
-from llama_index.llms import LlamaCPP
+from langchain.llms import HuggingFaceHub
+#from llama_index.llms import LlamaCPP
 
 
 
@@ -110,27 +110,11 @@ def get_conversation_chain(vectorstore):
     ConversationalRetrievalChain
         A conversational retrieval chain for generating responses.
 
-
+    """
     llm = HuggingFaceHub(
-        repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
+        repo_id="mlabonne/phixtral-2x2_8",
         model_kwargs={"temperature": 0.5, "max_new_tokens": 1024, "max_length": 1048, "top_k": 3, "trust_remote_code": True, "torch_dtype": "auto"},
     )
-    """
-    llm = LlamaCPP(
-        model_url=None,  # We'll load locally.
-        # Trying a small version of an already small model
-        model_path='phi-2.Q4_K_M.gguf',
-        temperature=0.1,
-        max_new_tokens=512,
-        context_window=2048,  # Phi-2's 2K context window; a possible limitation for RAG, since retrieved content must fit inside it
-        generate_kwargs={},
-        # Set to at least 1 to use the GPU
-        # This is a small model and there's no indication of layers being offloaded to the GPU
-        model_kwargs={"n_gpu_layers": 32},
-        messages_to_prompt=messages_to_prompt,
-        completion_to_prompt=completion_to_prompt,
-        verbose=True
-    )
 
     # llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-0613")
 
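For context, this commit retires the local LlamaCPP/phi-2 path and makes the Hub-hosted phixtral model the active LLM. Below is a minimal sketch of how that LLM would typically plug into the rest of get_conversation_chain, assuming the standard LangChain pattern implied by the imports in the diff; the memory and retriever wiring are not shown in this commit and are assumptions, not the author's confirmed code.

# Sketch only: chain assembly below is assumed, not shown in this diff.
from langchain.llms import HuggingFaceHub
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain

# HuggingFaceHub calls the hosted Inference API, so the
# HUGGINGFACEHUB_API_TOKEN environment variable must be set.

def get_conversation_chain(vectorstore):
    # The commit's active model: phixtral served via the Hub.
    llm = HuggingFaceHub(
        repo_id="mlabonne/phixtral-2x2_8",
        model_kwargs={"temperature": 0.5, "max_new_tokens": 1024},
    )
    # Assumed wiring: buffer memory plus the vector store as retriever.
    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )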