ryanrwatkins committed
Commit
1e1ca0a
1 Parent(s): df1c955

Create app_backup.py

Files changed (1)
  app_backup.py +698 -0
app_backup.py ADDED
@@ -0,0 +1,698 @@
# https://medium.com/thedeephub/rag-chatbot-powered-by-langchain-openai-google-generative-ai-and-hugging-face-apis-6a9b9d7d59db
# https://github.com/AlaGrine/RAG_chatabot_with_Langchain/blob/main/RAG_notebook.ipynb


from langchain_community.document_loaders import (
    PyPDFLoader,
    TextLoader,
    DirectoryLoader,
    CSVLoader,
    UnstructuredExcelLoader,
    Docx2txtLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter
import tiktoken
import gradio as gr
import csv
import os, tempfile, glob, random
from pathlib import Path
#from IPython.display import Markdown
from PIL import Image
from getpass import getpass
import numpy as np
from itertools import combinations
import pypdf
import requests


# LLM: openai and google_genai
import openai
from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# LLM: HuggingFace
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings
from langchain_community.llms import HuggingFaceHub

# langchain prompts, memory, chains...
from langchain.prompts import PromptTemplate, ChatPromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain_community.chat_message_histories import StreamlitChatMessageHistory
from operator import itemgetter
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough
from langchain.schema import Document, format_document
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string

from langchain_google_genai import (
    ChatGoogleGenerativeAI,
    HarmBlockThreshold,
    HarmCategory,
)

# OutputParser
from langchain_core.output_parsers import StrOutputParser

# Chroma: vectorstore
from langchain_community.vectorstores import Chroma

# Contextual Compression
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain_community.document_transformers import EmbeddingsRedundantFilter, LongContextReorder
from langchain.retrievers.document_compressors import EmbeddingsFilter
from langchain.retrievers import ContextualCompressionRetriever

from langchain.memory import ConversationSummaryBufferMemory, ConversationBufferMemory

# Cohere (not currently in use)
from langchain.retrievers.document_compressors import CohereRerank
from langchain_community.llms import Cohere

# Get API keys
openai_api_key = os.environ['openai_key']
google_api_key = os.environ['gemini_key']
HF_key = os.environ['HF_token']
cohere_api_key = os.environ['cohere_api']

current_dir = os.getcwd()


# Not currently in use
prompt_templates = {"All Needs Experts": "Respond as if you are a combination of all needs assessment experts."}
actor_description = {"All Needs Experts": "<div style='float: left;margin: 0px 5px 0px 5px;'><img src='https://na.weshareresearch.com/wp-content/uploads/2023/04/experts2.jpg' alt='needs expert image' style='width:70px;align:top;'></div>A combination of all needs assessment experts."}


# Initiates the UI features

def get_empty_state():
    return {"messages": []}


def download_prompt_templates():
    url = "https://huggingface.co/spaces/ryanrwatkins/needs/raw/main/gurus.txt"
    try:
        response = requests.get(url)
        reader = csv.reader(response.text.splitlines())
        next(reader)  # skip the header row
        for row in reader:
            if len(row) >= 3:
                act = row[0].strip('"')
                prompt = row[1].strip('"')
                description = row[2].strip('"')
                prompt_templates[act] = prompt
                actor_description[act] = description

    except requests.exceptions.RequestException as e:
        print(f"An error occurred while downloading prompt templates: {e}")
        return

    choices = list(prompt_templates.keys())
    choices = choices[:1] + sorted(choices[1:])
    return gr.update(value=choices[0], choices=choices)


def on_prompt_template_change(prompt_template):
    if not isinstance(prompt_template, str): return
    return prompt_templates[prompt_template]


def on_prompt_template_change_description(prompt_template):
    if not isinstance(prompt_template, str): return
    return actor_description[prompt_template]



# Set to load only PDFs, but this could be changed to a specific directory so that other files don't get embeddings

def langchain_document_loader():
    """
    Load documents from the current working directory.
    Loaders for txt, CSV, and docx files are included but commented out; only PDF loading is active.
    """
    #current_dir = os.getcwd()
    #TMP_DIR = current_dir
    global documents
    documents = []

    """
    txt_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.txt", loader_cls=TextLoader, show_progress=True
    )
    documents.extend(txt_loader.load())
    """
    pdf_loader = DirectoryLoader(
        current_dir, glob="*.pdf", loader_cls=PyPDFLoader, show_progress=True
    )
    documents.extend(pdf_loader.load())
    """
    csv_loader = DirectoryLoader(
        TMP_DIR.as_posix(), glob="**/*.csv", loader_cls=CSVLoader, show_progress=True,
        loader_kwargs={"encoding":"utf8"}
    )
    documents.extend(csv_loader.load())

    doc_loader = DirectoryLoader(
        #TMP_DIR.as_posix(),
        current_dir,
        glob="**/*.docx",
        loader_cls=Docx2txtLoader,
        show_progress=True,
    )
    documents.extend(doc_loader.load())
    """
    return documents

langchain_document_loader()


# Text splitting of the uploaded documents; the chunks will become the vectors

text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_size=1500,
    chunk_overlap=200
)
chunks = text_splitter.split_documents(documents=documents)

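# Note: RecursiveCharacterTextSplitter measures chunk_size and chunk_overlap in
# characters (its default length_function is len), while the tiktoken report
# below counts tokens, so the two numbers will not match one-to-one.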


# FYI only: this does not affect anything; it just reports token statistics when the app restarts

def tiktoken_tokens(documents, model="gpt-3.5-turbo"):
    """Use tiktoken (the tokenizer for OpenAI models) to return a list of token lengths per document."""
    encoding = tiktoken.encoding_for_model(model)  # returns the encoding used by the model

    tokens_length = [len(encoding.encode(documents[i].page_content)) for i in range(len(documents))]

    return tokens_length

chunks_length = tiktoken_tokens(chunks, model="gpt-3.5-turbo")

print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")
print(f"Number of tokens - 25% percentile : {int(np.quantile(chunks_length,0.25))}")
print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")


# For embeddings I am just using the free HF model, so the others are turned off

def select_embeddings_model(LLM_service="HuggingFace"):
    """Connect to the embeddings API endpoint by specifying
    the name of the embedding model."""
    """
    if LLM_service == "OpenAI":
        embeddings = OpenAIEmbeddings(
            model='text-embedding-ada-002',
            api_key=openai_api_key)
    """

    """
    if LLM_service == "Google":
        embeddings = GoogleGenerativeAIEmbeddings(
            model="models/embedding-001",
            google_api_key=google_api_key,
        )
    """

    if LLM_service == "HuggingFace":
        embeddings = HuggingFaceInferenceAPIEmbeddings(
            api_key=HF_key,
            #model_name="thenlper/gte-large"
            model_name="sentence-transformers/all-MiniLM-l6-v2"
        )
    print("embedding model selected")
    return embeddings

#embeddings_OpenAI = select_embeddings_model(LLM_service="OpenAI")
#embeddings_google = select_embeddings_model(LLM_service="Google")
embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")



# Creates the database that will hold the embedding vectors
def create_vectorstore(embeddings, documents, vectorstore_name):
    """Create a Chroma vector database."""
    persist_directory = current_dir + "/" + vectorstore_name
    vector_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=persist_directory
    )
    print("created Chroma vector database")
    return vector_store


create_vectorstores = True  # set to False to skip re-creating the vectorstores on startup

# Then we tell it to store the embeddings in the VectorStore (sticking with HF for this)
if create_vectorstores:
    """
    vector_store_OpenAI,_ = create_vectorstore(
        embeddings=embeddings_OpenAI,
        documents=chunks,
        vectorstore_name="Vit_All_OpenAI_Embeddings",
    )
    print("vector_store_OpenAI:", vector_store_OpenAI._collection.count(), "chunks.")
    """
    """
    vector_store_google,new_vectorstore_name = create_vectorstore(
        embeddings=embeddings_google,
        documents=chunks,
        vectorstore_name="Vit_All_Google_Embeddings"
    )
    print("vector_store_google:", vector_store_google._collection.count(), "chunks.")
    """

    vector_store_HF = create_vectorstore(
        embeddings=embeddings_HuggingFace,
        documents=chunks,
        vectorstore_name="Vit_All_HF_Embeddings"
    )
    print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
    print("")


# Now we tell it to keep the chromadb persistent so that it can be referenced at any time

"""
vector_store_OpenAI = Chroma(
    persist_directory=LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
    embedding_function=embeddings_OpenAI)
print("vector_store_OpenAI:", vector_store_OpenAI._collection.count(), "chunks.")
"""

"""
vector_store_google = Chroma(
    persist_directory=current_dir + "/Vit_All_Google_Embeddings",
    embedding_function=embeddings_google)
print("vector_store_google:", vector_store_google._collection.count(), "chunks.")
"""

vector_store_HF = Chroma(
    persist_directory=current_dir + "/Vit_All_HF_Embeddings",
    embedding_function=embeddings_HuggingFace)
print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
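# Because create_vectorstore() wrote to the same persist_directory above, re-opening
# the collection here simply reloads the stored vectors from disk, so the embeddings
# do not have to be recomputed on every restart.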


# Now we create the code to retrieve embeddings from the vectorstore (again, sticking with HF)

def Vectorstore_backed_retriever(
    vectorstore, search_type="similarity", k=10, score_threshold=None
):
    """Create a vectorstore-backed retriever.
    Parameters:
        search_type: Defines the type of search that the Retriever should perform.
            Can be "similarity" (default), "mmr", or "similarity_score_threshold"
        k: number of documents to return (Default: 10)
        score_threshold: Minimum relevance threshold for similarity_score_threshold (default=None)
    """
    print("vector_backed retriever started")
    search_kwargs = {}
    if k is not None:
        search_kwargs['k'] = k
    if score_threshold is not None:
        search_kwargs['score_threshold'] = score_threshold
    global retriever
    retriever = vectorstore.as_retriever(
        search_type=search_type,
        search_kwargs=search_kwargs
    )
    print("vector_backed retriever done")
    return retriever

# similarity search
#base_retriever_OpenAI = Vectorstore_backed_retriever(vector_store_OpenAI,"similarity",k=10)
#base_retriever_google = Vectorstore_backed_retriever(vector_store_google,"similarity",k=10)
base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF, "similarity", k=10)

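# Quick sanity check (illustrative only, not run at startup): a retriever can be
# queried directly, which is handy when tuning k or the search_type, e.g.
#   docs = base_retriever_HF.get_relevant_documents("What is a needs assessment?")
#   print(len(docs), docs[0].metadata)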



# This next code takes the retrieved embeddings, removes redundant ones, filters out non-useful information, and returns a shorter set of embeddings for use

def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    """Build a ContextualCompressionRetriever.
    We wrap the base_retriever (a vectorstore-backed retriever) in a ContextualCompressionRetriever.
    The compressor here is a DocumentCompressorPipeline, which splits documents
    into smaller chunks, removes redundant documents, filters for the most relevant documents,
    and reorders the documents so that the most relevant are at the top and bottom of the list.

    Parameters:
        embeddings: OpenAIEmbeddings, GoogleGenerativeAIEmbeddings or HuggingFaceInferenceAPIEmbeddings.
        base_retriever: a vectorstore-backed retriever.
        chunk_size (int): Documents will be split into smaller chunks using a CharacterTextSplitter with a default chunk_size of 500.
        k (int): top k relevant chunks to the query are kept by the EmbeddingsFilter. default=16.
        similarity_threshold: minimum relevance threshold used by the EmbeddingsFilter. default=None.
    """
    print("compression retriever started")
    # 1. splitting documents into smaller chunks
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")

    # 2. removing redundant documents
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)

    # 3. filtering based on relevance to the query
    relevant_filter = EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold)  # similarity_threshold and top k

    # 4. reordering the documents
    # Less relevant documents will be in the middle of the list, and more relevant elements at the beginning or end.
    # Reference: https://python.langchain.com/docs/modules/data_connection/retrievers/long_context_reorder
    reordering = LongContextReorder()

    # 5. create the compressor pipeline and retriever
    pipeline_compressor = DocumentCompressorPipeline(
        transformers=[splitter, redundant_filter, relevant_filter, reordering]
    )
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=pipeline_compressor,
        base_retriever=base_retriever
    )
    print("compression retriever done")
    return compression_retriever

compression_retriever_HF = create_compression_retriever(
    embeddings=embeddings_HuggingFace,
    base_retriever=base_retriever_HF,
    k=16)

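# The DocumentCompressorPipeline applies its transformers in list order, so each
# retrieved document is first split, then de-duplicated, then relevance-filtered,
# and finally reordered before being handed to the LLM.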

# Could use the following to rank the returned embeddings in order of relevance, but all are used anyway, so I am skipping it for now (can test later)

'''
def CohereRerank_retriever(
    base_retriever,
    cohere_api_key, cohere_model="rerank-multilingual-v2.0", top_n=8
):
    """Build a ContextualCompressionRetriever using the Cohere Rerank endpoint to reorder the results based on relevance.
    Parameters:
        base_retriever: a vectorstore-backed retriever
        cohere_api_key: the Cohere API key
        cohere_model: The Cohere model can be either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0', with the latter being the default.
        top_n: top n results returned by Cohere rerank, default = 8.
    """
    print("cohere rerank started")
    compressor = CohereRerank(
        cohere_api_key=cohere_api_key,
        model=cohere_model,
        top_n=top_n
    )

    retriever_Cohere = ContextualCompressionRetriever(
        base_compressor=compressor,
        base_retriever=base_retriever
    )
    print("cohere rerank done")
    return retriever_Cohere
'''




# Can use any of these LLMs for responses; for now I am using Gemini-Pro for the bot (this is for responses, not embeddings)

def instantiate_LLM(LLM_provider, api_key, temperature=0.8, top_p=0.95, model_name=None):
    """Instantiate an LLM in LangChain.
    Parameters:
        LLM_provider (str): the LLM provider; in ["OpenAI","Google","HuggingFace"]
        model_name (str): in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview",
            "gemini-pro", "mistralai/Mistral-7B-Instruct-v0.2"].
        api_key (str): google_api_key or openai_api_key or huggingfacehub_api_token
        temperature (float): Range: 0.0 - 1.0; default = 0.8
        top_p (float): Range: 0.0 - 1.0; default = 0.95
    """
    if LLM_provider == "OpenAI":
        llm = ChatOpenAI(
            api_key=api_key,
            model="gpt-3.5-turbo",  # in ["gpt-3.5-turbo", "gpt-3.5-turbo-0125", "gpt-4-turbo-preview"]
            temperature=temperature,
            model_kwargs={
                "top_p": top_p
            }
        )
    if LLM_provider == "Google":
        llm = ChatGoogleGenerativeAI(
            google_api_key=api_key,
            model="gemini-pro",
            temperature=temperature,
            top_p=top_p,
            convert_system_message_to_human=True,
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            },
        )
    if LLM_provider == "HuggingFace":
        llm = HuggingFaceHub(
            repo_id="mistralai/Mistral-7B-Instruct-v0.2",
            huggingfacehub_api_token=api_key,
            model_kwargs={
                "temperature": temperature,
                "top_p": top_p,
                "do_sample": True,
                "max_new_tokens": 1024
            },
        )
    return llm

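# Illustrative use (nothing extra is executed here): the same factory serves both
# roles in the chain further below -- a cooler model for condensing questions,
#   instantiate_LLM(LLM_provider="Google", api_key=google_api_key, temperature=0.3)
# and a warmer one (temperature=0.8) for drafting the answers.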


# This creates a history (memory) of prior questions. I am using Gemini for this, but I left the code in case I decide to go to GPT later on.

def create_memory(model_name='gemini-pro', memory_max_token=None):
#def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
    """Creates a ConversationSummaryBufferMemory for gpt-3.5-turbo.
    Creates a ConversationBufferMemory for the other models."""

    if model_name == "gpt-3.5-turbo":
        if memory_max_token is None:
            memory_max_token = 1024  # max_tokens for 'gpt-3.5-turbo' = 4096
        memory = ConversationSummaryBufferMemory(
            max_token_limit=memory_max_token,
            llm=ChatOpenAI(model_name="gpt-3.5-turbo", openai_api_key=openai_api_key, temperature=0.1),
            return_messages=True,
            memory_key='chat_history',
            output_key="answer",
            input_key="question"
        )
    else:
        memory = ConversationBufferMemory(
            return_messages=True,
            memory_key='chat_history',
            output_key="answer",
            input_key="question",
        )
    return memory

# A small memory_max_token would show how older messages are summarized once max_token_limit is exceeded (gpt-3.5-turbo only).

memory = create_memory(model_name='gemini-pro', memory_max_token=None)
#memory = create_memory(model_name='gpt-3.5-turbo',memory_max_token=20)

# save a sample exchange as context for the conversation
memory.save_context(
    inputs={"question": "sample"},
    outputs={"answer": "sample"}
)

# load the saved history
memory.load_memory_variables({})
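# For reference, load_memory_variables({}) returns the buffered history in roughly
# this shape (exact message classes depend on the LangChain version):
#   {'chat_history': [HumanMessage(content='sample'), AIMessage(content='sample')]}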


# Create the prompt templates for the conversation

standalone_question_template = """Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in the English language.\n\n
Chat History:\n{chat_history}\n
Follow Up Input: {question}\n
Standalone question:"""

#standalone_question_prompt = PromptTemplate(
#    input_variables=['chat_history', 'question'],
#    template=standalone_question_template
#)


def answer_template(language="english"):
    """Pass the standalone question along with the chat history and context
    to the `LLM`, which will answer."""

    template = f"""You are a professor who is an expert in needs assessment.
Answer the question at the end (convert the question to the {language} language if it is not already). But do not include the question in the response.
Use only the following context (delimited by <context></context>) in responding to the question.
Be polite and end by asking if you can answer any other questions.
If you can't answer the question, then you should say that it is not within your knowledge base and that you can only answer needs assessment related questions.
Your answer must be in the language at the end.

<context>
{{chat_history}}

{{context}}

</context>

Question: {{question}}
Language: {language}.

"""
    return template

answer_prompt = ChatPromptTemplate.from_template(answer_template())
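# Note on the braces above: answer_template() is an f-string, so {language} is filled
# in immediately, while the doubled {{chat_history}}, {{context}} and {{question}}
# survive as single-brace placeholders for ChatPromptTemplate to fill at run time.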



# This builds the whole chain and sets its parameters

chain = ConversationalRetrievalChain.from_llm(
    condense_question_prompt=PromptTemplate(
        input_variables=['chat_history', 'question'],
        template=standalone_question_template),
    combine_docs_chain_kwargs={'prompt': answer_prompt},
    condense_question_llm=instantiate_LLM(
        LLM_provider="Google", api_key=google_api_key, temperature=0.3,
        model_name="gemini-pro"),
    memory=create_memory("gemini-pro"),
    retriever=compression_retriever_HF,
    #retriever=base_retriever_HF,
    llm=instantiate_LLM(
        LLM_provider="Google", api_key=google_api_key, temperature=0.8,
        model_name="gemini-pro"),
    chain_type="stuff",
    verbose=True,
    return_source_documents=True
)
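# chain.invoke({"question": ...}) returns a dict that includes the 'answer' and,
# because return_source_documents=True, the 'source_documents' list; submit_message()
# below relies on exactly those two keys.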



# The code below is for the interface

def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):

    history = state['messages']

    # this could be used later if I want to let users pick different experts and use different documents based on the preferred expert
    #global prompt_template_name
    #prompt_template_name = prompt_template
    #print(prompt_template)  # prints who is responding if I move to multiple experts
    #print(prompt_templates[prompt_template])

    completion = chain.invoke({"question": prompt})

    chain.memory.load_memory_variables({})

    get_empty_state()

    state['content'] = completion

    #state.append(completion.copy())

    completion = {"content": completion}

    print("Prompt/question:", prompt)
    answer = completion['content']['answer']
    print("Answer:", answer)
    print("Embeddings utilized:")

    for document in completion['content']['source_documents']:
        page_content = document.page_content  # use dot notation to access the attribute
        print("Embedding_content:", page_content)
        metadata = document.metadata  # use dot notation to access the attribute
        print("Metadata:", metadata)
        similarity_score = document.state['query_similarity_score']
        print("Similarity_score:", similarity_score)
        print("")

    highest_similarity_score = -1  # initialize with a score lower than possible
    selected_document = None  # holds the document with the highest similarity score

    for document in completion['content']['source_documents']:
        if document.state['query_similarity_score'] > highest_similarity_score:
            highest_similarity_score = document.state['query_similarity_score']
            selected_document = document

    if selected_document is not None:
        # Remove the "/home/user/app/" part from the document name
        modified_source = selected_document.metadata['source'].replace('/home/user/app/', '').replace('.pdf', '')
        source_info = f"\n**Lead source:** {modified_source}, **Page:** {selected_document.metadata['page']} "
    else:
        source_info = "Lead source: not determined"

    #chat_messages = [(prompt_msg['content'], completion['content'])]
    chat_messages = [(prompt, completion['content']['answer'] + source_info)]
    return '', chat_messages, state  # total_tokens_used_msg,


def clear_conversation():
    return gr.update(value=None, visible=True), None, "", get_empty_state()



css = """
#col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
#chatbox {min-height: 400px;}
#header {text-align: center;}
#prompt_template_preview {padding: 1em; border-width: 1px; border-style: solid; border-color: #e0e0e0; border-radius: 4px; min-height: 150px;}
#total_tokens_str {text-align: right; font-size: 0.8em; color: #666;}
#label {font-size: 0.8em; padding: 0.5em; margin: 0;}
.message { font-size: 1.2em; }
"""

with gr.Blocks(css=css) as demo:

    state = gr.State(get_empty_state())

    with gr.Column(elem_id="col-container"):

        gr.Markdown("""## Ask questions of our *needs assessment* bot! \n
                    **It is specially trained to only answer needs assessment related questions.**
                    """,
                    elem_id="header")

        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(elem_id="chatbox")
                input_message = gr.Textbox(show_label=False, placeholder="Enter your needs assessment question", visible=True).style(container=False)

                btn_submit = gr.Button("Submit")
                #total_tokens_str = gr.Markdown(elem_id="total_tokens_str")
                btn_clear_conversation = gr.Button("Start New Conversation", visible=False)

            with gr.Column(visible=False):
                prompt_template = gr.Dropdown(label="Choose an Expert:", choices=list(prompt_templates.keys()))
                prompt_template_preview = gr.Markdown(elem_id="prompt_template_preview")
                with gr.Accordion("Advanced parameters", open=False):
                    temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, label="Flexibility", info="Higher = More AI, Lower = More Expert")
                    max_tokens = gr.Slider(minimum=100, maximum=400, value=200, step=1, label="Length of Response.")
                    context_length = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Context Length", info="Number of previous questions you have asked.")

    btn_submit.click(submit_message, [input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot, state])
    input_message.submit(submit_message, [input_message, prompt_template, temperature, max_tokens, context_length, state], [input_message, chatbot, state])
    btn_clear_conversation.click(clear_conversation, [], [input_message, chatbot, state])
    prompt_template.change(on_prompt_template_change_description, inputs=[prompt_template], outputs=[prompt_template_preview])

    demo.load(download_prompt_templates, inputs=None, outputs=[prompt_template], queue=False)


demo.queue(concurrency_count=10)
demo.launch(height='800px')