Med Tiouti committed on
Commit
5491a72
1 Parent(s): 1a66660

Test8326832

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+ faiss_index_shl/index.faiss filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -7,6 +7,9 @@ sdk: gradio
 sdk_version: 4.14.0
 app_file: app.py
 pinned: false
+ models:
+ - sentence-transformers/all-MiniLM-L6-v2
+ - daryl149/llama-2-7b-chat-hf
 ---

 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,162 @@
+ import gradio as gr
+ # retrievers
+ from langchain.chains import RetrievalQA
+
+ import textwrap
+ import time
+
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+
+ # models
+ from langchain.llms import HuggingFacePipeline
+ from langchain.embeddings import HuggingFaceInstructEmbeddings
+
+ # prompts
+ from langchain import PromptTemplate
+
+ # vector stores
+ from langchain.vectorstores import FAISS
+
+
+ def get_model(model_repo='daryl149/llama-2-7b-chat-hf'):
+     # Load the tokenizer and a 4-bit quantized copy of the chat model
+     # (load_in_4bit requires bitsandbytes and a CUDA GPU).
+     tokenizer = AutoTokenizer.from_pretrained(model_repo, use_fast=True)
+
+     model = AutoModelForCausalLM.from_pretrained(
+         model_repo,
+         load_in_4bit=True,
+         device_map='auto',
+         torch_dtype=torch.float16,
+         low_cpu_mem_usage=True,
+         trust_remote_code=True
+     )
+     max_len = 2048
+
+     return tokenizer, model, max_len
+
+
+ tokenizer, model, max_len = get_model('daryl149/llama-2-7b-chat-hf')
+
+
+ temperature = 0
+ top_p = 0.95
+ repetition_penalty = 1.15
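+
+ # Note: in transformers, temperature and top_p only take effect when
+ # do_sample=True; with the defaults used below, decoding is effectively
+ # greedy, so these values are kept from the original but may be inert.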
+
+ pipe = pipeline(
+     task="text-generation",
+     model=model,
+     tokenizer=tokenizer,
+     pad_token_id=tokenizer.eos_token_id,
+     max_length=max_len,
+     temperature=temperature,
+     top_p=top_p,
+     repetition_penalty=repetition_penalty
+ )
+
+ llm = HuggingFacePipeline(pipeline=pipe)
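+
+ # HuggingFacePipeline is LangChain's wrapper around a local transformers
+ # pipeline, so the RetrievalQA chain below can drive this model like any
+ # other LLM.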
+
+
+ # number of similar passages to retrieve
+ k = 3
+
+
+ # path to the FAISS index shipped with this Space (the original pointed at
+ # the Colab path /content/faiss_index_shl)
+ embeddings_shl_path = "faiss_index_shl"
+ embeddings_model_repo = 'sentence-transformers/all-MiniLM-L6-v2'
+
+ ### download embeddings model
+ embeddings = HuggingFaceInstructEmbeddings(
+     model_name=embeddings_model_repo,
+     model_kwargs={"device": "cuda"}
+ )
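+
+ # Note: the embedding model used here must be the same one that built the
+ # FAISS index, otherwise nearest-neighbour lookups return unrelated passages.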
+
+ ### load vector DB embeddings
+ vectordb = FAISS.load_local(
+     embeddings_shl_path,
+     embeddings
+ )
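+
+ # Note: newer LangChain releases additionally require
+ # allow_dangerous_deserialization=True to load a pickled FAISS index;
+ # the version current at this commit did not.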
+
+
+ prompt_template = """
+ Don't try to make up an answer; if you don't know, just say that you don't know.
+ Answer in the same language the question was asked.
+ Don't mention the speaker in the answer; just give the answer directly.
+ Use only the following pieces of context to answer the question at the end.
+
+ {context}
+
+ Question: {question}
+ Answer:"""
+
+
+ PROMPT = PromptTemplate(
+     template=prompt_template,
+     input_variables=["context", "question"]
+ )
+
+ # search_type is a top-level argument to as_retriever, not a search_kwarg,
+ # and k was already defined above
+ retriever = vectordb.as_retriever(search_type="similarity", search_kwargs={"k": k})
+
+ qa_chain = RetrievalQA.from_chain_type(
+     llm=llm,
+     chain_type="stuff",  # alternatives: map_reduce, map_rerank, refine
+     retriever=retriever,
+     chain_type_kwargs={"prompt": PROMPT},
+     return_source_documents=True,
+     verbose=False
+ )
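+
+ # "stuff" simply concatenates the k retrieved chunks into one prompt, which
+ # works as long as the chunks plus the question fit within max_length tokens.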
+
+ def wrap_text_preserve_newlines(text, width=700):
+     # Split the input text into lines based on newline characters
+     lines = text.split('\n')
+
+     # Wrap each line individually
+     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+
+     # Join the wrapped lines back together using newline characters
+     wrapped_text = '\n'.join(wrapped_lines)
+
+     return wrapped_text
+
+ def process_llm_response(llm_response):
+     ans = wrap_text_preserve_newlines(llm_response['result'])
+
+     # [:-4] strips what is presumably a 4-character file extension
+     # (e.g. ".txt") to leave the episode title
+     sources_used = ' \n'.join(
+         [
+             "<b> - " + source.metadata['source'].split('/')[-1][:-4] + "</b>"
+             for source in llm_response['source_documents']
+         ]
+     )
+
+     ans += "\nSand Hill Road podcast episodes based on your question: \n" + sources_used
+     return ans, sources_used
+
+ def llm_ans(query):
+     start = time.time()
+     llm_response = qa_chain(query)
+     ans, sources_used = process_llm_response(llm_response)
+     end = time.time()
+
+     time_elapsed = int(round(end - start, 0))
+     time_elapsed_str = f'\n\nTime elapsed: {time_elapsed} s'
+     return ans, sources_used, time_elapsed_str
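+
+ # qa_chain(query) returns a dict with 'result' plus, because
+ # return_source_documents=True, a 'source_documents' list of Documents.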
+
+
+ def predict(message, history):
+     # output = message  # debug mode
+
+     # gr.ChatInterface calls predict(message, history); history is unused here
+     output = str(llm_ans(message)[0]).replace("\n", "<br/>")
+     return output
+
+ demo = gr.ChatInterface(
+     predict,
+     title='Sand Hill Road Podcast Chatbot'  # plain string; the f-prefix was unused
+ )
+
+ demo.queue()
+ demo.launch(debug=True)
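+
+ # queue() makes Gradio queue concurrent requests, which matters here because
+ # a single in-process model instance serves every user.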
faiss_index_shl/index.faiss ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:491d00dbb83399b1954976033a584b7f6d92c631d182662ae4ce6a7fbea4acb5
+ size 4389933
faiss_index_shl/index.pkl ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b02757d1a2c734a71e2b08b4569007dfd66dab3fbec3d2590a0b63c55a7dabf7
+ size 2337485
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ langchain
+ faiss-gpu
+ transformers
+ InstructorEmbedding
+ sentence_transformers
+ accelerate
+ bitsandbytes
+ xformers
+ einops
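+ # Note: faiss-gpu and bitsandbytes assume a CUDA runtime; on CPU-only
+ # hardware, faiss-cpu would be the drop-in replacement.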