Tonic committed on
Commit
89a387a
β€’
1 Parent(s): b3be2a9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -0
app.py CHANGED
@@ -78,6 +78,7 @@ def embedding_worker():
78
 
79
  embedding_response_queue.put(formatted_response)
80
  embedding_request_queue.task_done()
 
81
 
82
  threading.Thread(target=embedding_worker, daemon=True).start()
83
 
@@ -100,6 +101,7 @@ def compute_embeddings(selected_task, input_text):
100
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
101
  embeddings = F.normalize(embeddings, p=2, dim=1)
102
  embeddings_list = embeddings.detach().cpu().numpy().tolist()
 
103
  return embeddings_list
104
 
105
  @spaces.GPU
@@ -130,6 +132,7 @@ def compute_similarity(selected_task, sentence1, sentence2, extra_sentence1, ext
130
  free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
131
 
132
  similarity_scores = {"Similarity 1-2": similarity1, "Similarity 1-3": similarity2, "Similarity 1-4": similarity3}
 
133
  return similarity_scores
134
 
135
  @spaces.GPU
@@ -138,6 +141,7 @@ def compute_cosine_similarity(emb1, emb2):
138
  tensor2 = torch.tensor(emb2).to(device).half()
139
  similarity = F.cosine_similarity(tensor1, tensor2).item()
140
  free_memory(tensor1, tensor2)
 
141
  return similarity
142
 
143
 
@@ -153,6 +157,7 @@ def compute_embeddings_batch(input_texts):
153
  outputs = model(**batch_dict)
154
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
155
  embeddings = F.normalize(embeddings, p=2, dim=1)
 
156
  return embeddings.detach().cpu().numpy()
157
 
158
  def semantic_search(query_embedding, corpus_embeddings, top_k=5):
@@ -188,6 +193,7 @@ def generate_and_format_embeddings(selected_task, input_text):
188
  embedding_request_queue.put((selected_task, input_text))
189
  response = embedding_response_queue.get()
190
  embedding_response_queue.task_done()
 
191
  return response
192
 
193
 
 
78
 
79
  embedding_response_queue.put(formatted_response)
80
  embedding_request_queue.task_done()
81
+ clear_cuda_cache()
82
 
83
  threading.Thread(target=embedding_worker, daemon=True).start()
84
 
 
101
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
102
  embeddings = F.normalize(embeddings, p=2, dim=1)
103
  embeddings_list = embeddings.detach().cpu().numpy().tolist()
104
+ clear_cuda_cache()
105
  return embeddings_list
106
 
107
  @spaces.GPU
 
132
  free_memory(embeddings1, embeddings2, embeddings3, embeddings4)
133
 
134
  similarity_scores = {"Similarity 1-2": similarity1, "Similarity 1-3": similarity2, "Similarity 1-4": similarity3}
135
+ clear_cuda_cache()
136
  return similarity_scores
137
 
138
  @spaces.GPU
 
141
  tensor2 = torch.tensor(emb2).to(device).half()
142
  similarity = F.cosine_similarity(tensor1, tensor2).item()
143
  free_memory(tensor1, tensor2)
144
+ clear_cuda_cache()
145
  return similarity
146
 
147
 
 
157
  outputs = model(**batch_dict)
158
  embeddings = last_token_pool(outputs.last_hidden_state, batch_dict['attention_mask'])
159
  embeddings = F.normalize(embeddings, p=2, dim=1)
160
+ clear_cuda_cache()
161
  return embeddings.detach().cpu().numpy()
162
 
163
  def semantic_search(query_embedding, corpus_embeddings, top_k=5):
 
193
  embedding_request_queue.put((selected_task, input_text))
194
  response = embedding_response_queue.get()
195
  embedding_response_queue.task_done()
196
+ clear_cuda_cache()
197
  return response
198
 
199