prithivida committed on
Commit
0adbdea
1 Parent(s): 05d1ddf

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +46 -0
README.md CHANGED
@@ -86,6 +86,52 @@ Full set of evaluation numbers for our model
86
  #### With Sentence Transformers:
87
 
88
  ```python
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
 
90
  ```
91
 
 
86
  #### With Sentence Transformers:
87
 
88
  ```python
89
+ from sentence_transformers import SentenceTransformer
90
+ import scipy.spatial
91
+
92
+
93
+ model = SentenceTransformer('prithivida/miniMiracle_zh_v1')
94
+
95
+ corpus = [
96
+ '一个男人正在吃东西',
97
+ '人们正在吃一块面包',
98
+ '女孩抱着婴儿',
99
+ '一个男人正在骑马',
100
+ '一个女人正在弹吉他',
101
+ '两个人推着马车穿过树林',
102
+ '一个人骑着一匹白马在一个封闭的田野里',
103
+ '一只猴子在打鼓',
104
+ '一只猎豹正在猎物后面奔跑',
105
+ '他们享受了一顿美味的盛宴'
106
+ ]
107
+
108
+ queries = [
109
+ '一个人在吃意大利面',
110
+ '一个穿着大猩猩服装的人在打鼓'
111
+ ]
112
+
113
+
114
+ corpus_embeddings = model.encode(corpus)
115
+ query_embeddings = model.encode(queries)
116
+
117
+ # For each query, find the closest_n most similar corpus sentences, ranked by cosine similarity
118
+ closest_n = 3
119
+ for query, query_embedding in zip(queries, query_embeddings):
120
+     distances = scipy.spatial.distance.cdist([query_embedding], corpus_embeddings, "cosine")[0]
121
+
122
+     results = zip(range(len(distances)), distances)
123
+     results = sorted(results, key=lambda x: x[1])
124
+
125
+     print("\n======================\n")
126
+     print("Query:", query)
127
+     print("\nTop 3 most similar sentences in corpus:\n")
128
+
129
+     for idx, distance in results[0:closest_n]:
130
+         print(corpus[idx].strip(), "(Score: %.4f)" % (1-distance))
131
+
132
+ # Optional: How to quantize the embeddings (needs: from sentence_transformers.quantization import quantize_embeddings)
133
+ # binary_embeddings = quantize_embeddings(corpus_embeddings, precision="ubinary")
134
+
135
 
136
  ```
137