gmshroff committed on
Commit
80732d4
1 Parent(s): 274ad4c

added sentence-transformers encode service

Browse files
Files changed (4) hide show
  1. app.py +12 -23
  2. app_gradio.py +0 -49
  3. requirements.txt +1 -0
  4. scripts/update_valdata.py +0 -35
app.py CHANGED
@@ -11,9 +11,19 @@ from background_service import BackgroundTaskService
11
  # anvil.server.connect('PLMOIU5VCGGUOJH2XORIBWV3-ZXZVFLWX7QFIIAF4')
12
  anvil.server.connect("S3SLHUQ2BB33NVTP7FWRAOHS-NDSRD7CDALRPSPLL")
13
 
 
 
 
 
 
 
 
 
 
 
14
  app=Flask(__name__)
15
- MESSAGED={'title':'API Server',
16
- 'messageL':['published server functions:','encode(text)',
17
  'call_gemini(text,key)','call_gpt(text,key,model)',
18
  'task_id<=launch(func_name,*args)','poll(task_id)']}
19
 
@@ -89,27 +99,6 @@ def encode():
89
  embedding=emb_array.tolist()
90
  return jsonify({'embedding': embedding})
91
 
92
- @app.route("/file/<string:filename>")
93
- def return_file(filename):
94
- return send_file('./data/'+filename)
95
-
96
- @app.route('/run',methods=['GET','POST'])
97
- def run_script():
98
- script=''
99
- # print(request.method)
100
- print(request)
101
- if request.method=='GET':
102
- script=request.args.get('script')
103
- print('I am in get')
104
- elif request.method=='POST':
105
- print('I am in post')
106
- data=request.get_json()
107
- if 'script' in data: script=data['script']
108
- if script=='' or script is None: return 'INVALID'
109
- os.system(script+' > ./out.txt')
110
- with open('./out.txt','r') as f: output=f.read()
111
- return output
112
-
113
  @app.route('/',methods=['GET', 'POST'])
114
  def home():
115
  return render_template('home.html',messageD=MESSAGED)
 
11
  # anvil.server.connect('PLMOIU5VCGGUOJH2XORIBWV3-ZXZVFLWX7QFIIAF4')
12
  anvil.server.connect("S3SLHUQ2BB33NVTP7FWRAOHS-NDSRD7CDALRPSPLL")
13
 
14
+ from sentence_transformers import SentenceTransformer
15
+ from sentence_transformers.util import cos_sim
16
+ # model = SentenceTransformer('thenlper/gte-large')
17
+ model = SentenceTransformer('BAAI/bge-large-en')
18
+
19
+ @anvil.server.callable
20
+ def encode(sentence = None):
21
+ vec = model.encode(sentence)
22
+ return [float(val) if isinstance(val, (int, float, np.float32)) else 0.0 for val in vec]
23
+
24
  app=Flask(__name__)
25
+ MESSAGED={'title':'API Server for ICAPP',
26
+ 'messageL':['published server functions:','encode_anvil(text)', 'encode(sentence)',
27
  'call_gemini(text,key)','call_gpt(text,key,model)',
28
  'task_id<=launch(func_name,*args)','poll(task_id)']}
29
 
 
99
  embedding=emb_array.tolist()
100
  return jsonify({'embedding': embedding})
101
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  @app.route('/',methods=['GET', 'POST'])
103
  def home():
104
  return render_template('home.html',messageD=MESSAGED)
app_gradio.py DELETED
@@ -1,49 +0,0 @@
1
- import gradio as gr
2
- from threading import Thread
3
- import time
4
- import anvil.server
5
- import os
6
- anvil.server.connect('55MH4EBKM22EP4E6D5T6CVSL-VGO5X4SM6JEXGJVT')
7
- import json
8
- import ast
9
-
10
- def run_script(scriptname):
11
- # return scriptname
12
- os.system(scriptname+' > ./out.txt')
13
- with open('./out.txt','r') as f: output=f.read()
14
- return output
15
-
16
- @anvil.server.callable
17
- def run_command(scriptname):
18
- os.system(scriptname+' > ./out.txt')
19
- with open('./out.txt','r') as f: output=f.read()
20
- return output
21
-
22
- @anvil.server.callable
23
- def get_file(filename):
24
- m = BlobMedia('text/plain', 'Hello, world!', name='hello.txt')
25
- return m
26
-
27
- gradio_interface = gr.Interface(
28
- fn=run_script,
29
- inputs="text",
30
- outputs="text",
31
- title="REST API with Gradio and Huggingface Spaces",
32
- description='''Inputs should be json of test item e.g., as a dictionary;
33
- output right now is just returning the input; later label will be returned.
34
-
35
- This is how to call the API from Python:
36
-
37
- import requests
38
-
39
- response = requests.post("https://gmshroff-gmserver.hf.space/run/predict", json={
40
- "data": [
41
- "\<put some json string here\>",
42
- ]}).json()
43
-
44
- data = response["data"])
45
-
46
- ''')
47
-
48
- gradio_interface.launch()
49
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -7,3 +7,4 @@ numpy
7
  transformers
8
  google-generativeai
9
  openai
 
 
7
  transformers
8
  google-generativeai
9
  openai
10
+ sentence-transformers
scripts/update_valdata.py DELETED
@@ -1,35 +0,0 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[ ]:
5
-
6
-
7
- from numerapi import NumerAPI
8
- import os
9
- import pandas as pd
10
- import numpy as np
11
-
12
-
13
- # In[ ]:
14
-
15
-
16
- napi = NumerAPI()
17
- data_path='./data/'
18
-
19
-
20
- # In[ ]:
21
-
22
-
23
- napi.download_dataset("v4.2/validation_int8.parquet", data_path+"validation_int8.parquet")
24
- validation_data=pd.read_parquet(data_path+"validation_int8.parquet")
25
- recent_eras=list(validation_data.loc[validation_data['data_type']=='validation']['era'].unique()[-2:])
26
- validation_subset=validation_data[validation_data['era'].isin(recent_eras)]
27
- validation_subset.to_parquet(data_path+"validation_subset_int8.parquet",index=False)
28
- # napi.download_dataset('v4.2/live_int8.parquet',data_path+'live_int8.parquet')
29
-
30
-
31
- # In[ ]:
32
-
33
-
34
- # print("Now please copy the file to server via: scp ../../data/validation_subset_int8.parquet gms@gms1:/home/gms/numerai/data/.")
35
-