tianlong12 committed
Commit c2f8097
Parent: c61a34f

Create app.py

Files changed (1):
  app.py (+89, -0)

app.py ADDED
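
Note: app.py below imports create_jwt and github_username_zed_userid_list from a local helper module that is not included in this commit. Only those two names come from the import line; the sketch here is a hypothetical placeholder showing the shapes the app expects (a list of (github_username, zed_user_id) pairs and a function returning a signed JWT string), not the author's implementation. The claims, secret, and expiry are assumptions.

# helper.py (hypothetical sketch, not part of this commit)
import time

import jwt  # PyJWT

# Assumed shape: (github_username, zed_user_id) pairs for the app to sample from.
github_username_zed_userid_list = [
    ("example-user", "123456"),
]

def create_jwt(github_username: str, zed_user_id: str) -> str:
    # Mint a short-lived JWT. The real claims and secret used by
    # llm.zed.dev are not shown in this commit and are assumed here.
    now = int(time.time())
    claims = {
        "sub": zed_user_id,
        "githubUserLogin": github_username,
        "iat": now,
        "exp": now + 3600,  # assumed 1-hour lifetime
    }
    return jwt.encode(claims, "REPLACE_WITH_SECRET", algorithm="HS256")
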
import os
import time
import json
import random

import requests
from flask import Flask, request, Response, jsonify

from helper import create_jwt, github_username_zed_userid_list

app = Flask(__name__)


@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat():
    # Get the OpenAI-style payload from the request
    payload = request.json

    # Get the model from the payload, defaulting to "claude-3-5-sonnet-20240620"
    model = payload.get('model', 'claude-3-5-sonnet-20240620')

    # Upstream LLM API endpoint
    url = "https://llm.zed.dev/completion?"

    # Wrap the incoming request in the provider_request envelope the upstream expects
    llm_payload = {
        "provider": "anthropic",
        "model": model,
        "provider_request": {
            "model": model,
            "max_tokens": payload.get('max_tokens', 8192),
            "temperature": payload.get('temperature', 0),
            "top_p": payload.get('top_p', 0.7),
            "messages": payload['messages'],
            "stream": payload.get('stream', False),
            "system": payload.get('system', "")
        }
    }

    # Pick a random credential pair and mint a JWT for it
    github_username, zed_user_id = random.choice(github_username_zed_userid_list)
    jwt = create_jwt(github_username, zed_user_id)

    # Headers mimic the Zed editor client
    headers = {
        'Host': 'llm.zed.dev',
        'accept': '*/*',
        'content-type': 'application/json',
        'authorization': f'Bearer {jwt}',
        'user-agent': 'Zed/0.149.3 (macos; aarch64)'
    }

    # Route traffic through a proxy if HTTP_PROXY is set in the environment
    proxy = os.environ.get('HTTP_PROXY')
    proxies = {'http': proxy, 'https': proxy} if proxy else None

    def generate():
        with requests.post(url, headers=headers, json=llm_payload, stream=True, proxies=proxies) as response:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    # Re-emit each upstream chunk in OpenAI's streaming format;
                    # chunks that do not decode as complete JSON are skipped.
                    try:
                        data = json.loads(chunk.decode('utf-8'))
                        content = data.get('completion', '')
                        yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
                    except json.JSONDecodeError:
                        continue
        yield "data: [DONE]\n\n"

    if payload.get('stream', False):
        return Response(generate(), content_type='text/event-stream')
    else:
        with requests.post(url, headers=headers, json=llm_payload, proxies=proxies) as response:
            data = response.json()
            return jsonify({
                "id": "chatcmpl-" + os.urandom(12).hex(),
                "object": "chat.completion",
                "created": int(time.time()),
                "model": model,
                "choices": [{
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": data.get('completion', '')
                    },
                    "finish_reason": "stop"
                }],
                "usage": {
                    "prompt_tokens": -1,      # token counts are not reported upstream
                    "completion_tokens": -1,  # token counts are not reported upstream
                    "total_tokens": -1        # token counts are not reported upstream
                }
            })


if __name__ == '__main__':
    app.run(debug=True)
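
For a quick local check, the endpoint can be called OpenAI-style. A minimal sketch, assuming the Flask dev server is running on its default port 5000 (the route and request fields come from app.py above):

# Hypothetical client call against a locally running instance
import requests

resp = requests.post(
    "http://127.0.0.1:5000/hf/v1/chat/completions",
    json={
        "model": "claude-3-5-sonnet-20240620",
        "messages": [{"role": "user", "content": "Hello"}],
        "stream": False,
    },
)
print(resp.json()["choices"][0]["message"]["content"])

With "stream": true the server instead returns a text/event-stream response that ends with data: [DONE], as produced by generate() above.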