import os
import time
import json
import random

import requests
from flask import Flask, request, Response, jsonify

from helper import create_jwt, github_username_zed_userid_list

app = Flask(__name__)

@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat():
    # Get the payload from the request
    payload = request.json

    # Get the model from the payload, defaulting to "claude-3-5-sonnet-20240620"
    model = payload.get('model', 'claude-3-5-sonnet-20240620')

    # Prepare the request for the LLM API
    url = "https://llm.zed.dev/completion?"
    
    llm_payload = {
        "provider": "anthropic",
        "model": model,
        "provider_request": {
            "model": model,
            "max_tokens": payload.get('max_tokens', 8192),
            "temperature": payload.get('temperature', 0),
            "top_p": payload.get('top_p', 0.7),
            "messages": payload['messages'],
            "stream": payload.get('stream', False),
            "system": payload.get('system', "")
        }
    }

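    # Pick a random GitHub username / Zed user id pair and create a JWT for it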
    github_username, zed_user_id = random.choice(github_username_zed_userid_list)
    jwt = create_jwt(github_username, zed_user_id)

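    # Mimic the headers sent by the Zed desktop client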
    headers = {
        'Host': 'llm.zed.dev',
        'accept': '*/*',
        'content-type': 'application/json',
        'authorization': f'Bearer {jwt}',
        'user-agent': 'Zed/0.149.3 (macos; aarch64)'
    }

    # Get proxy from environment variable
    proxy = os.environ.get('HTTP_PROXY', None)
    proxies = {'http': proxy, 'https': proxy} if proxy else None

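    # Re-emit the upstream stream as OpenAI-style server-sent events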
    def generate():
        with requests.post(url, headers=headers, json=llm_payload, stream=True, proxies=proxies) as response:
            for chunk in response.iter_content(chunk_size=1024):
                if chunk:
                    # Parse the chunk and format it as per OpenAI's streaming format
                    try:
                        data = json.loads(chunk.decode('utf-8'))
                        content = data.get('completion', '')
                        yield f"data: {json.dumps({'choices': [{'delta': {'content': content}}]})}\n\n"
                    except json.JSONDecodeError:
                        continue
            yield "data: [DONE]\n\n"

    if payload.get('stream', False):
        return Response(generate(), content_type='text/event-stream')
    else:
        with requests.post(url, headers=headers, json=llm_payload, proxies=proxies) as response:
            data = response.json()
            return jsonify({
                "id": "chatcmpl-" + os.urandom(12).hex(),
                "object": "chat.completion",
                "created": int(time.time()),
                "model": model,
                "choices": [{
                    "index": 0,
                    "message": {
                        "role": "assistant",
                        "content": data.get('completion', '')
                    },
                    "finish_reason": "stop"
                }],
                "usage": {
                    "prompt_tokens": -1,  # We don't have this information
                    "completion_tokens": -1,  # We don't have this information
                    "total_tokens": -1  # We don't have this information
                }
            })

if __name__ == '__main__':
    app.run(debug=True)
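
# Example client call (a hedged sketch; assumes the Flask dev server above is
# running on its default http://127.0.0.1:5000 and that helper.py provides
# valid credentials):
#
#   import requests
#
#   resp = requests.post(
#       "http://127.0.0.1:5000/hf/v1/chat/completions",
#       json={
#           "model": "claude-3-5-sonnet-20240620",
#           "messages": [{"role": "user", "content": "Hello"}],
#           "stream": False,
#       },
#   )
#   print(resp.json()["choices"][0]["message"]["content"])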