"""OpenAI-compatible chat-completions shim in front of chatpro.ai-pro.org.

Exposes ``POST /hf/v1/chat/completions``. The incoming OpenAI-style message
list is flattened into one prompt string, forwarded to the upstream SSE
endpoint, and the upstream's cumulative ``text`` events are converted back
into OpenAI-style responses (streamed chunks or one complete object).
"""
import json
import random

import requests
import sseclient
from flask import Flask, Response, request, stream_with_context

app = Flask(__name__)

# Upstream endpoint that actually produces the completion.
ORIGINAL_API_URL = 'https://chatpro.ai-pro.org/api/ask/openAI'

# (connect, read) timeouts in seconds. With stream=True the read timeout
# bounds the gap between received bytes, not the total response time.
UPSTREAM_TIMEOUT = (10, 120)


def generate_random_ip() -> str:
    """Return a random dotted-quad string used for the X-Forwarded-For header.

    The first octet is drawn from 1-255 and the remaining three from 0-255;
    no attempt is made to avoid reserved or private ranges.
    """
    return f"{random.randint(1,255)}.{random.randint(0,255)}.{random.randint(0,255)}.{random.randint(0,255)}"


def format_openai_response(content, finish_reason=None, model="gpt-4o"):
    """Build one ``chat.completion.chunk`` dict in the OpenAI streaming shape.

    Args:
        content: Text fragment for this chunk. A falsy value produces an
            empty ``delta`` (used for the terminating chunk).
        finish_reason: Value for the choice-level ``finish_reason`` field;
            ``None`` while the stream is still producing text.
        model: Model name to report in the chunk. Defaults to ``"gpt-4o"``.

    Returns:
        A JSON-serialisable dict with a single choice at index 0.
    """
    return {
        "id": "chatcmpl-123",
        "object": "chat.completion.chunk",
        "created": 1677652288,
        "model": model,
        "choices": [{
            # finish_reason belongs on the choice, not inside the delta; the
            # terminating chunk carries an empty delta.
            "delta": {"content": content} if content else {},
            "index": 0,
            "finish_reason": finish_reason,
        }],
    }


def _build_conversation(messages):
    """Flatten the message list into one ``role: content`` line per message.

    Raises:
        KeyError, TypeError: If an entry is not a mapping with ``role`` and
            ``content`` keys (or ``messages`` is not iterable).
    """
    # Turn the message list into a single string, keeping the dialogue history.
    conversation = "\n".join(f"{msg['role']}: {msg['content']}" for msg in messages)
    # Append the guidance sentence sent to the upstream model (runtime text,
    # intentionally left exactly as the upstream prompt expects it).
    conversation += "\n请关注并回复user最近的消息并避免总结对话历史的回答"
    return conversation


def _iter_upstream_deltas(payload, headers):
    """Yield each newly produced text fragment from the upstream SSE stream.

    The upstream sends the full text so far in every ``{"text": ...}`` event,
    so only the suffix beyond what was already seen is yielded. Iteration
    stops at the first event containing ``"final":true`` or when the upstream
    stream ends.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-2xx upstream status.
    """
    last_content = ""
    # The context manager releases the streamed connection even when the
    # consumer stops early or an exception propagates.
    with requests.post(
        ORIGINAL_API_URL,
        headers=headers,
        json=payload,
        stream=True,
        timeout=UPSTREAM_TIMEOUT,
    ) as response:
        response.raise_for_status()
        client = sseclient.SSEClient(response)
        for event in client.events():
            if event.data.startswith('{"text":'):
                text = json.loads(event.data)['text']
                new_content = text[len(last_content):]
                last_content = text
                if new_content:
                    yield new_content
            elif '"final":true' in event.data:
                break


def _sse(obj):
    """Serialise ``obj`` as one ``data:`` server-sent-event frame."""
    return f"data: {json.dumps(obj)}\n\n"


@app.route('/hf/v1/chat/completions', methods=['POST'])
def chat_completions():
    """Handle an OpenAI-style chat-completions request.

    Reads ``messages``, ``stream`` and ``model`` from the JSON body. Returns
    a ``text/event-stream`` response of completion chunks when ``stream`` is
    truthy, otherwise a single ``chat.completion`` object.

    Error responses:
        400: missing/empty ``messages`` or malformed message entries.
        502: the upstream request failed (non-stream mode only; in stream
            mode an ``error`` event is emitted before ``[DONE]``).
    """
    # silent=True: a missing or unparsable JSON body becomes None instead of
    # raising, so it is reported through the 400 path below.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}
    messages = data.get('messages', [])
    stream = data.get('stream', False)

    if not messages:
        return {"error": "No messages provided"}, 400

    try:
        conversation = _build_conversation(messages)
    except (KeyError, TypeError):
        return {"error": "Each message must be an object with 'role' and 'content'"}, 400

    model = data.get('model', 'gpt-4o')

    headers = {
        'content-type': 'application/json',
        'X-Forwarded-For': generate_random_ip(),
        'origin': 'https://chatpro.ai-pro.org',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36'
    }

    payload = {
        "text": conversation,
        "endpoint": "openAI",
        "model": model
    }

    def generate():
        """Yield OpenAI-style SSE frames for the streaming response."""
        try:
            for delta in _iter_upstream_deltas(payload, headers):
                yield _sse(format_openai_response(delta, model=model))
        except requests.RequestException:
            # Headers are already sent at this point, so the failure can only
            # be reported in-band.
            app.logger.exception("Upstream request failed")
            yield _sse({"error": {"message": "Upstream request failed", "type": "upstream_error"}})
        else:
            # Always terminate cleanly, even if the upstream closed without
            # sending its "final" event.
            yield _sse(format_openai_response('', 'stop', model=model))
        yield "data: [DONE]\n\n"

    if stream:
        return Response(stream_with_context(generate()), content_type='text/event-stream')

    try:
        full_response = "".join(_iter_upstream_deltas(payload, headers))
    except requests.RequestException:
        app.logger.exception("Upstream request failed")
        return {"error": "Upstream request failed"}, 502

    return {
        "id": "chatcmpl-123",
        "object": "chat.completion",
        "created": 1677652288,
        "model": model,
        "choices": [{
            "index": 0,
            "message": {
                "role": "assistant",
                "content": full_response
            },
            "finish_reason": "stop"
        }],
        "usage": {
            "prompt_tokens": 0,
            "completion_tokens": 0,
            "total_tokens": 0
        }
    }


if __name__ == '__main__':
    # NOTE(review): debug=True enables the Werkzeug interactive debugger and
    # reloader; do not expose this entry point beyond local development.
    app.run(debug=True, port=5000)