import os
import time
import random
import json
import asyncio
import requests
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import List, Optional

app = FastAPI()


class ChatCompletionMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatCompletionMessage]
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False


class ChatCompletionResponse(BaseModel):
    id: str
    object: str
    created: int
    model: str
    choices: List[dict]
    usage: dict


def generate_random_ip():
    """Generate a random IPv4 address for the X-Forwarded-For header."""
    return f"{random.randint(1, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}.{random.randint(0, 255)}"


async def fetch_response(messages: List[ChatCompletionMessage], model: str):
    your_api_url = "https://chatpro.ai-pro.org/api/ask/openAI"
    headers = {
        "content-type": "application/json",
        "X-Forwarded-For": generate_random_ip(),
        "origin": "https://chatpro.ai-pro.org",
        "user-agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
            "(KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
        ),
    }
    # Flatten the chat history into a single prompt string for the upstream API.
    conversation = "\n".join(f"{msg.role}: {msg.content}" for msg in messages)
    # Upstream hint (Chinese): "Focus on and reply to the user's most recent
    # message; avoid answers that summarize the conversation history."
    conversation += "\n请关注并回复user最近的消息并避免总结对话历史的回答"
    data = {
        "text": conversation,
        "endpoint": "openAI",
        "model": model,
    }
    # requests is blocking; run it in a worker thread so the event loop stays free.
    response = await asyncio.to_thread(requests.post, your_api_url, headers=headers, json=data)
    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Error from upstream API")
    return response.json()


async def stream_response(content: str, model: str):
    chat_id = f"chatcmpl-{os.urandom(12).hex()}"
    # Send the entire content as a single chunk.
    first_chunk = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
    }
    yield f"data: {json.dumps(first_chunk)}\n\n"
    # Send the final chunk that closes the stream.
    final_chunk = {
        "id": chat_id,
        "object": "chat.completion.chunk",
        "created": int(time.time()),
        "model": model,
        "choices": [{"index": 0, "delta": {}, "finish_reason": "stop"}],
    }
    yield f"data: {json.dumps(final_chunk)}\n\n"
    yield "data: [DONE]\n\n"


@app.post("/hf/v1/chat/completions")
async def chat_completions(request: Request):
    body = await request.json()
    chat_request = ChatCompletionRequest(**body)
    api_response = await fetch_response(chat_request.messages, chat_request.model)
    content = api_response.get("response", "")

    if chat_request.stream:
        return StreamingResponse(
            stream_response(content, chat_request.model),
            media_type="text/event-stream",
        )

    # Character counts stand in for real token counts here.
    prompt_chars = sum(len(msg.content) for msg in chat_request.messages)
    return ChatCompletionResponse(
        id="chatcmpl-" + os.urandom(12).hex(),
        object="chat.completion",
        created=int(time.time()),
        model=chat_request.model,
        choices=[
            {
                "index": 0,
                "message": {"role": "assistant", "content": content},
                "finish_reason": "stop",
            }
        ],
        usage={
            "prompt_tokens": prompt_chars,
            "completion_tokens": len(content),
            "total_tokens": prompt_chars + len(content),
        },
    )
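

# A minimal entry point for running the app locally: a sketch assuming uvicorn
# is installed (pip install uvicorn). The host, port, and the model name in the
# example request are illustrative choices, not part of the original service.
#
# Example request once the server is running:
#   curl http://localhost:8000/hf/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"model": "gpt-3.5-turbo", "messages": [{"role": "user", "content": "Hi"}]}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)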