File size: 3,747 Bytes
e100b79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce0c73c
e100b79
ce0c73c
 
e100b79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ce0c73c
 
 
e100b79
 
ecaf86a
e100b79
 
 
 
ce0c73c
e100b79
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import asyncio
import json
import os
import random
import time
from typing import List, Optional, Union

import requests
from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import StreamingResponse
from pydantic import BaseModel

# FastAPI application instance; the route decorator in this module registers its handler on it.
app = FastAPI()

class ChatCompletionMessage(BaseModel):
    """A single chat message: the speaker's role and the message text."""

    # Speaker label; fetch_response emits it verbatim as the "role: content" prefix.
    role: str
    # Message text.
    content: str

class ChatCompletionRequest(BaseModel):
    """Request body accepted by the chat-completions endpoint."""

    # Model name; passed through to the upstream API and echoed in the response.
    model: str
    # Conversation history sent by the client.
    messages: List[ChatCompletionMessage]
    # NOTE(review): temperature and max_tokens are accepted but not read
    # anywhere in the visible code - confirm whether they should be forwarded.
    temperature: Optional[float] = 1.0
    max_tokens: Optional[int] = None
    # When true, the endpoint answers with a text/event-stream response
    # instead of a single JSON body.
    stream: Optional[bool] = False

class ChatCompletionResponse(BaseModel):
    """Non-streaming response body returned by the chat-completions endpoint."""

    # Response identifier; the endpoint builds it as "chatcmpl-" plus random hex.
    id: str
    # Object type tag; the endpoint sets it to "chat.completion".
    object: str
    # Creation time as integer Unix seconds (the endpoint uses int(time.time())).
    created: int
    # Model name echoed from the request.
    model: str
    # Choice dicts; the endpoint emits one entry with index, message and finish_reason.
    choices: List[dict]
    # Dict with prompt_tokens, completion_tokens and total_tokens.
    usage: dict

def generate_random_ip():
    """Return a random dotted-quad IPv4 string.

    The first octet is drawn from 1-255 and the remaining three from 0-255.
    No attempt is made to avoid reserved or private address ranges.
    """
    leading_octet = random.randint(1, 255)
    trailing_octets = [random.randint(0, 255) for _ in range(3)]
    return ".".join(str(octet) for octet in (leading_octet, *trailing_octets))

async def fetch_response(
    messages: List[ChatCompletionMessage],
    model: str,
    timeout: float = 60.0,
):
    """Send the chat history to the upstream chat endpoint and return its JSON.

    The messages are flattened into one "role: content" line per message,
    followed by a fixed guidance sentence, and posted as the ``text`` field.

    Args:
        messages: Conversation history to forward.
        model: Model name forwarded to the upstream API.
        timeout: Seconds to wait for the upstream HTTP call before giving up.

    Returns:
        The decoded JSON body of the upstream response.

    Raises:
        HTTPException: 504 if the upstream call times out, 502 on any other
            network failure or a non-JSON body, or the upstream status code
            when it is not 200.
    """
    your_api_url = "https://chatpro.ai-pro.org/api/ask/openAI"
    # NOTE(review): a freshly randomised X-Forwarded-For value is sent on every
    # call - confirm this is acceptable to the upstream service.
    headers = {
        "content-type": "application/json",
        "X-Forwarded-For": generate_random_ip(),
        "origin": "https://chatpro.ai-pro.org",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
    }

    # Flatten the message list into a single string, keeping the dialogue history.
    conversation = "\n".join(f"{msg.role}: {msg.content}" for msg in messages)

    # Append the guidance sentence (Chinese): "Please focus on and reply to the
    # user's most recent message and avoid answers that summarise the
    # conversation history."
    conversation += "\n请关注并回复user最近的消息并避免总结对话历史的回答"

    data = {
        "text": conversation,
        "endpoint": "openAI",
        "model": model
    }

    def _post():
        # A timeout is mandatory: requests waits indefinitely without one.
        return requests.post(your_api_url, headers=headers, json=data, timeout=timeout)

    try:
        # requests is blocking, so run it in the default executor; calling it
        # directly would stall the event loop for every other request.
        response = await asyncio.get_running_loop().run_in_executor(None, _post)
    except requests.Timeout as exc:
        raise HTTPException(status_code=504, detail="Upstream API timed out") from exc
    except requests.RequestException as exc:
        raise HTTPException(status_code=502, detail="Error from upstream API") from exc

    if response.status_code != 200:
        raise HTTPException(status_code=response.status_code, detail="Error from upstream API")

    try:
        return response.json()
    except ValueError as exc:
        # requests raises a ValueError subclass when the body is not valid JSON.
        raise HTTPException(status_code=502, detail="Invalid JSON from upstream API") from exc

async def stream_response(content: str, model: str = "gpt-3.5-turbo-0613"):
    """Yield *content* as an OpenAI-style server-sent-event stream.

    Three events are produced: one chunk carrying the whole content, one
    empty-delta chunk with ``finish_reason == "stop"``, and the ``[DONE]``
    sentinel. Each chunk is serialised with ``json.dumps`` so that quotes,
    backslashes and newlines in *content* are escaped and cannot break the
    JSON payload or the SSE framing.

    Args:
        content: Full completion text to send.
        model: Model name reported in each chunk.

    Yields:
        ``"data: ...\\n\\n"`` strings ready to be written to a
        ``text/event-stream`` response.
    """
    # Both chunks belong to the same completion, so they share one id and timestamp.
    completion_id = f"chatcmpl-{os.urandom(12).hex()}"
    created = int(time.time())

    def _event(delta, finish_reason):
        chunk = {
            "id": completion_id,
            "object": "chat.completion.chunk",
            "created": created,
            "model": model,
            "choices": [
                {"index": 0, "delta": delta, "finish_reason": finish_reason},
            ],
        }
        return f"data: {json.dumps(chunk)}\n\n"

    # Send the entire content as a single chunk, then the terminating chunk.
    yield _event({"content": content}, None)
    yield _event({}, "stop")
    yield "data: [DONE]\n\n"

@app.post("/hf/v1/chat/completions")
async def chat_completions(request: Request):
    """Handle an OpenAI-style chat-completions request by proxying upstream.

    The JSON body is validated against ``ChatCompletionRequest``, the whole
    message history is forwarded through ``fetch_response``, and the text
    found under the upstream payload's ``"response"`` key is returned either
    as a server-sent-event stream (``stream`` true) or as a single
    ``ChatCompletionResponse``.

    Raises:
        HTTPException: 422 when the body is not valid JSON or does not match
            the request schema; 502 when the upstream payload is not a JSON
            object. Errors raised by ``fetch_response`` propagate unchanged.
    """
    try:
        body = await request.json()
        chat_request = ChatCompletionRequest(**body)
    except (ValueError, TypeError) as exc:
        # ValueError covers malformed JSON and pydantic validation errors;
        # TypeError covers a body that is not a JSON object (``**body`` fails).
        raise HTTPException(status_code=422, detail=f"Invalid request body: {exc}") from exc

    # Pass the entire message history to the upstream API.
    api_response = await fetch_response(chat_request.messages, chat_request.model)

    if not isinstance(api_response, dict):
        raise HTTPException(status_code=502, detail="Unexpected payload from upstream API")

    content = api_response.get("response", "")
    if content is None:
        content = ""
    elif not isinstance(content, str):
        # Keep len() and the response schema well-defined for non-string payloads.
        content = str(content)

    if chat_request.stream:
        return StreamingResponse(stream_response(content), media_type="text/event-stream")

    # Usage figures are character counts, not real token counts.
    prompt_size = sum(len(msg.content) for msg in chat_request.messages)
    completion_size = len(content)

    return ChatCompletionResponse(
        id="chatcmpl-" + os.urandom(12).hex(),
        object="chat.completion",
        created=int(time.time()),
        model=chat_request.model,
        choices=[
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": content
                },
                "finish_reason": "stop"
            }
        ],
        usage={
            "prompt_tokens": prompt_size,
            "completion_tokens": completion_size,
            "total_tokens": prompt_size + completion_size
        }
    )