Spaces:

smgc
/

pplx2api

Running

App Files Files Community

smgc committed 5 days ago

Commit

1b7f3dd

•

1 Parent(s): 546eb40

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -38

app.py CHANGED Viewed

@@ -7,32 +7,7 @@ import socketio
 import requests
 import logging
 from threading import Event
-import tiktoken  # 引入 tiktoken 库
-from tiktoken import Encoding
-def local_encoding_for_model(model_name: str):
-    """
-    Return an Encoding object named "cl100k_base" if the local encoding file exists.
-    Raises FileNotFoundError when the file is missing. ``model_name`` is not used.
-    """
-    local_encoding_path = '/app/cl100k_base.tiktoken'
-    if os.path.exists(local_encoding_path):
-        with open(local_encoding_path, 'rb') as f:
-            encoding_data = f.read()  # raw bytes of the local encoding file (not referenced below)
-        # Build an Encoding object
-        return Encoding(
-            name="cl100k_base",  # name of the encoding
-            pat_str="",  # regex pattern (if any)
-            mergeable_ranks={},  # merge-rank data (normally loaded from a file or elsewhere)
-            special_tokens={},  # special-token mapping
-            explicit_n_vocab=None  # optional vocabulary size
-        )
-    else:
-        raise FileNotFoundError(f"Local encoding file not found at {local_encoding_path}")
-# 替换 tiktoken 的 encoding_for_model 函数
-tiktoken.encoding_for_model = local_encoding_for_model
 app = Flask(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -100,14 +75,20 @@ def normalize_content(content):
         # 如果是其他类型，返回空字符串
         return ""
-def calculate_tokens_via_tiktoken(text, model="gpt-3.5-turbo"):
     """
-    Count the tokens in ``text`` using the tiktoken encoding for ``model``.
-    Claude token counts are assumed to be close to GPT's, so tiktoken is used as an approximation.
     """
-    encoding = tiktoken.encoding_for_model(model)  # look up the encoder for the model
-    tokens = encoding.encode(text)  # tokenize the text
-    return len(tokens)
 @app.route('/')
 def root():
@@ -145,8 +126,8 @@ def messages():
         # 使用 normalize_content 递归处理 msg['content']
         previous_messages = "\n\n".join([normalize_content(msg['content']) for msg in json_body['messages']])
-        # 动态计算输入的 token 数量，使用 tiktoken 进行 tokenization
-        input_tokens = calculate_tokens_via_tiktoken(previous_messages, model="gpt-3.5-turbo")
         msg_id = str(uuid.uuid4())
         response_event = Event()
@@ -248,8 +229,8 @@ def messages():
                 if sio.connected:
                     sio.disconnect()
-            # 动态计算输出的 token 数量，使用 tiktoken 进行 tokenization
-            output_tokens = calculate_tokens_via_tiktoken(''.join(response_text), model="gpt-3.5-turbo")
             yield create_event("content_block_stop", {"type": "content_block_stop", "index": 0})
             yield create_event("message_delta", {
@@ -323,8 +304,8 @@ def handle_non_stream(previous_messages, msg_id, model, input_tokens):
         # 等待响应完成
         response_event.wait(timeout=30)
-        # 动态计算输出的 token 数量，使用 tiktoken 进行 tokenization
-        output_tokens = calculate_tokens_via_tiktoken(''.join(response_text), model="gpt-3.5-turbo")
         # 生成完整的响应
         full_response = {

 import requests
 import logging
 from threading import Event
+import re
 app = Flask(__name__)
 logging.basicConfig(level=logging.INFO)
         # 如果是其他类型，返回空字符串
         return ""
+def calculate_tokens(text):
     """
+    Estimate the token count of ``text`` with a simple heuristic.
+    - ASCII-only text (e.g. English) is split on whitespace and the pieces are counted.
+    - Text containing any non-ASCII character (e.g. Chinese) is counted per character.
     """
+    # Check whether the text contains at least one non-ASCII character (e.g. Chinese)
+    if re.search(r'[^\x00-\x7F]', text):
+        # Non-ASCII present: count every character of the whole text as one token
+        return len(text)
+    else:
+        # Otherwise count whitespace-separated words
+        tokens = text.split()
+        return len(tokens)
 @app.route('/')
 def root():
         # 使用 normalize_content 递归处理 msg['content']
         previous_messages = "\n\n".join([normalize_content(msg['content']) for msg in json_body['messages']])
+        # 动态计算输入的 token 数量
+        input_tokens = calculate_tokens(previous_messages)
         msg_id = str(uuid.uuid4())
         response_event = Event()
                 if sio.connected:
                     sio.disconnect()
+            # 动态计算输出的 token 数量
+            output_tokens = calculate_tokens(''.join(response_text))
             yield create_event("content_block_stop", {"type": "content_block_stop", "index": 0})
             yield create_event("message_delta", {
         # 等待响应完成
         response_event.wait(timeout=30)
+        # 动态计算输出的 token 数量
+        output_tokens = calculate_tokens(''.join(response_text))
         # 生成完整的响应
         full_response = {