smgc committed on
Commit
546eb40
1 Parent(s): 9e02e2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -1
app.py CHANGED
@@ -8,17 +8,32 @@ import requests
8
  import logging
9
  from threading import Event
10
  import tiktoken # 引入 tiktoken 库
 
11
 
12
  def local_encoding_for_model(model_name: str):
 
 
 
13
  local_encoding_path = '/app/cl100k_base.tiktoken'
14
  if os.path.exists(local_encoding_path):
15
  with open(local_encoding_path, 'rb') as f:
16
- return f.read() # 返回本地编码文件的内容
 
 
 
 
 
 
 
 
 
17
  else:
18
  raise FileNotFoundError(f"Local encoding file not found at {local_encoding_path}")
19
 
 
20
  tiktoken.encoding_for_model = local_encoding_for_model
21
 
 
22
  app = Flask(__name__)
23
  logging.basicConfig(level=logging.INFO)
24
 
 
8
  import logging
9
  from threading import Event
10
  import tiktoken # 引入 tiktoken 库
11
+ from tiktoken import Encoding
12
 
13
  def local_encoding_for_model(model_name: str):
14
+ """
15
+ 从本地加载编码文件并返回一个 Encoding 对象。
16
+ """
17
  local_encoding_path = '/app/cl100k_base.tiktoken'
18
  if os.path.exists(local_encoding_path):
19
  with open(local_encoding_path, 'rb') as f:
20
+ encoding_data = f.read() # 读取本地编码文件的字节内容
21
+
22
+ # 构造一个 Encoding 对象
23
+ return Encoding(
24
+ name="cl100k_base", # 编码的名称
25
+ pat_str="", # 正则表达式(如果有)
26
+ mergeable_ranks={}, # 合并的 rank 数据(通常是从文件或其他地方加载)
27
+ special_tokens={}, # 特殊 token 映射
28
+ explicit_n_vocab=None # 可选的词汇表大小
29
+ )
30
  else:
31
  raise FileNotFoundError(f"Local encoding file not found at {local_encoding_path}")
32
 
33
+ # 替换 tiktoken 的 encoding_for_model 函数
34
  tiktoken.encoding_for_model = local_encoding_for_model
35
 
36
+
37
  app = Flask(__name__)
38
  logging.basicConfig(level=logging.INFO)
39