Update app.py
Browse files
app.py
CHANGED
@@ -28,7 +28,7 @@ class ChatClient:
|
|
28 |
self.model = AutoModelForCausalLM.from_pretrained(model_path).to(self.device)
|
29 |
self.model.eval() # 设置为评估模式
|
30 |
|
31 |
-
|
32 |
"""
|
33 |
生成对话回复。
|
34 |
"""
|
@@ -47,17 +47,15 @@ class ChatClient:
|
|
47 |
}
|
48 |
|
49 |
# 使用生成器生成文本
|
50 |
-
output_sequences = self.model.generate(**inputs, **gen_kwargs)
|
51 |
|
52 |
# 解码生成的文本
|
53 |
# result_text = self.tokenizer.decode(output_sequences[0], skip_special_tokens=True)
|
54 |
# yield result_text
|
55 |
|
56 |
-
#
|
57 |
-
for
|
58 |
-
|
59 |
-
await anyio.sleep(0) # Yield control, simulating asynchronous operation
|
60 |
-
yield result_text
|
61 |
|
62 |
# 创建客户端实例,指定模型路径
|
63 |
model_path = 'model/v3/'
|
@@ -68,7 +66,7 @@ client = ChatClient(model_path)
|
|
68 |
|
69 |
|
70 |
|
71 |
-
|
72 |
message,
|
73 |
history: list[tuple[str, str]],
|
74 |
system_message,
|
@@ -91,21 +89,19 @@ async def respond(
|
|
91 |
|
92 |
response = ""
|
93 |
|
94 |
-
|
95 |
messages,
|
96 |
max_tokens=max_tokens,
|
97 |
stream=True,
|
98 |
temperature=temperature,
|
99 |
top_p=top_p,
|
100 |
):
|
101 |
-
|
102 |
-
|
103 |
-
#
|
104 |
-
|
105 |
-
# response += token
|
106 |
-
# yield response
|
107 |
|
108 |
-
|
|
|
109 |
|
110 |
"""
|
111 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|
|
|
28 |
self.model = AutoModelForCausalLM.from_pretrained(model_path).to(self.device)
|
29 |
self.model.eval() # 设置为评估模式
|
30 |
|
31 |
+
def chat_completion(self, messages, max_tokens, stream=False, temperature=1.0, top_p=1.0):
|
32 |
"""
|
33 |
生成对话回复。
|
34 |
"""
|
|
|
47 |
}
|
48 |
|
49 |
# 使用生成器生成文本
|
50 |
+
# output_sequences = self.model.generate(**inputs, **gen_kwargs)
|
51 |
|
52 |
# 解码生成的文本
|
53 |
# result_text = self.tokenizer.decode(output_sequences[0], skip_special_tokens=True)
|
54 |
# yield result_text
|
55 |
|
56 |
+
# claude 3.5 -- NOTE(review): presumably marks the loop below as suggested by Claude 3.5; confirm or remove
|
57 |
+
for token in self.model.generate(**inputs, **gen_kwargs, streamer=None):
|
58 |
+
yield self.tokenizer.decode(token, skip_special_tokens=True)
|
|
|
|
|
59 |
|
60 |
# 创建客户端实例,指定模型路径
|
61 |
model_path = 'model/v3/'
|
|
|
66 |
|
67 |
|
68 |
|
69 |
+
def respond(
|
70 |
message,
|
71 |
history: list[tuple[str, str]],
|
72 |
system_message,
|
|
|
89 |
|
90 |
response = ""
|
91 |
|
92 |
+
for message in client.chat_completion(
|
93 |
messages,
|
94 |
max_tokens=max_tokens,
|
95 |
stream=True,
|
96 |
temperature=temperature,
|
97 |
top_p=top_p,
|
98 |
):
|
99 |
+
print(message)
|
100 |
+
token = message
|
101 |
+
#token = message.choices[0].delta.content
|
|
|
|
|
|
|
102 |
|
103 |
+
response += token
|
104 |
+
yield response
|
105 |
|
106 |
"""
|
107 |
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
|