Spaces: CMLL / Running on Zero

CMLL committed
Commit 7161b69
1 parent: d6087e3

Update app.py

Files changed (1)
  1. app.py +31 -48
app.py CHANGED
@@ -2,12 +2,10 @@ import spaces
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
 import gradio as gr
-import os
 
-os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
+# The ZeroGPU environment manages GPU allocation automatically, so we do not set CUDA_VISIBLE_DEVICES
 USE_CUDA = torch.cuda.is_available()
-device_ids_parallel = [0]
-device = torch.device("cuda:{}".format(device_ids_parallel[0]) if USE_CUDA else "cpu")
+device = torch.device("cuda:0" if USE_CUDA else "cpu")
 
 # Initialization
 peft_model_id = "CMLM/ZhongJing-2-1_8b"
@@ -16,12 +14,25 @@ model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
 model.load_adapter(peft_model_id)
 tokenizer = AutoTokenizer.from_pretrained(
     "CMLM/ZhongJing-2-1_8b",
-    padding_side="right",
+    padding_side="right",
     trust_remote_code=True,
     pad_token=''
 )
 
-# Multi-turn
+@spaces.GPU
+def single_turn_chat(question):
+    prompt = f"Question: {question}"
+    messages = [
+        {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
+        {"role": "user", "content": prompt}
+    ]
+    input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+    model_inputs = tokenizer([input], return_tensors="pt").to(device)
+    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
+    generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response
+
 @spaces.GPU
 def multi_turn_chat(question, chat_history=None):
     if not isinstance(question, str):
@@ -32,51 +43,23 @@ def multi_turn_chat(question, chat_history=None):
 
     chat_history.append({"role": "user", "content": question})
 
-    # Apply the chat template and prepare the input
     inputs = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
     model_inputs = tokenizer([inputs], return_tensors="pt").to(device)
-
-    try:
-        # Generate the response from the model
-        outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
-        generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
-        response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-    except Exception as e:
-        raise RuntimeError("Error in model generation: " + str(e))
-
-    # Append the assistant's response to the chat history
-    chat_history.append({"role": "assistant", "content": response})
 
-    # Format the chat history for output
-    tempass = ""
-    tempuser = ""
-    formatted_history = []
-    for entry in chat_history:
-        if entry['role'] == 'user':
-            tempuser = entry['content']
-        elif entry['role'] == 'assistant':
-            tempass = entry['content']
-            temp = tempuser, tempass
-            formatted_history.append(temp)
-
-    return formatted_history, chat_history
-
-
-def clear_history():
-    return [], []
-
-# Multi-turn interface
-with gr.Blocks() as multi_turn_interface:
-    chatbot = gr.Chatbot(label="仲景GPT-V2-1.8B 多轮对话")
-    state = gr.State([])
-    with gr.Row():
-        with gr.Column(scale=6):
-            user_input = gr.Textbox(label="输入", placeholder="输入你的问题")
-        with gr.Column(scale=1):
-            submit_btn = gr.Button("提交")
-            clear_history_btn = gr.Button("清除历史对话")
-    submit_btn.click(multi_turn_chat, [user_input, state], [chatbot, state], concurrency_limit=10)
-    clear_history_btn.click(fn=clear_history, inputs=None, outputs=[chatbot, state], queue=False)
-    user_input.submit(multi_turn_chat, [user_input, state], [chatbot, state], concurrency_limit=10)
-
-multi_turn_interface.launch(debug=True, server_name='0.0.0.0', server_port=6006, max_threads=200)
+    outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
+    generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
+    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+
+    chat_history.append({"role": "assistant", "content": response})
+    return chat_history
+
+# Single-turn interface
+single_turn_interface = gr.Interface(
+    fn=single_turn_chat,
+    inputs=["text"],
+    outputs="text",
+    title="仲景GPT-V2-1.8B 单轮对话",
+    description="Unlocking the Wisdom of Traditional Chinese Medicine with AI."
+)
+
+# The multi-turn interface configuration is unchanged from before
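
For reference, a minimal usage sketch of the two entry points defined in the updated app.py (single_turn_chat and multi_turn_chat come from the diff above; the question strings are made-up examples). Note that multi_turn_chat now returns the full role/content history rather than the (user, assistant) tuple list the removed formatting loop used to build:

# Minimal usage sketch, run inside the Space process; questions are hypothetical examples.
print(single_turn_chat("What is Si Junzi Tang used for?"))

history = []
history = multi_turn_chat("What herbs make up Si Junzi Tang?", history)
print(history[-1]["content"])  # latest assistant reply appended by the function

Because the multi-turn function now returns role/content dicts, displaying its output directly in a gr.Chatbot assumes a Gradio version that accepts the messages format (gr.Chatbot(type="messages")); on older versions the history would need to be converted back to (user, assistant) pairs, as the removed loop did.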