Spaces: CMLL / Running on Zero

CMLL committed
Commit b1d449b
1 Parent(s): 0464b4c

Update app.py

Files changed (1):
  1. app.py +75 -33
app.py CHANGED
@@ -1,11 +1,15 @@
+ import spaces
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
  import gradio as gr
+ import os
 
- # Set the device
- device = "cpu" # replace with your device: "cpu", "cuda", "mps"
+ os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
+ USE_CUDA = torch.cuda.is_available()
+ device_ids_parallel = [0]
+ device = torch.device("cuda:{}".format(device_ids_parallel[0]) if USE_CUDA else "cpu")
 
- # Initialize model and tokenizer
+ # 初始化
  peft_model_id = "CMLM/ZhongJing-2-1_8b"
  base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
  model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
@@ -17,47 +21,85 @@ tokenizer = AutoTokenizer.from_pretrained(
      pad_token=''
  )
 
- def get_model_response(question):
-     # Create the prompt without context
+ #单轮
+ @spaces.GPU
+ def single_turn_chat(question):
      prompt = f"Question: {question}"
      messages = [
          {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
          {"role": "user", "content": prompt}
      ]
-
-     # Prepare the input
-     text = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-     model_inputs = tokenizer([text], return_tensors="pt").to(device)
-
-     # Generate the response
-     generated_ids = model.generate(
-         model_inputs.input_ids,
-         max_new_tokens=512
-     )
-     generated_ids = [
-         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-     ]
-
-     # Decode the response
+     input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     model_inputs = tokenizer([input], return_tensors="pt").to(device)
+     generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
+     generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
      response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
      return response
 
- # Define a Gradio interface without the context parameter
- def chat_interface(question):
-     response = get_model_response(question)
-     return response
+ #多轮
+ @spaces.GPU
+ def multi_turn_chat(question, chat_history=None):
+     if not isinstance(question, str):
+         raise ValueError("The question must be a string.")
+
+     if chat_history is None or chat_history == []:
+         chat_history = [{"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."}]
+
+     chat_history.append({"role": "user", "content": question})
+
+     # Apply the chat template and prepare the input
+     inputs = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
+     model_inputs = tokenizer([inputs], return_tensors="pt").to(device)
+
+     try:
+         # Generate the response from the model
+         outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
+         generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
+         response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+     except Exception as e:
+         raise RuntimeError("Error in model generation: " + str(e))
+
+     # Append the assistant's response to the chat history
+     chat_history.append({"role": "assistant", "content": response})
+
+     # Format the chat history for output
+     tempass = ""
+     tempuser = ""
+     formatted_history = []
+     for entry in chat_history:
+         if entry['role'] == 'user':
+             tempuser = entry['content']
+         elif entry['role'] == 'assistant':
+             tempass = entry['content']
+             temp = (tempuser, tempass)
+             formatted_history.append(temp)
+
+     return formatted_history, chat_history
+
+ def clear_history():
+     return [], []
 
- iface = gr.Interface(
-     fn=chat_interface,
+ # 单轮界面
+ single_turn_interface = gr.Interface(
+     fn=single_turn_chat,
      inputs=["text"],
      outputs="text",
-     title="仲景GPT-V2-1.8B",
+     title="仲景GPT-V2-1.8B 单轮对话",
      description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI."
  )
 
- # Launch the interface with sharing enabled
- iface.launch(share=True)
+ # 多轮界面
+ with gr.Blocks() as multi_turn_interface:
+     chatbot = gr.Chatbot(label="仲景GPT-V2-1.8B 多轮对话")
+     state = gr.State([])
+     with gr.Row():
+         with gr.Column(scale=6):
+             user_input = gr.Textbox(label="输入", placeholder="输入你的问题")
+         with gr.Column(scale=6):
+             submit_button = gr.Button("发送")
+
+     submit_button.click(multi_turn_chat, [user_input, state], [chatbot, state])
+     user_input.submit(multi_turn_chat, [user_input, state], [chatbot, state])
+
+ single_turn_interface.launch()
+ multi_turn_interface.launch()