CMLL committed on
Commit d4c9a92
1 Parent(s): 4fbe483

Update app.py

Files changed (1)
  1. app.py +42 -53
app.py CHANGED
@@ -1,16 +1,18 @@
-import spaces
+import spaces  # Import spaces at the top
+import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-import gradio as gr
 
-# The ZeroGPU environment manages GPU allocation automatically, so we do not set CUDA_VISIBLE_DEVICES
-USE_CUDA = torch.cuda.is_available()
-device = torch.device("cuda:0" if USE_CUDA else "cpu")
+# Import the GPU decorator
+from spaces import GPU
 
-# Initialization
+# Set the device to use GPU
+device = "cuda"  # Use CUDA for GPU
+
+# Initialize model and tokenizer
 peft_model_id = "CMLM/ZhongJing-2-1_8b"
 base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
-model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"cuda": 0})
 model.load_adapter(peft_model_id)
 tokenizer = AutoTokenizer.from_pretrained(
     "CMLM/ZhongJing-2-1_8b",
@@ -19,57 +21,44 @@ tokenizer = AutoTokenizer.from_pretrained(
     pad_token=''
 )
 
-@spaces.GPU
-def single_turn_chat(question):
-    try:
-        prompt = f"Question: {question}"
-        messages = [
-            {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
-            {"role": "user", "content": prompt}
-        ]
-        input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        model_inputs = tokenizer([input], return_tensors="pt").to(device)
-        print("Debug: Model inputs prepared successfully.")
-
-        generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
-        print("Debug: Model generation completed successfully.")
-
-        generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
-        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return response
-    except Exception as e:
-        print(f"Error during model invocation: {str(e)}")
-        raise
+@GPU(duration=120)  # Decorate with GPU usage and specify the duration
+def get_model_response(question):
+    # Create the prompt without context
+    prompt = f"Question: {question}"
+    messages = [
+        {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
+        {"role": "user", "content": prompt}
+    ]
 
+    # Prepare the input
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(device)
 
+    # Generate the response
+    generated_ids = model.generate(
+        model_inputs.input_ids,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
 
-@spaces.GPU
-def multi_turn_chat(question, chat_history=None):
-    if not isinstance(question, str):
-        raise ValueError("The question must be a string.")
-
-    if chat_history is None or chat_history == []:
-        chat_history = [{"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."}]
-
-    chat_history.append({"role": "user", "content": question})
-
-    inputs = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
-    model_inputs = tokenizer([inputs], return_tensors="pt").to(device)
-
-    outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
-    generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
-    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-
-    chat_history.append({"role": "assistant", "content": response})
-    return chat_history
+    # Decode the response
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response
 
-# Single-turn interface
-single_turn_interface = gr.Interface(
-    fn=single_turn_chat,
+iface = gr.Interface(
+    fn=get_model_response,  # Directly use the decorated function
     inputs=["text"],
     outputs="text",
-    title="仲景GPT-V2-1.8B 单轮对话",
-    description="Unlocking the Wisdom of Traditional Chinese Medicine with AI."
+    title="仲景GPT-V2-1.8B",
+    description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI."
 )
 
-# The multi-turn interface configuration stays the same as before
+# Launch the interface with sharing enabled
+iface.launch(share=True)
+
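For reference, below is a minimal consolidated sketch of the pattern the updated app.py follows on a ZeroGPU Space. It is a sketch under assumptions, not the committed file: it moves the model with .to("cuda") instead of the committed device_map={"cuda": 0} (dict-style device_map keys normally name modules rather than devices), uses the @spaces.GPU decorator directly instead of a separate from spaces import GPU, loads the tokenizer without the extra keyword arguments that are elided in the diff, and assumes the peft package is installed so load_adapter works.

import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model identifiers taken from the diff above.
peft_model_id = "CMLM/ZhongJing-2-1_8b"
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"

# Load the base model, move it to the GPU, and attach the LoRA adapter
# (load_adapter requires the `peft` package to be installed).
model = AutoModelForCausalLM.from_pretrained(base_model_id).to("cuda")
model.load_adapter(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

@spaces.GPU(duration=120)  # ZeroGPU reserves a GPU for up to 120 s per call
def get_model_response(question: str) -> str:
    messages = [
        {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
        {"role": "user", "content": f"Question: {question}"},
    ]
    # Build the chat prompt and tokenize it on the same device as the model.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    # Drop the prompt tokens so only the newly generated answer is decoded.
    generated_ids = generated_ids[:, model_inputs.input_ids.shape[-1]:]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

iface = gr.Interface(
    fn=get_model_response,
    inputs=["text"],
    outputs="text",
    title="仲景GPT-V2-1.8B",
    description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI.",
)

iface.launch()

The point of the @spaces.GPU decorator is that GPU time is only reserved while the decorated function runs, which is why generation is kept inside get_model_response while model and tokenizer loading happen once at startup.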