CMLL committed on
Commit d4c9a92
1 Parent(s): 4fbe483

Update app.py

Files changed (1)
  1. app.py +42 -53
app.py CHANGED
@@ -1,16 +1,18 @@
-import spaces
+import spaces  # Import spaces at the top
+import gradio as gr
 from transformers import AutoModelForCausalLM, AutoTokenizer
 import torch
-import gradio as gr
 
-# The ZeroGPU environment manages GPU allocation automatically, so we do not set CUDA_VISIBLE_DEVICES
-USE_CUDA = torch.cuda.is_available()
-device = torch.device("cuda:0" if USE_CUDA else "cpu")
+# Import the GPU decorator
+from spaces import GPU
 
-# Initialization
+# Set the device to use GPU
+device = "cuda"  # Use CUDA for GPU
+
+# Initialize model and tokenizer
 peft_model_id = "CMLM/ZhongJing-2-1_8b"
 base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
-model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map={"cuda": 0})
 model.load_adapter(peft_model_id)
 tokenizer = AutoTokenizer.from_pretrained(
     "CMLM/ZhongJing-2-1_8b",
@@ -19,57 +21,44 @@ tokenizer = AutoTokenizer.from_pretrained(
     pad_token=''
 )
 
-@spaces.GPU
-def single_turn_chat(question):
-    try:
-        prompt = f"Question: {question}"
-        messages = [
-            {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
-            {"role": "user", "content": prompt}
-        ]
-        input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
-        model_inputs = tokenizer([input], return_tensors="pt").to(device)
-        print("Debug: Model inputs prepared successfully.")
-
-        generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
-        print("Debug: Model generation completed successfully.")
-
-        generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
-        response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
-        return response
-    except Exception as e:
-        print(f"Error during model invocation: {str(e)}")
-        raise
+@GPU(duration=120)  # Decorate with GPU usage and specify the duration
+def get_model_response(question):
+    # Create the prompt without context
+    prompt = f"Question: {question}"
+    messages = [
+        {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
+        {"role": "user", "content": prompt}
+    ]
 
+    # Prepare the input
+    text = tokenizer.apply_chat_template(
+        messages,
+        tokenize=False,
+        add_generation_prompt=True
+    )
+    model_inputs = tokenizer([text], return_tensors="pt").to(device)
 
+    # Generate the response
+    generated_ids = model.generate(
+        model_inputs.input_ids,
+        max_new_tokens=512
+    )
+    generated_ids = [
+        output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
+    ]
 
-@spaces.GPU
-def multi_turn_chat(question, chat_history=None):
-    if not isinstance(question, str):
-        raise ValueError("The question must be a string.")
-
-    if chat_history is None or chat_history == []:
-        chat_history = [{"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."}]
-
-    chat_history.append({"role": "user", "content": question})
-
-    inputs = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
-    model_inputs = tokenizer([inputs], return_tensors="pt").to(device)
-
-    outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
-    generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
-    response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
-
-    chat_history.append({"role": "assistant", "content": response})
-    return chat_history
+    # Decode the response
+    response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
+    return response
 
-# Single-turn interface
-single_turn_interface = gr.Interface(
-    fn=single_turn_chat,
+iface = gr.Interface(
+    fn=get_model_response,  # Directly use the decorated function
     inputs=["text"],
     outputs="text",
-    title="仲景GPT-V2-1.8B 单轮对话",
-    description="Unlocking the Wisdom of Traditional Chinese Medicine with AI."
+    title="仲景GPT-V2-1.8B",
+    description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI."
 )
 
-# The multi-turn interface configuration stays the same as before
+# Launch the interface with sharing enabled
+iface.launch(share=True)
+
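For reference, below is a minimal consolidated sketch of the pattern the updated app.py follows on a ZeroGPU Space. It is a sketch under assumptions, not the committed file: it moves the model with .to("cuda") instead of the committed device_map={"cuda": 0} (dict-style device_map keys normally name modules rather than devices), uses the @spaces.GPU decorator directly instead of a separate from spaces import GPU, loads the tokenizer without the extra keyword arguments that are elided in the diff, and assumes the peft package is installed so load_adapter works.

import spaces
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model identifiers taken from the diff above.
peft_model_id = "CMLM/ZhongJing-2-1_8b"
base_model_id = "Qwen/Qwen1.5-1.8B-Chat"

# Load the base model, move it to the GPU, and attach the LoRA adapter
# (load_adapter requires the `peft` package to be installed).
model = AutoModelForCausalLM.from_pretrained(base_model_id).to("cuda")
model.load_adapter(peft_model_id)
tokenizer = AutoTokenizer.from_pretrained(peft_model_id)

@spaces.GPU(duration=120)  # ZeroGPU reserves a GPU for up to 120 s per call
def get_model_response(question: str) -> str:
    messages = [
        {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
        {"role": "user", "content": f"Question: {question}"},
    ]
    # Build the chat prompt and tokenize it on the same device as the model.
    text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
    # Drop the prompt tokens so only the newly generated answer is decoded.
    generated_ids = generated_ids[:, model_inputs.input_ids.shape[-1]:]
    return tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]

iface = gr.Interface(
    fn=get_model_response,
    inputs=["text"],
    outputs="text",
    title="仲景GPT-V2-1.8B",
    description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI.",
)

iface.launch()

The point of the @spaces.GPU decorator is that GPU time is only reserved while the decorated function runs, which is why generation is kept inside get_model_response while model and tokenizer loading happen once at startup.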