Spaces: CMLL / Running on Zero

CMLL committed
Commit b1d449b
1 Parent(s): 0464b4c

Update app.py

Files changed (1):
  1. app.py +75 -33
app.py CHANGED
@@ -1,11 +1,15 @@
+ import spaces
  from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch
  import gradio as gr
+ import os
 
- # Set the device
- device = "cpu" # replace with your device: "cpu", "cuda", "mps"
+ os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"
+ USE_CUDA = torch.cuda.is_available()
+ device_ids_parallel = [0]
+ device = torch.device("cuda:{}".format(device_ids_parallel[0]) if USE_CUDA else "cpu")
 
- # Initialize model and tokenizer
+ # 初始化
  peft_model_id = "CMLM/ZhongJing-2-1_8b"
  base_model_id = "Qwen/Qwen1.5-1.8B-Chat"
  model = AutoModelForCausalLM.from_pretrained(base_model_id, device_map="auto")
@@ -17,47 +21,85 @@ tokenizer = AutoTokenizer.from_pretrained(
      pad_token=''
  )
 
- def get_model_response(question):
-     # Create the prompt without context
+ #单轮
+ @spaces.GPU
+ def single_turn_chat(question):
      prompt = f"Question: {question}"
      messages = [
          {"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."},
          {"role": "user", "content": prompt}
      ]
-
-     # Prepare the input
-     text = tokenizer.apply_chat_template(
-         messages,
-         tokenize=False,
-         add_generation_prompt=True
-     )
-     model_inputs = tokenizer([text], return_tensors="pt").to(device)
-
-     # Generate the response
-     generated_ids = model.generate(
-         model_inputs.input_ids,
-         max_new_tokens=512
-     )
-     generated_ids = [
-         output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
-     ]
-
-     # Decode the response
+     input = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+     model_inputs = tokenizer([input], return_tensors="pt").to(device)
+     generated_ids = model.generate(model_inputs.input_ids, max_new_tokens=512)
+     generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)]
      response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]
      return response
 
- # Define a Gradio interface without the context parameter
- def chat_interface(question):
-     response = get_model_response(question)
-     return response
+ #多轮
+ @spaces.GPU
+ def multi_turn_chat(question, chat_history=None):
+     if not isinstance(question, str):
+         raise ValueError("The question must be a string.")
+
+     if chat_history is None or chat_history == []:
+         chat_history = [{"role": "system", "content": "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来 of Fudan University."}]
+
+     chat_history.append({"role": "user", "content": question})
+
+     # Apply the chat template and prepare the input
+     inputs = tokenizer.apply_chat_template(chat_history, tokenize=False, add_generation_prompt=True)
+     model_inputs = tokenizer([inputs], return_tensors="pt").to(device)
+
+     try:
+         # Generate the response from the model
+         outputs = model.generate(model_inputs.input_ids, max_new_tokens=512)
+         generated_ids = outputs[:, model_inputs.input_ids.shape[-1]:]
+         response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+     except Exception as e:
+         raise RuntimeError("Error in model generation: " + str(e))
+
+     # Append the assistant's response to the chat history
+     chat_history.append({"role": "assistant", "content": response})
+
+     # Format the chat history for output
+     tempass = ""
+     tempuser = ""
+     formatted_history = []
+     for entry in chat_history:
+         if entry['role'] == 'user':
+             tempuser = entry['content']
+         elif entry['role'] == 'assistant':
+             tempass = entry['content']
+             temp = (tempuser, tempass)
+             formatted_history.append(temp)
+
+     return formatted_history, chat_history
+
+ def clear_history():
+     return [], []
 
- iface = gr.Interface(
-     fn=chat_interface,
+ # 单轮界面
+ single_turn_interface = gr.Interface(
+     fn=single_turn_chat,
      inputs=["text"],
      outputs="text",
-     title="仲景GPT-V2-1.8B",
+     title="仲景GPT-V2-1.8B 单轮对话",
      description="博极医源,精勤不倦。Unlocking the Wisdom of Traditional Chinese Medicine with AI."
  )
 
- # Launch the interface with sharing enabled
- iface.launch(share=True)
+ # 多轮界面
+ with gr.Blocks() as multi_turn_interface:
+     chatbot = gr.Chatbot(label="仲景GPT-V2-1.8B 多轮对话")
+     state = gr.State([])
+     with gr.Row():
+         with gr.Column(scale=6):
+             user_input = gr.Textbox(label="输入", placeholder="输入你的问题")
+         with gr.Column(scale=6):
+             submit_button = gr.Button("发送")
+
+     submit_button.click(multi_turn_chat, [user_input, state], [chatbot, state])
+     user_input.submit(multi_turn_chat, [user_input, state], [chatbot, state])
+
+ single_turn_interface.launch()
+ multi_turn_interface.launch()