qwen2.5

Running on Zero

File size: 6,270 Bytes

bbe7b0a
f3b7005
 
 
d4c9a92
6c69482
0464b4c
d71ad7e
bbe7b0a
 
 
 
 
 
86ba4d3
ea3aa47
74f56e5
df10446
74f56e5
 
bbe7b0a
af7806f
0db1a0f
bbe7b0a
 
ea3aa47
bbe7b0a
74f56e5
 
bbe7b0a
abea35b
4ed0b9b
 
 
 
711c481
d71ad7e
4ed0b9b
 
abea35b
4ed0b9b
bbe7b0a
 
4ed0b9b
dd1d2c2
bbe7b0a
 
0db1a0f
bbe7b0a
 
 
8560f66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6e999ef
4ed0b9b
bbe7b0a
4ed0b9b
dd1d2c2
4ed0b9b
bbe7b0a
4ed0b9b
ea3aa47
bbe7b0a
 
 
4ed0b9b
bbe7b0a
 
 
4ed0b9b
bbe7b0a
 
4ed0b9b
bbe7b0a
 
 
 
 
 
4ed0b9b
bbe7b0a
 
 
 
 
 
4ed0b9b
 
bbe7b0a
 
 
 
 
 
4ed0b9b
bbe7b0a
d71ad7e
746a084
d71ad7e
74f56e5
0db1a0f
 
4ed0b9b
7161b69
b1d449b
0db1a0f
 
ad01e97
 
0db1a0f
bbe7b0a
 
0db1a0f

import os
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MAX_MAX_NEW_TOKENS = 2048
DEFAULT_MAX_NEW_TOKENS = 1024
MAX_INPUT_TOKEN_LENGTH = int(os.getenv("MAX_INPUT_TOKEN_LENGTH", "4096"))

DESCRIPTION = """\
# FulPhil-仲景中医大语言模型-ZhongJingGPT-V2-1_8b-First LLM in TCM

致敬仲景先师,融汇古典与智能。本模型由医哲未来(FulPhil)研发,是专注于中医药领域的强大语言模型。它能够运用传统中医理论,结合现代人工智能技术,为中医研究和应用提供卓越助力。欢迎关注我们的 [GitHub主页](https://github.com/pariskang/CMLM-ZhongJing) 及模型 [ZhongJing-2-1_8b](https://huggingface.co/CMLM/ZhongJing-2-1_8b) 下载体验！
Paying tribute to the ancient master Zhang Zhongjing, this model integrates classical knowledge with modern intelligence. Developed by FulPhil (Future Medicine Philosophy), it is a powerful language model focused on the field of Traditional Chinese Medicine (TCM). It employs traditional TCM theories combined with contemporary artificial intelligence technology to provide excellent support for TCM research and applications. Welcome to visit our [GitHub homepage](https://github.com/pariskang/CMLM-ZhongJing) and download the model [ZhongJing-2-1_8b-merge](https://huggingface.co/CMLM/ZhongJing-2-1_8b) for a trial experience!
请注意!!!本模型不得用于任何医疗或潜在具有医疗或康养建议的任何场景，目前仍为科研测试阶段，敬请帮我们提出宝贵意见，谢谢。
Please note!!! This model should not be used for any medical purposes or scenarios potentially involving medical or health advice. It is currently still in the research and testing stage. We sincerely request your valuable feedback. Thank you.
"""


LICENSE = """
<p/>

---
As a derivate work of [ZhongJing-2-1_8b](https://huggingface.co/CMLM/ZhongJing-2-1_8b) by FulPhil,
this demo is governed by the original [license](https://huggingface.co/CMLM/ZhongJing-2-1_8b/blob/main/LICENSE.txt) and [acceptable use policy](https://huggingface.co/CMLM/ZhongJing-2-1_8b/blob/main/USE_POLICY.md).
"""

if not torch.cuda.is_available():
    DESCRIPTION += "\n<p>Running on CPU 🥶 This demo does not work on CPU.</p>"

if torch.cuda.is_available():
    model_id = "Qwen/Qwen2.5-1.5B-Instruct"
    model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", device_map="auto")
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    tokenizer.use_default_system_prompt = False

@spaces.GPU
def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = "You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference.",
    max_new_tokens: int = 1024,
    temperature: float = 0.6,
    top_p: float = 0.95,
    top_k: int = 50,
    repetition_penalty: float = 1.2,
) -> Iterator[str]:
    conversation = [{"role": "system", "content": system_prompt}]
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt")
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
        gr.Warning(f"Trimmed input from conversation as it was longer than {MAX_INPUT_TOKEN_LENGTH} tokens.")
    input_ids = input_ids.to(model.device)

    streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(
        {"input_ids": input_ids},
        streamer=streamer,
        max_new_tokens=max_new_tokens,
        do_sample=True,
        top_p=top_p,
        top_k=top_k,
        temperature=temperature,
        num_beams=1,
        repetition_penalty=repetition_penalty,
    )
    t = Thread(target=model.generate, kwargs=generate_kwargs)
    t.start()

    outputs = []
    for text in streamer:
        outputs.append(text)
        yield "".join(outputs)

chat_interface = gr.ChatInterface(
    fn=generate,
    additional_inputs=[
        gr.Textbox(label="System prompt", lines=6, value="You are a helpful TCM assistant named 仲景中医大语言模型, created by 医哲未来. You can switch between Chinese and English based on user preference."),
        gr.Slider(
            label="Max new tokens",
            minimum=1,
            maximum=MAX_MAX_NEW_TOKENS,
            step=1,
            value=DEFAULT_MAX_NEW_TOKENS,
        ),
        gr.Slider(
            label="Temperature",
            minimum=0.1,
            maximum=4.0,
            step=0.1,
            value=0.6,
        ),
        gr.Slider(
            label="Top-p (nucleus sampling)",
            minimum=0.05,
            maximum=1.0,
            step=0.05,
            value=0.9,
        ),
        gr.Slider(
            label="Top-k",
            minimum=1,
            maximum=1000,
            step=1,
            value=50,
        ),
        gr.Slider(
            label="Repetition penalty",
            minimum=1.0,
            maximum=2.0,
            step=0.05,
            value=1.2,
        ),
    ],
    stop_btn=None,
    examples=[
        ["你能简要解释一下什么是中医吗？"],
        ["简述《伤寒杂病论》的主要内容。"],
        ["中医如何治疗失眠？"],
        ["我发热，咳嗽，咽痛，舌苔黄腻，脉滑数，请给出中医诊断及处方？"],
        ["写一篇关于‘AI在中医研究中的应用’的100字文章。"],
        ["写一篇从中医角度关于‘秋季女性健康调养方案‘的1000字科普文章，从季节变化、饮食调理、活动养生等方面进行阐述"],
    ],
)

with gr.Blocks(css="style.css") as demo:
    gr.Markdown(DESCRIPTION)
    gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
    chat_interface.render()
    gr.Markdown(LICENSE)

if __name__ == "__main__":
    demo.queue(max_size=20).launch()