Spaces: CMLL / Running on Zero

CMLL committed · Commit 220ce3a · 1 parent: 8724a4d

Update app.py

Files changed (1): app.py (+14, -15)
app.py CHANGED
@@ -36,16 +36,14 @@ if torch.cuda.is_available():
 def generate(
     message: str,
     chat_history: list[tuple[str, str]],
-    system_prompt: str,
+    system_prompt: str = "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来.",
     max_new_tokens: int = 1024,
     temperature: float = 0.6,
     top_p: float = 0.9,
     top_k: int = 50,
     repetition_penalty: float = 1.2,
 ) -> Iterator[str]:
-    conversation = []
-    if system_prompt:
-        conversation.append({"role": "system", "content": system_prompt})
+    conversation = [{"role": "system", "content": system_prompt}]
     for user, assistant in chat_history:
         conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
     conversation.append({"role": "user", "content": message})
@@ -61,26 +59,27 @@ def generate(
         "repetition_penalty": repetition_penalty,
     }
 
+    # Function to run the generation
     def run_generation():
         try:
-            return pipe(input_text, **generate_kwargs)
+            results = pipe(input_text, **generate_kwargs)
+            return results
         except Exception as e:
-            gr.Error(f"Error in generation: {e}")
-            return []
-
-    t = Thread(target=run_generation)
-    t.start()
-    t.join()  # Ensure the thread completes before proceeding
+            return [f"Error in generation: {e}"]
 
+    # Run generation in a separate thread and wait for it to finish
     outputs = []
-    for text in run_generation():
-        outputs.append(text['generated_text'])
-        yield "".join(outputs)
+    generation_thread = Thread(target=lambda: outputs.extend(run_generation()))
+    generation_thread.start()
+    generation_thread.join()
+
+    for output in outputs:
+        yield output['generated_text'] if isinstance(output, dict) else output
 
 chat_interface = gr.ChatInterface(
     fn=generate,
     additional_inputs=[
-        gr.Textbox(label="System prompt", lines=6),
+        gr.Textbox(label="System prompt", lines=6, value="You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来."),
         gr.Slider(
             label="Max new tokens",
             minimum=1,
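Read together, the change hard-wires the TCM system prompt as a default and fixes a double-invocation bug: the old code started a thread running run_generation() and then called run_generation() again inside the for loop, so the pipeline ran twice. The new version runs it once, collecting results into a shared list via outputs.extend. The sketch below is a minimal, self-contained reading of the new control flow, not the Space's actual app.py: the model id, the apply_chat_template step that produces input_text, and the trimmed-down keyword arguments are assumptions, since pipe, input_text, and generate_kwargs are defined in code outside these hunks.

# Minimal sketch of the committed pattern; assumptions are marked inline.
from threading import Thread
from typing import Iterator

from transformers import pipeline

# Assumption: placeholder chat model; the Space loads its own TCM-tuned checkpoint.
pipe = pipeline("text-generation", model="Qwen/Qwen2-0.5B-Instruct")

DEFAULT_SYSTEM_PROMPT = "You are a helpful TCM medical assistant named 仲景中医大语言模型, created by 医哲未来."


def generate(
    message: str,
    chat_history: list[tuple[str, str]],
    system_prompt: str = DEFAULT_SYSTEM_PROMPT,
    max_new_tokens: int = 1024,
) -> Iterator[str]:
    # As in the new version: the system prompt is always the first turn,
    # followed by alternating user/assistant turns and the new message.
    conversation = [{"role": "system", "content": system_prompt}]
    for user, assistant in chat_history:
        conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
    conversation.append({"role": "user", "content": message})

    # Assumption: input_text is derived from the conversation via the
    # tokenizer's chat template; the code that builds it sits between
    # the two hunks and is not shown in the diff.
    input_text = pipe.tokenizer.apply_chat_template(
        conversation, tokenize=False, add_generation_prompt=True
    )

    def run_generation():
        try:
            return pipe(input_text, max_new_tokens=max_new_tokens)
        except Exception as e:
            # Failures become a yielded error string, not an uncaught
            # exception lost inside the worker thread.
            return [f"Error in generation: {e}"]

    # Run generation in a worker thread and join before yielding anything.
    outputs = []
    generation_thread = Thread(target=lambda: outputs.extend(run_generation()))
    generation_thread.start()
    generation_thread.join()

    for output in outputs:
        yield output["generated_text"] if isinstance(output, dict) else output

One consequence of the join-before-yield pattern is that gr.ChatInterface receives the finished text as a single message rather than an incremental token stream; the commit trades streaming for a single pipeline run and simpler error handling.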