nchen909 committed
Commit 66be512
1 Parent(s): 413c7f5

Update app.py

Files changed (1)
  1. app.py +54 -14
app.py CHANGED
@@ -1,18 +1,22 @@
 import gradio as gr
 from gpt4all import GPT4All
 from huggingface_hub import hf_hub_download
+import subprocess
+import asyncio
 
-title = "Apollo-6B-GGUF Run On CPU"
+title = "Apollo-7B-GGUF Run On CPU"
 
 description = """
-🔎 [Apollo-6B](https://huggingface.co/FreedomIntelligence/Apollo-6B) [GGUF format model](https://huggingface.co/FreedomIntelligence/Apollo-6B-GGUF), an 8-bit quantized, balanced-quality GGUF version, running on CPU. Using [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
+🔎 [Apollo-7B](https://huggingface.co/FreedomIntelligence/Apollo-7B) [GGUF format model](https://huggingface.co/FreedomIntelligence/Apollo-7B-GGUF), an 8-bit quantized, balanced-quality GGUF version, running on CPU. Using [GitHub - llama.cpp](https://github.com/ggerganov/llama.cpp) and [GitHub - gpt4all](https://github.com/nomic-ai/gpt4all).
 
 🔨 Running on CPU-Basic free hardware. Suggest duplicating this space to run without a queue.
 
+Mistral does not support a system prompt symbol (such as ```<<SYS>>```) for now; put your system prompt in the first message if you need one. Learn more: [Guardrailing Mistral 7B](https://docs.mistral.ai/usage/guardrailing).
 """
 
 """
-[Model From FreedomIntelligence/Apollo-6B-GGUF](https://huggingface.co/FreedomIntelligence/Apollo-6B-GGUF)
+[Model From TheBloke/Mistral-6B-Instruct-v0.1-GGUF](https://huggingface.co/FreedomIntelligence/Apollo-6B-GGUF)
+[Mistral-instruct-v0.1 System prompt](https://docs.mistral.ai/usage/guardrailing)
 """
 
 model_path = "models"
@@ -23,22 +27,58 @@ print("Start the model init process")
 model = model = GPT4All(model_name, model_path, allow_download = False, device="cpu")
 print("Finish the model init process")
 
-model.config["promptTemplate"] = "[INST] {0} [/INST]"
-model.config["systemPrompt"] = ""
+model.config["promptTemplate"] = "{0}"
+model.config["systemPrompt"] = "You are a multilingual AI doctor, your name is Apollo."
 model._is_chat_session_activated = False
 
 max_new_tokens = 2048
 
-def generater(message, history, temperature, top_p, top_k):
-    prompt = "<s>"
+# def generater(message, history, temperature, top_p, top_k):
+#     prompt = "<s>"
+#     for user_message, assistant_message in history:
+#         prompt += model.config["promptTemplate"].format(user_message)
+#         prompt += assistant_message + "</s>"
+#     prompt += model.config["promptTemplate"].format(message)
+#     outputs = []
+#     for token in model.generate(prompt=prompt, temp=temperature, top_k=top_k, top_p=top_p, max_tokens=max_new_tokens, streaming=True):
+#         outputs.append(token)
+#         yield "".join(outputs)
+async def generater(message, history, temperature, top_p, top_k):
+    # Build the prompt from the chat history
+    prompt = ""
     for user_message, assistant_message in history:
         prompt += model.config["promptTemplate"].format(user_message)
-        prompt += assistant_message + "</s>"
+        prompt += assistant_message
     prompt += model.config["promptTemplate"].format(message)
-    outputs = []
-    for token in model.generate(prompt=prompt, temp=temperature, top_k=top_k, top_p=top_p, max_tokens=max_new_tokens, streaming=True):
-        outputs.append(token)
-        yield "".join(outputs)
+
+    # Debug: print the final prompt to verify it is correct
+    print(f"Final prompt: {prompt}")
+
+    cmd = [
+        "./main",
+        "-m", model_path + "/" + model_name,
+        "--prompt", prompt
+    ]
+
+    # Use subprocess.Popen to call ./main and read its output as a stream
+    process = subprocess.Popen(
+        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
+    )
+
+    # Initial placeholder output
+    yield "Generating response..."
+    # Asynchronously wait for and process the output
+    try:
+        while True:
+            line = process.stdout.readline()
+            if not line:
+                break  # no more output, end the loop
+            print(f"Generated line: {line.strip()}")  # Debug: print each generated line
+            yield line
+    except Exception as e:
+        print(f"Error during generation: {e}")
+        yield "Sorry, an error occurred while generating the response."
+
 
 def vote(data: gr.LikeData):
     if data.liked:
@@ -51,7 +91,7 @@ chatbot = gr.Chatbot(avatar_images=('resourse/user-icon.png', 'resourse/chatbot-
 additional_inputs=[
     gr.Slider(
         label="temperature",
-        value=0.8,
+        value=0.5,
         minimum=0.0,
         maximum=2.0,
         step=0.05,
@@ -95,4 +135,4 @@ with gr.Blocks(css="resourse/style/custom.css") as demo:
     iface.render()
 
 if __name__ == "__main__":
-    demo.queue(max_size=256).launch()
+    demo.queue(max_size=3).launch()
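
Note on the new streaming path: the committed generater calls the blocking process.stdout.readline() inside an async def, which can stall Gradio's event loop, and it yields each raw line on its own; a gr.ChatInterface streaming callback treats every yielded value as the full message so far, so only the latest line would stay visible. Below is a minimal sketch of one way to address both points, assuming the same ./main binary and the model_path + "/" + model_name file from app.py; the helper name stream_main and this wiring are illustrative, not part of the commit.

import asyncio

async def stream_main(prompt, model_file):
    # Launch ./main (the llama.cpp example binary used above) without blocking the event loop
    process = await asyncio.create_subprocess_exec(
        "./main", "-m", model_file, "--prompt", prompt,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    output = ""
    while True:
        line = await process.stdout.readline()  # awaits instead of blocking
        if not line:
            break
        output += line.decode("utf-8", errors="replace")
        yield output  # accumulated text, so the chat message grows in place
    await process.wait()

# Hypothetical wiring that mirrors the committed generater signature;
# model, model_path, and model_name come from app.py as in the commit.
async def generater(message, history, temperature, top_p, top_k):
    prompt = ""
    for user_message, assistant_message in history:
        prompt += model.config["promptTemplate"].format(user_message)
        prompt += assistant_message
    prompt += model.config["promptTemplate"].format(message)
    async for partial in stream_main(prompt, model_path + "/" + model_name):
        yield partial

As in the committed cmd, which passes only -m and --prompt to ./main, the sampling parameters (temperature, top_p, top_k) are accepted here but not forwarded.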