CMLL committed
Commit cdddf91
1 Parent(s): bcf1d2d

Update app.py

Files changed (1)
  app.py  +15 -26
app.py CHANGED
@@ -1,45 +1,33 @@
-#import subprocess
-
-# Remove the llama.cpp directory if it exists
-#subprocess.run(["rm", "-rf", "llama.cpp"])
-
-# Clone the llama.cpp repository
-#subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"])
-
-# Build llama.cpp with cuBLAS support
-#subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp")
-
-# Download the zephyr-7b-beta model
-#subprocess.run(["wget", "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf"])
-
-# Run the server
-#subprocess.run(["./server", "-m", "zephyr-7b-beta.Q6_K.gguf", "-ngl", "9999", "-c", "0", "--port", "12345"], cwd="llama.cpp")
-
 # Install the libraries
-!CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
-#(if running on CPU) !pip install llama-cpp-python
-pip install gradio
+import os
+import subprocess
+
+# Install the required libraries
+subprocess.check_call(["pip", "install", "llama-cpp-python"])
+subprocess.check_call(["pip", "install", "gradio"])
 
 # Download the model
-wget https://huggingface.co/TFMC/openbuddy-llama2-13b-v11.1-bf16-GGUF/resolve/main/ggml-model-q4_m.gguf
+model_url = "https://huggingface.co/CMLL/ZhongJing-2-1_8b-GGUF/resolve/main/ZhongJing1_5-1_8b-fp16.gguf"
+model_path = "ggml-model.gguf"
+if not os.path.exists(model_path):
+    subprocess.check_call(["wget", model_url, "-O", model_path])
 
 # Launch the web UI
-import os
 import gradio as gr
 import copy
 import time
 from llama_cpp import Llama
 
 llm = Llama(
-    model_path="ggml-model-q4_m.gguf",
+    model_path=model_path,
     n_ctx=2048,
-    n_gpu_layers=100, #remove when running on CPU
+    n_gpu_layers=100,  # remove when running on CPU
 )
 
 history = []
 
 system_message = """
-You are an AI assistant.
+You are a helpful TCM medical assistant named 仲景中医大语言模型.
 """
 
 def generate_text(message, history):
@@ -68,7 +56,7 @@ def generate_text(message, history):
         temp += stream["choices"][0]["text"]
         yield temp
 
-    history = ["init", input_prompt]
+    history.append((message, temp))
 
 
 demo = gr.ChatInterface(
@@ -84,3 +72,4 @@ demo = gr.ChatInterface(
 demo.queue(concurrency_count=1, max_size=5)
 demo.launch(debug=True, share=True)
 
+
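Note on the new download step: it shells out to wget, which is not guaranteed to exist in every runtime image. A minimal alternative sketch, assuming the repo_id and filename implied by model_url in the diff, fetches the same file through huggingface_hub, which also handles caching and resumed downloads:

    # Hedged alternative to the wget call; repo_id and filename are
    # inferred from model_url above and should be verified against the
    # Hub repository.
    from huggingface_hub import hf_hub_download

    model_path = hf_hub_download(
        repo_id="CMLL/ZhongJing-2-1_8b-GGUF",
        filename="ZhongJing1_5-1_8b-fp16.gguf",
    )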
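The hunks above show only the tail of generate_text, so the prompt assembly is elided. For orientation, a minimal sketch of a complete streaming handler built on llama-cpp-python's create_completion(stream=True); the prompt template and the max_tokens cap are assumptions, not the committed code:

    def generate_text(message, history):
        # The prompt template below is an assumption; the committed
        # format is not visible in this diff.
        input_prompt = system_message
        for user_turn, assistant_turn in history:
            input_prompt += f"User: {user_turn}\nAssistant: {assistant_turn}\n"
        input_prompt += f"User: {message}\nAssistant: "

        temp = ""
        # Streamed chunks expose partial text at ["choices"][0]["text"],
        # matching the context lines in the second hunk.
        for stream in llm.create_completion(
            input_prompt,
            max_tokens=512,  # assumed cap, not shown in the diff
            stream=True,
        ):
            temp += stream["choices"][0]["text"]
            yield temp

        history.append((message, temp))

Because generate_text is a generator, gr.ChatInterface renders each yielded string as a progressively growing reply, which is what makes the UI stream token by token.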