CMLL committed
Commit f2ae05b
Parent: 196935a

Update app.py

Files changed (1):
  1. app.py (+76, −6)
app.py CHANGED
@@ -1,16 +1,86 @@
- import subprocess

  # Remove the llama.cpp directory if it exists
- subprocess.run(["rm", "-rf", "llama.cpp"])

  # Clone the llama.cpp repository
- subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"])

  # Build llama.cpp with cuBLAS (CUDA) support
- subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp")

  # Download the zephyr-7b-beta model
- subprocess.run(["wget", "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf"])

  # Run the server
- subprocess.run(["./server", "-m", "zephyr-7b-beta.Q6_K.gguf", "-ngl", "9999", "-c", "0", "--port", "12345"], cwd="llama.cpp")
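
For reference, the version removed here exposed the model through llama.cpp's built-in HTTP server on port 12345. A minimal client sketch against that server's /completion endpoint (a hedged example; the prompt text and n_predict value are illustrative, not from the commit):

import requests  # assumes the server launched above is running locally

resp = requests.post(
    "http://localhost:12345/completion",
    json={"prompt": "Hello, how are you?", "n_predict": 128},  # illustrative values
)
# llama.cpp's server returns the generated text under the "content" key
print(resp.json()["content"])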
+ #import subprocess

  # Remove the llama.cpp directory if it exists
+ #subprocess.run(["rm", "-rf", "llama.cpp"])

  # Clone the llama.cpp repository
+ #subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"])

  # Build llama.cpp with cuBLAS (CUDA) support
+ #subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp")

  # Download the zephyr-7b-beta model
+ #subprocess.run(["wget", "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf"])

  # Run the server
+ #subprocess.run(["./server", "-m", "zephyr-7b-beta.Q6_K.gguf", "-ngl", "9999", "-c", "0", "--port", "12345"], cwd="llama.cpp")
+
+ # Install the libraries
+ !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
+ #(to run on CPU) !pip install llama-cpp-python
+ !pip install gradio
+
+ # Download the model
+ !wget https://huggingface.co/TFMC/openbuddy-llama2-13b-v11.1-bf16-GGUF/resolve/main/ggml-model-q4_m.gguf
+
+ # Launch the web UI
+ import os
+ import gradio as gr
+ import copy
+ import time
+ from llama_cpp import Llama
+
+ llm = Llama(
+     model_path="ggml-model-q4_m.gguf",
+     n_ctx=2048,
+     n_gpu_layers=100,  # remove this argument to run on CPU
+ )
+
+ history = []
+
+ system_message = """
+ You are an AI assistant.
+ """
+
+ def generate_text(message, history):
+     temp = ""
+     input_prompt = f"{system_message}"
+     for interaction in history:
+         input_prompt = input_prompt + "\nUSER: " + str(interaction[0]) + "\nASSISTANT: " + str(interaction[1])
+     input_prompt = input_prompt + "\nUSER: " + str(message) + "\nASSISTANT: "
+
+     output = llm.create_completion(
+         input_prompt,
+         temperature=0.7,
+         top_p=0.3,
+         top_k=40,
+         repeat_penalty=1.1,
+         max_tokens=1024,
+         stop=[
+             "ASSISTANT:",
+             "USER:",
+             "SYSTEM:",
+         ],
+         stream=True,
+     )
+     for out in output:
+         stream = copy.deepcopy(out)
+         temp += stream["choices"][0]["text"]
+         yield temp
+
+     history = ["init", input_prompt]  # rebinds the local name only; has no effect outside this function
+
+
+ demo = gr.ChatInterface(
+     generate_text,
+     title="ZhongJingGPT-V2-1_8B-GGUF chatbot using llama-cpp-python",
+     description="",
+     examples=["Please name the prefectures on Japan's Shikoku island."],
+     cache_examples=True,
+     retry_btn=None,
+     undo_btn="Remove last",
+     clear_btn="Clear all",
+ )
+ demo.queue(concurrency_count=1, max_size=5)
+ demo.launch(debug=True, share=True)
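
Since generate_text in the new version is a plain Python generator, the chat logic can be smoke-tested without launching the Gradio UI. A minimal sketch, assuming the model file above has been downloaded and llm/generate_text have been defined as in the diff:

last = ""
# Each value yielded by generate_text is the cumulative reply so far.
for partial in generate_text("Hello!", history=[]):  # empty history = fresh conversation
    last = partial
print(last)  # the final assembled answer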