CMLL committed
Commit f2ae05b
Parent: 196935a

Update app.py

Files changed (1):
  1. app.py (+76, −6)
app.py CHANGED
@@ -1,16 +1,86 @@
- import subprocess

  # Remove the llama.cpp directory if it exists
- subprocess.run(["rm", "-rf", "llama.cpp"])

  # Clone the llama.cpp repository
- subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"])

  # Build llama.cpp with cuBLAS (CUDA) support
- subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp")

  # Download the zephyr-7b-beta model
- subprocess.run(["wget", "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf"])

  # Run the server
- subprocess.run(["./server", "-m", "zephyr-7b-beta.Q6_K.gguf", "-ngl", "9999", "-c", "0", "--port", "12345"], cwd="llama.cpp")
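
For reference, the version removed here exposed the model through llama.cpp's built-in HTTP server on port 12345. A minimal client sketch against that server's /completion endpoint (a hedged example; the prompt text and n_predict value are illustrative, not from the commit):

import requests  # assumes the server launched above is running locally

resp = requests.post(
    "http://localhost:12345/completion",
    json={"prompt": "Hello, how are you?", "n_predict": 128},  # illustrative values
)
# llama.cpp's server returns the generated text under the "content" key
print(resp.json()["content"])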
+ #import subprocess

  # Remove the llama.cpp directory if it exists
+ #subprocess.run(["rm", "-rf", "llama.cpp"])

  # Clone the llama.cpp repository
+ #subprocess.run(["git", "clone", "https://github.com/ggerganov/llama.cpp.git"])

  # Build llama.cpp with cuBLAS (CUDA) support
+ #subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp")

  # Download the zephyr-7b-beta model
+ #subprocess.run(["wget", "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf"])

  # Run the server
+ #subprocess.run(["./server", "-m", "zephyr-7b-beta.Q6_K.gguf", "-ngl", "9999", "-c", "0", "--port", "12345"], cwd="llama.cpp")
+
+ # Install the libraries
+ !CMAKE_ARGS="-DLLAMA_CUBLAS=on" FORCE_CMAKE=1 pip install llama-cpp-python
+ #(to run on CPU) !pip install llama-cpp-python
+ !pip install gradio
+
+ # Download the model
+ !wget https://huggingface.co/TFMC/openbuddy-llama2-13b-v11.1-bf16-GGUF/resolve/main/ggml-model-q4_m.gguf
+
+ # Launch the web UI
+ import os
+ import gradio as gr
+ import copy
+ import time
+ from llama_cpp import Llama
+
+ llm = Llama(
+     model_path="ggml-model-q4_m.gguf",
+     n_ctx=2048,
+     n_gpu_layers=100,  # remove this argument to run on CPU
+ )
+
+ history = []
+
+ system_message = """
+ You are an AI assistant.
+ """
+
+ def generate_text(message, history):
+     temp = ""
+     input_prompt = f"{system_message}"
+     for interaction in history:
+         input_prompt = input_prompt + "\nUSER: " + str(interaction[0]) + "\nASSISTANT: " + str(interaction[1])
+     input_prompt = input_prompt + "\nUSER: " + str(message) + "\nASSISTANT: "
+
+     output = llm.create_completion(
+         input_prompt,
+         temperature=0.7,
+         top_p=0.3,
+         top_k=40,
+         repeat_penalty=1.1,
+         max_tokens=1024,
+         stop=[
+             "ASSISTANT:",
+             "USER:",
+             "SYSTEM:",
+         ],
+         stream=True,
+     )
+     for out in output:
+         stream = copy.deepcopy(out)
+         temp += stream["choices"][0]["text"]
+         yield temp
+
+     history = ["init", input_prompt]  # rebinds the local name only; has no effect outside this function
+
+
+ demo = gr.ChatInterface(
+     generate_text,
+     title="ZhongJingGPT-V2-1_8B-GGUF chatbot using llama-cpp-python",
+     description="",
+     examples=["Please name the prefectures on Japan's Shikoku island."],
+     cache_examples=True,
+     retry_btn=None,
+     undo_btn="Remove last",
+     clear_btn="Clear all",
+ )
+ demo.queue(concurrency_count=1, max_size=5)
+ demo.launch(debug=True, share=True)
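
Since generate_text in the new version is a plain Python generator, the chat logic can be smoke-tested without launching the Gradio UI. A minimal sketch, assuming the model file above has been downloaded and llm/generate_text have been defined as in the diff:

last = ""
# Each value yielded by generate_text is the cumulative reply so far.
for partial in generate_text("Hello!", history=[]):  # empty history = fresh conversation
    last = partial
print(last)  # the final assembled answer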