import subprocess

# Provisioning script: fetch and build llama.cpp with cuBLAS enabled, download
# the Zephyr 7B beta GGUF model, and start the llama.cpp server on port 12345.
#
# Every step passes check=True so that a failing command raises
# subprocess.CalledProcessError and stops the script, instead of letting the
# later steps run against a missing checkout, build, or model file.
#
# NOTE(review): the clone below takes whatever the repository's default branch
# currently is; the build flag and the "server" binary name are assumed to
# match that revision -- confirm, or pin a known-good revision.

# Start from a clean checkout: remove the llama.cpp directory if it exists.
subprocess.run(["rm", "-rf", "llama.cpp"], check=True)

# Clone the llama.cpp repository into ./llama.cpp.
subprocess.run(
    ["git", "clone", "https://github.com/ggerganov/llama.cpp.git"],
    check=True,
)

# Build inside the checkout (cwd only affects the child process; the working
# directory of this script itself is not changed).
subprocess.run(["make", "LLAMA_CUBLAS=1"], cwd="llama.cpp", check=True)

# Download the zephyr-7b-beta model. wget runs in this script's working
# directory, so the file ends up next to llama.cpp/, not inside it.
subprocess.run(
    [
        "wget",
        "https://huggingface.co/TheBloke/zephyr-7B-beta-GGUF/resolve/main/zephyr-7b-beta.Q6_K.gguf",
    ],
    check=True,
)

# Run the server from inside the checkout. Because the child's working
# directory is llama.cpp/, the model downloaded above must be addressed one
# level up; a bare file name would be looked up inside llama.cpp/ and not
# found.
# NOTE(review): "-ngl 9999" presumably offloads all layers to the GPU and
# "-c 0" presumably takes the context size from the model -- confirm against
# the llama.cpp server documentation.
subprocess.run(
    [
        "./server",
        "-m", "../zephyr-7b-beta.Q6_K.gguf",
        "-ngl", "9999",
        "-c", "0",
        "--port", "12345",
    ],
    cwd="llama.cpp",
    check=True,
)