Update llm_handler.py
llm_handler.py  +0 -24  CHANGED
@@ -22,30 +22,6 @@ settings = provider.get_provider_default_settings()
 settings.max_tokens = 2000
 settings.stream = True
 
-from llama_cpp import Llama
-from llama_cpp_agent import LlamaCppAgent
-from llama_cpp_agent import MessagesFormatterType
-from llama_cpp_agent.providers import LlamaCppPythonProvider
-
-# Initialize the Llama model
-llama_model = Llama("Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf", n_batch=1024, n_threads=10, n_gpu_layers=33, n_ctx=2048, verbose=False)
-
-# Create the provider
-provider = LlamaCppPythonProvider(llama_model)
-
-# Create the agent
-agent = LlamaCppAgent(
-    provider,
-    system_prompt="You are a helpful assistant.",
-    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
-    debug_output=True
-)
-
-# Set provider settings
-settings = provider.get_provider_default_settings()
-settings.max_tokens = 2000
-settings.stream = True
-
 def send_to_llm(provider, msg_list):
     try:
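The deleted lines 25-48 were a verbatim duplicate of the model/agent setup that already sits at the top of llm_handler.py (the hunk context, settings = provider.get_provider_default_settings() followed by the max_tokens and stream assignments at lines 21-23, belongs to that earlier copy). As a reading aid, here is a minimal sketch of what the head of the module plausibly looks like after this commit, reconstructed from the deleted block itself and assuming the surviving copy is identical; the body of send_to_llm is not shown in the diff and is left elided.

```python
from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Initialize the Llama model from local GGUF weights (33 layers offloaded to GPU)
llama_model = Llama(
    "Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf",
    n_batch=1024,
    n_threads=10,
    n_gpu_layers=33,
    n_ctx=2048,
    verbose=False,
)

# Wrap the llama-cpp-python model in a provider for the agent
provider = LlamaCppPythonProvider(llama_model)

# Create the agent with ChatML prompt formatting
agent = LlamaCppAgent(
    provider,
    system_prompt="You are a helpful assistant.",
    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
    debug_output=True,
)

# Provider settings reused by send_to_llm below
settings = provider.get_provider_default_settings()
settings.max_tokens = 2000
settings.stream = True


def send_to_llm(provider, msg_list):
    # Body unchanged by this commit and not shown in the diff.
    ...
```

With a single copy of this block at module level, the commit only removes the redundant re-initialization; the provider, agent, and settings objects used by send_to_llm are unchanged.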