from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent, MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Initialize the Llama model
llama_model = Llama(
    "Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf",
    n_batch=1024,
    n_threads=10,
    n_gpu_layers=33,
    n_ctx=2048,
    verbose=False,
)

# Create the provider
provider = LlamaCppPythonProvider(llama_model)

# Create the agent
agent = LlamaCppAgent(
    provider,
    system_prompt="You are a helpful assistant.",
    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
    debug_output=True,
)

# Set provider settings
settings = provider.get_provider_default_settings()
settings.max_tokens = 2000
settings.stream = True

def send_to_llm(provider, msg_list):
    # Note: provider is accepted for the caller's convenience, but this
    # function uses the module-level agent and settings defined above.
    try:
        response = agent.get_chat_response(msg_list, settings=settings)
        return response.content, None  # No token-usage info is available here
    except Exception as e:
        print(f"Error in send_to_llm: {str(e)}")
        return f"Error: {str(e)}", None
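
# --- Hypothetical usage sketch (not from the original) ---
# A minimal example of calling send_to_llm, assuming msg_list can be a plain
# user-message string handed straight to get_chat_response; adjust if your
# calling convention passes a structured message list instead.
if __name__ == "__main__":
    reply, usage = send_to_llm(provider, "Summarize what n_gpu_layers controls.")
    print(reply)  # the agent's response text
    print(usage)  # always None here; token-usage stats are not surfaced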