from llama_cpp import Llama
from llama_cpp_agent import LlamaCppAgent
from llama_cpp_agent import MessagesFormatterType
from llama_cpp_agent.providers import LlamaCppPythonProvider

# Initialize the Llama model
llama_model = Llama(
    "Arcee-Spark-GGUF/Arcee-Spark-Q4_K_M.gguf",
    n_batch=1024,
    n_threads=10,
    n_gpu_layers=33,
    n_ctx=2048,
    verbose=False
)

# Create the provider
provider = LlamaCppPythonProvider(llama_model)

# Create the agent
agent = LlamaCppAgent(
    provider,
    system_prompt="You are a helpful assistant.",
    predefined_messages_formatter_type=MessagesFormatterType.CHATML,
    debug_output=True
)

# Set provider settings
settings = provider.get_provider_default_settings()
settings.max_tokens = 2000
settings.stream = True

def send_to_llm(provider, msg_list):
    try:
        # Concatenate all messages into a single string
        full_message = "\n".join([f"{msg['role']}: {msg['content']}" for msg in msg_list])

        # Call get_chat_response with the full message string
        response = agent.get_chat_response(full_message, llm_sampling_settings=settings)

        # Check if response is a string or an object with a content attribute
        if isinstance(response, str):
            return response, None
        elif hasattr(response, 'content'):
            return response.content, None
        else:
            return str(response), None  # Convert to string if it's neither
    except Exception as e:
        print(f"Error in send_to_llm: {str(e)}")
        return f"Error: {str(e)}", None
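
# Usage sketch (an illustrative assumption, not part of the original source):
# pass a list of {"role": ..., "content": ...} dicts, which is the shape the
# f-string inside send_to_llm expects, and print the model's reply.
if __name__ == "__main__":
    messages = [
        {"role": "user", "content": "Explain what a GGUF file is in one sentence."},
    ]
    reply, _ = send_to_llm(provider, messages)
    print(reply)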