"""Minimal Gradio chat front-end for a local GGUF model served via llama-cpp-python.

Running this file loads the model once and starts a Gradio web UI whose
single text box is completed by the model.
"""

import os

import gradio as gr
from llama_cpp import Llama

# Path to the GGUF weights on disk. NOTE(review): the original value
# ("google/gemma-7b-it-GGUF.bin") looks like a Hugging Face repo id, not a
# local file path — Llama(model_path=...) requires an existing local .gguf
# file. Download the weights first (e.g. with huggingface_hub) and point
# MODEL_PATH at them.
MODEL_PATH = "google/gemma-7b-it-GGUF.bin"  # Adjust the path as needed

# Context window (tokens). 512 is small for a 7B instruct model; raise if
# prompts + completions get truncated.
N_CTX = 512

# Generation parameters for every request.
MAX_TOKENS = 150
TEMPERATURE = 0.7
TOP_P = 0.9

# Loaded lazily inside main() so importing this module does not pull a
# multi-gigabyte model into memory.
model = None


def chat(input_text: str) -> str:
    """Complete *input_text* with the loaded model and return the text.

    Args:
        input_text: Raw prompt typed into the Gradio text box.

    Returns:
        The model's completion text (first choice only).

    Raises:
        RuntimeError: If called before the model has been loaded.
    """
    if model is None:
        raise RuntimeError("Model not loaded; run this file as a script.")
    output = model(
        input_text,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    )
    # llama-cpp-python returns an OpenAI-style completion dict.
    return output["choices"][0]["text"]


def main() -> None:
    """Load the model and launch the Gradio interface (blocking)."""
    global model
    # Fail fast with a clear message instead of an opaque loader error.
    if not os.path.isfile(MODEL_PATH):
        raise FileNotFoundError(
            f"GGUF model file not found: {MODEL_PATH!r}. "
            "Download the weights locally and update MODEL_PATH."
        )
    model = Llama(model_path=MODEL_PATH, n_ctx=N_CTX)

    interface = gr.Interface(
        fn=chat,
        inputs="text",
        outputs="text",
        title="Gemma 7B Chatbot",
    )
    interface.launch()


if __name__ == "__main__":
    main()