"""Minimal Gradio chat front-end for a local GGUF model served via llama-cpp-python.

Running this file loads the model once and starts a Gradio web UI whose
single text box is completed by the model.
"""

import os

import gradio as gr
from llama_cpp import Llama

# Path to the GGUF weights on disk. NOTE(review): the original value
# ("google/gemma-7b-it-GGUF.bin") looks like a Hugging Face repo id, not a
# local file path — Llama(model_path=...) requires an existing local .gguf
# file. Download the weights first (e.g. with huggingface_hub) and point
# MODEL_PATH at them.
MODEL_PATH = "google/gemma-7b-it-GGUF.bin"  # Adjust the path as needed

# Context window (tokens). 512 is small for a 7B instruct model; raise if
# prompts + completions get truncated.
N_CTX = 512

# Generation parameters for every request.
MAX_TOKENS = 150
TEMPERATURE = 0.7
TOP_P = 0.9

# Loaded lazily inside main() so importing this module does not pull a
# multi-gigabyte model into memory.
model = None


def chat(input_text: str) -> str:
    """Complete *input_text* with the loaded model and return the text.

    Args:
        input_text: Raw prompt typed into the Gradio text box.

    Returns:
        The model's completion text (first choice only).

    Raises:
        RuntimeError: If called before the model has been loaded.
    """
    if model is None:
        raise RuntimeError("Model not loaded; run this file as a script.")
    output = model(
        input_text,
        max_tokens=MAX_TOKENS,
        temperature=TEMPERATURE,
        top_p=TOP_P,
    )
    # llama-cpp-python returns an OpenAI-style completion dict.
    return output["choices"][0]["text"]


def main() -> None:
    """Load the model and launch the Gradio interface (blocking)."""
    global model
    # Fail fast with a clear message instead of an opaque loader error.
    if not os.path.isfile(MODEL_PATH):
        raise FileNotFoundError(
            f"GGUF model file not found: {MODEL_PATH!r}. "
            "Download the weights locally and update MODEL_PATH."
        )
    model = Llama(model_path=MODEL_PATH, n_ctx=N_CTX)

    interface = gr.Interface(
        fn=chat,
        inputs="text",
        outputs="text",
        title="Gemma 7B Chatbot",
    )
    interface.launch()


if __name__ == "__main__":
    main()