import subprocess

import gradio as gr
import spaces
import torch
from PIL import Image
from transformers import AutoTokenizer, AutoModelForCausalLM

# Install FlashAttention at startup; FLASH_ATTENTION_SKIP_CUDA_BUILD avoids
# compiling the CUDA extension on the Space.
subprocess.run(
    'pip install flash-attn --no-build-isolation',
    env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"},
    shell=True,
)

model_id = "vikhyatk/moondream2"
revision = "2024-04-02"

tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},
    attn_implementation="flash_attention_2",
)
moondream.eval()


def print_like_dislike(x: gr.LikeData):
    print(x.index, x.value, x.liked)


def add_message(history, message):
    # Append any uploaded images and the text prompt to the chat history.
    if message["files"]:
        for x in message["files"]:
            history.append(((x,), None))
    if message["text"] is not None:
        history.append((message["text"], None))
    # Clear and disable the textbox while the bot is generating a response.
    return history, gr.MultimodalTextbox(value=None, interactive=False)


@spaces.GPU(duration=10)
def bot(history):
    # Scan the last five messages, keeping the most recent image and text prompt.
    last_five_messages = history[-5:]
    image_path = None
    last_message = None
    for message in last_five_messages:
        if isinstance(message[0], tuple) and isinstance(message[0][0], str):
            image_path = message[0][0]
        if isinstance(message[0], str):
            last_message = message[0]

    if image_path:
        try:
            image = Image.open(image_path)  # try to open the image with Pillow
            image_embeds = moondream.encode_image(image)
            print(image_embeds.shape)  # debug: expected (1, 729, 2048) for this revision
            response = moondream.answer_question(image_embeds, last_message, tokenizer)
        except IOError:
            response = "Failed to open image. Please check the image path or file permissions."
    else:
        # No image in the recent history: pass a zero tensor with the same shape
        # as a real image embedding so text-only questions can still be answered.
        image_embeds = torch.zeros(1, 729, 2048, dtype=torch.bfloat16, device="cuda")
        response = moondream.answer_question(image_embeds, last_message, tokenizer)

    # Stream the answer back character by character.
    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        yield history


with gr.Blocks(theme=gr.themes.Default(primary_hue="green", secondary_hue="emerald")) as demo:
    gr.Markdown(
        """
# AskMoondream: Moondream 2 Demonstration Space
## Modularity AI presents this open-source Hugging Face Space for fast, experimental inference on Moondream2.
        """
    )
    chatbot = gr.Chatbot([], elem_id="chatbot", bubble_full_width=False, height=550)
    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image"],
        placeholder="Enter message or upload file...",
        show_label=False,
    )

    chat_msg = chat_input.submit(add_message, inputs=[chatbot, chat_input], outputs=[chatbot, chat_input])
    bot_msg = chat_msg.then(bot, inputs=chatbot, outputs=chatbot, api_name="bot_response")
    bot_msg.then(lambda: gr.MultimodalTextbox(interactive=True), None, outputs=[chat_input])

    chatbot.like(print_like_dislike, None, None)

demo.queue()
demo.launch()
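
# ---------------------------------------------------------------------------
# Optional sketch (commented out, not executed): the same encode_image /
# answer_question calls used in bot() above, invoked directly without the
# Gradio UI. "example.jpg" and the question text are hypothetical placeholders;
# run this in a separate script, since demo.launch() above blocks.
#
#   from PIL import Image
#   img = Image.open("example.jpg")                          # hypothetical path
#   embeds = moondream.encode_image(img)                     # image -> embeddings
#   print(moondream.answer_question(embeds, "Describe this image.", tokenizer))
# ---------------------------------------------------------------------------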