hanzla committed on
Commit
2ac2001
1 Parent(s): b23fbc0

chat interface v2

Browse files
Files changed (2) hide show
  1. app_v2.py +78 -0
  2. requirements.txt +2 -1
app_v2.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import os
3
+ import time
4
+ import spaces
5
+ import torch
6
+ import re
7
+ import gradio as gr
8
+ from threading import Thread
9
+ from transformers import TextIteratorStreamer, AutoTokenizer, AutoModelForCausalLM
10
+ from PIL import Image
11
+
12
import subprocess
import sys

# Install flash-attn at startup, before the model is loaded with
# attn_implementation="flash_attention_2".
# The child inherits the current environment (PATH, HOME, proxy settings, ...)
# with FLASH_ATTENTION_SKIP_CUDA_BUILD added on top; passing only that one
# variable as `env` would replace the whole environment of the child process.
# NOTE(review): the variable presumably makes the package skip compiling its
# CUDA kernels during install -- confirm against the flash-attn docs.
# `sys.executable -m pip` targets the interpreter running this app, and the
# argument list avoids going through a shell.
# The result is deliberately not checked (best effort, as before); a failed
# install will surface when the model is loaded below.
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
)

# Model checkpoint and the pinned revision used for both tokenizer and weights.
model_id = "vikhyatk/moondream2"
revision = "2024-04-02"
tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
moondream = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,  # the checkpoint ships its own modelling code
    revision=revision,
    torch_dtype=torch.bfloat16,
    device_map={"": "cuda"},  # place the whole model on the CUDA device
    attn_implementation="flash_attention_2",
)
moondream.eval()
24
def print_like_dislike(x: gr.LikeData):
    """Print the index, value and liked flag of a like/dislike event."""
    index, value, liked = x.index, x.value, x.liked
    print(index, value, liked)
26
+
27
def add_message(history, message):
    """Append the submitted files and text to the chat history.

    Args:
        history: List of chat rows; it is extended in place.
        message: Mapping with a ``"files"`` entry (iterable of uploaded
            files, may be empty) and a ``"text"`` entry (may be ``None``).

    Returns:
        A pair ``(history, textbox)`` where ``textbox`` is a
        ``gr.MultimodalTextbox`` with no value and ``interactive=False``.
    """
    uploaded = message["files"]
    if uploaded:
        # Each file becomes its own row: a one-element tuple as the user
        # message and no bot reply yet.
        history.extend(((path,), None) for path in uploaded)

    text = message["text"]
    if text is not None:
        history.append((text, None))

    locked_input = gr.MultimodalTextbox(value=None, interactive=False)
    return history, locked_input
35
+
36
def bot(history):
    """Stream the reply for the most recent chat row, one character at a time.

    The five most recent rows are scanned, newest first, for an uploaded
    image: a row whose user message is a tuple with a string as its first
    item. If one is found, the image is opened and passed to the moondream
    model; otherwise the reply depends on whether the last user message is a
    string.

    Args:
        history: Sequence of ``[user_message, bot_message]`` rows. Rows may be
            lists or tuples; the last row is converted to a list so that its
            reply slot can be written.

    Yields:
        ``history`` itself (the same list object) after each character has
        been appended to the last row's reply. For an empty ``history`` it is
        yielded once, unchanged.
    """
    if not history:
        # Nothing to reply to (e.g. the endpoint was called with no rows).
        yield history
        return

    # Reverse search through the last 5 rows for an image file.
    image_path = None
    for row in reversed(history[-5:]):
        user_message = row[0]
        if (
            isinstance(user_message, tuple)
            and user_message  # guard against an empty tuple
            and isinstance(user_message[0], str)
        ):
            image_path = user_message[0]
            break

    if image_path:
        try:
            # The context manager closes the underlying file once the image
            # has been encoded.
            with Image.open(image_path) as image:
                image_embeds = moondream.encode_image(image)
            # NOTE(review): the model's answer is only printed, not sent to
            # the chat -- confirm whether that is intended.
            print(moondream.answer_question(image_embeds, "Describe this image.", tokenizer))
            response = f"Successfully loaded image from path: {image_path}"
        except OSError:  # IOError is an alias of OSError
            response = "Failed to open image. Please check the image path or file permissions."
    elif isinstance(history[-1][0], str):
        response = "HOLA, it's a string"  # Handle text messages
    else:
        response = "**I can only process text messages and images. Please send some text or upload an image!**"

    # Rows appended by add_message are tuples; make the last row mutable
    # before writing the reply into it.
    if not isinstance(history[-1], list):
        history[-1] = list(history[-1])

    history[-1][1] = ""
    for character in response:
        history[-1][1] += character
        yield history
62
+
63
# Chat UI: a chatbot pane plus a multimodal input box, wired so that each
# submit first records the message, then streams the bot reply, then
# re-enables the input box.
with gr.Blocks(theme="Monochrome") as demo:
    chatbot = gr.Chatbot(value=[], elem_id="chatbot", bubble_full_width=False)

    chat_input = gr.MultimodalTextbox(
        interactive=True,
        file_types=["image"],
        placeholder="Enter message or upload file...",
        show_label=False,
    )

    (
        chat_input.submit(
            fn=add_message,
            inputs=[chatbot, chat_input],
            outputs=[chatbot, chat_input],
        )
        .then(fn=bot, inputs=chatbot, outputs=chatbot, api_name="bot_response")
        .then(
            fn=lambda: gr.MultimodalTextbox(interactive=True),
            inputs=None,
            outputs=[chat_input],
        )
    )

    # Like/dislike clicks are only printed.
    chatbot.like(fn=print_like_dislike, inputs=None, outputs=None)

demo.queue()
demo.launch()
requirements.txt CHANGED
@@ -1,4 +1,5 @@
1
  timm==0.9.12
2
  transformers==4.36.2
3
  einops==0.7.0
4
- accelerate==0.25.0
 
 
1
  timm==0.9.12
2
  transformers==4.36.2
3
  einops==0.7.0
4
+ accelerate==0.25.0
5
+ pillow==10.3.0