Ahmed007 committed on
Commit
8c4ab6b
1 Parent(s): 2d347d9

Add application file

Browse files
Files changed (2) hide show
  1. app.py +34 -0
  2. requirements.txt +11 -0
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from transformers import AutoModelForCausalLM, AutoTokenizer
2
+ from PIL import Image
3
+ import gradio as gr
4
+ import numpy as np
5
+
6
+ # Load the model and tokenizer once at module level; both are pinned to the same revision
7
+ model_id = "vikhyatk/moondream2"
8
+ revision = "2024-05-20"
9
+ model = AutoModelForCausalLM.from_pretrained(
10
+ model_id, trust_remote_code=True, revision=revision  # NOTE(review): trust_remote_code=True presumably runs code shipped in the model repo — keep the revision pinned
11
+ )
12
+ tokenizer = AutoTokenizer.from_pretrained(model_id, revision=revision)
13
+
14
+ def analyze_image_direct(image, question):
15
+ # Answer `question` about `image`: encode the image with the model, then query the model with the encoding
16
+ # NOTE(review): encode_image/answer_question are presumably supplied by the model repo's remote code (trust_remote_code=True) — confirm
17
+ # Assumes encode_image accepts a PIL image directly (the Gradio input is declared with type="pil") — TODO confirm
18
+ enc_image = model.encode_image(image) # NOTE(review): verify this method exists at the pinned revision
19
+
20
+ # Generate an answer to the question based on the encoded image
21
+ # NOTE(review): the argument order (encoded image, question, tokenizer) is assumed — verify against the model's documentation
22
+ answer = model.answer_question(enc_image, question, tokenizer) # result is returned to the caller unchanged
23
+
24
+ return answer
25
+
26
+ # Create Gradio interface: an image input (type="pil") plus a two-line question textbox, with a text output
27
+ iface = gr.Interface(fn=analyze_image_direct,
28
+ inputs=[gr.Image(type="pil"), gr.Textbox(lines=2, placeholder="Enter your question here...")],
29
+ outputs='text',
30
+ title="Direct Image Question Answering",
31
+ description="Upload an image and ask a question about it directly using the model.")
32
+
33
+ # Launch the interface (called at module level, so it runs as soon as this file is executed)
34
+ iface.launch()
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ opencv-python-headless
2
+ datasets
3
+ transformers
4
+ accelerate
5
+ evaluate
6
+ bitsandbytes
7
+ accelerate  # duplicate entry: accelerate is already listed above
8
+ einops
9
+ Pillow
10
+ torch
11
+ torchvision