Llama-3.2-11b / app.py
nick911's picture
Update app.py
3378170 verified
raw
history blame
No virus
2.72 kB
import requests
import torch
from PIL import Image
from transformers import MllamaForConditionalGeneration, AutoProcessor
import gradio as gr
import spaces
class VisionInstructChat:
    """Gradio chat UI around a Llama 3.2 Vision-Instruct checkpoint.

    The constructor loads the model and its processor; ``launch_interface``
    builds a Blocks UI with an image upload, a prompt box and a chatbot, and
    routes the "Send" button to ``chat_with_model``.
    """

    # Checkpoint used when the constructor is called without ``model_id``.
    # NOTE(review): the hosting repo is named "Llama-3.2-11b" while this id
    # points at the 90B checkpoint -- confirm which one is intended.
    DEFAULT_MODEL_ID = "meta-llama/Llama-3.2-90B-Vision-Instruct"

    # Reply shown when the image or the prompt is missing.
    MISSING_INPUT_MESSAGE = "Please upload an image and enter a prompt."

    def __init__(self, model_id=DEFAULT_MODEL_ID, max_new_tokens=100):
        """Load the model and processor.

        Args:
            model_id: Hub id (or local path) of the Mllama checkpoint to load.
            max_new_tokens: Upper bound on tokens generated per reply.
        """
        self.model_id = model_id
        self.max_new_tokens = max_new_tokens
        self.model = MllamaForConditionalGeneration.from_pretrained(
            self.model_id,
            torch_dtype=torch.bfloat16,
            device_map="auto",
        )
        self.processor = AutoProcessor.from_pretrained(self.model_id)

    # ``spaces`` is a module; the callable decorator is ``spaces.GPU``.
    @spaces.GPU
    def chat_with_model(self, history, image, user_text):
        """Answer ``user_text`` about ``image`` and return the updated history.

        Args:
            history: Chatbot history as a list of ``[user, bot]`` pairs; may
                be ``None`` or empty.
            image: Uploaded image (the UI supplies a PIL image) or ``None``.
            user_text: The prompt typed by the user; may be ``None``.

        Returns:
            A new list holding the previous pairs plus one new
            ``[user, bot]`` pair. The input ``history`` is not mutated.
        """
        # Copy so the caller's list is never modified in place.
        updated_history = list(history or [])

        if image is None or not (user_text or "").strip():
            # Keep the two-element pair shape; ``None`` leaves the user side
            # of the pair empty when nothing was typed.
            updated_history.append([user_text or None, self.MISSING_INPUT_MESSAGE])
            return updated_history

        # Single-turn conversation: one image placeholder plus the prompt.
        messages = [
            {
                "role": "user",
                "content": [
                    {"type": "image"},
                    {"type": "text", "text": user_text},
                ],
            }
        ]
        input_text = self.processor.apply_chat_template(
            messages, add_generation_prompt=True
        )
        # The chat template already inserted the special tokens, so the
        # tokenizer must not add them a second time.
        inputs = self.processor(
            image,
            input_text,
            add_special_tokens=False,
            return_tensors="pt",
        ).to(self.model.device)

        output = self.model.generate(**inputs, max_new_tokens=self.max_new_tokens)

        # ``generate`` returns prompt + completion; drop the prompt tokens so
        # the reply does not echo the template and the user's own text.
        prompt_length = inputs["input_ids"].shape[-1]
        response = self.processor.decode(
            output[0][prompt_length:], skip_special_tokens=True
        ).strip()

        updated_history.append([user_text, response])
        return updated_history

    def reset_chat(self):
        """Return an empty history, clearing the chatbot (no GPU needed)."""
        return []

    def launch_interface(self):
        """Build the Gradio Blocks UI, wire the buttons and start the server."""
        with gr.Blocks() as demo:
            gr.Markdown("### Chat with Vision-Instruct Model")

            # Chat history
            chat_history = gr.Chatbot(label="Chat History")

            # Inputs: image on the left, prompt on the right
            with gr.Row():
                with gr.Column(scale=3):
                    image_input = gr.Image(type="pil", label="Upload Image")
                with gr.Column(scale=7):
                    user_input = gr.Textbox(
                        placeholder="Type your message here...",
                        label="Your Prompt",
                    )

            # Submit and Clear buttons
            submit_button = gr.Button("Send")
            clear_button = gr.Button("Clear Chat")

            # Button actions
            submit_button.click(
                fn=self.chat_with_model,
                inputs=[chat_history, image_input, user_input],
                outputs=chat_history,
            )
            clear_button.click(fn=self.reset_chat, outputs=chat_history)

        demo.launch()
# Create the app and launch the interface only when this file is run as a
# script, so importing the module does not load the model or start a server.
if __name__ == "__main__":
    chat_app = VisionInstructChat()
    chat_app.launch_interface()