# Hugging Face Space page header captured along with the source; the status shown was "Runtime error".
import requests | |
from PIL import Image | |
import gradio as gr | |
from transformers import AutoProcessor, Blip2ForConditionalGeneration | |
import torch | |
# Stylesheet targeting the elem_ids used in the layout: the chat column
# (#column_container), the prompt textbox (#input_prompt) and the chatbot
# panel (#chatbot-component).
# NOTE(review): the gr.Blocks(...) call further down passes its own inline CSS
# string instead of this variable, so these rules appear to be unused —
# confirm before relying on them.
css = """
#column_container {
position: relative;
height: 800px;
max-width: 700px;
display: flex;
flex-direction: column;
background-color: lightgray;
border: 1px solid gray;
border-radius: 5px;
padding: 10px;
box-shadow: 2px 2px 5px gray;
margin-left: auto;
margin-right: auto;
}
#input_prompt {
position: fixed;
bottom: 0;
max-width: 680px;
}
#chatbot-component {
overflow: auto;
}
"""
# Choose the device first so the weights can be loaded in a dtype that device
# can actually run: half precision on CUDA, full precision on CPU (float16
# inference on CPU is unsupported or very slow for many operators).
# Note that float32 needs roughly twice the memory of float16.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_dtype = torch.float16 if device == "cuda" else torch.float32

processor = AutoProcessor.from_pretrained("Salesforce/blip2-opt-2.7b")
model = Blip2ForConditionalGeneration.from_pretrained(
    "Salesforce/blip2-opt-2.7b", torch_dtype=model_dtype
)
model.to(device)


def upload_button_config():
    """Return a Gradio update that hides the component it is wired to."""
    return gr.update(visible=False)


def update_textbox_config(text_in):
    """Return a Gradio update that makes the wired component visible.

    ``text_in`` is accepted because the event listener passes it; its value
    is not used.
    """
    return gr.update(visible=True)


def _resize_to_width(image, basewidth=512):
    """Return ``image`` scaled to ``basewidth`` pixels wide, keeping its aspect ratio."""
    wpercent = basewidth / float(image.size[0])
    # Clamp to 1: an extremely wide image would otherwise round down to a
    # zero-pixel height, which PIL refuses to resize to.
    hsize = max(1, int(float(image.size[1]) * wpercent))
    return image.resize((basewidth, hsize))


def predict(btn_upload, counter, image_hid, input, history):
    """Handle one chat turn: register the uploaded image or answer a question.

    Args:
        btn_upload: The uploaded file as delivered by the upload button; only
            read on the first call (``counter == 0``).
        counter: Number of turns handled so far in this session.
        image_hid: The image stored by the first call; used as the visual
            input for every question.
        input: Text from the prompt box. (The name shadows the builtin but is
            kept so the signature stays unchanged.)
        history: List of ``(user_text, bot_text)`` pairs, or ``None`` when the
            conversation has not started.

    Returns:
        A 5-tuple ``(chatbot_history, state_history, image_name, counter,
        image)`` matching the outputs wired to the event listeners. On
        question turns ``image_name`` is a no-op ``gr.update()`` so the name
        stored by the upload turn is left untouched.
    """
    history = history or []

    if counter == 0:
        # Only this branch needs uuid, so import it locally rather than
        # touching the file-level import block.
        import uuid

        # Convert to RGB so modes PNG cannot store (e.g. CMYK) do not fail on
        # save, then shrink to a fixed width.
        image_in = _resize_to_width(Image.open(btn_upload).convert("RGB"))

        # Use a unique name per upload: the queue serves several sessions at
        # once, and a single shared filename would let one user's image
        # overwrite (and be shown to) another.
        # NOTE(review): these files are never deleted — add cleanup if disk
        # usage matters.
        img_name = f"uploaded_image_{uuid.uuid4().hex}.png"
        image_in.save(img_name)

        # Show the uploaded picture as the bot's first message.
        response = f'<img src="/file={img_name}">'
        history.append((input, response))
        counter += 1
        return history, history, img_name, counter, image_in

    print(f"prompt is :{input}")
    # Prompt format: "Question: Is this photo unusual? Answer: "
    prompt = f"Question: {input} Answer: "
    # Cast the inputs to the dtype the model was loaded with, so they match
    # on both CUDA (float16) and CPU (float32).
    inputs = processor(image_hid, text=prompt, return_tensors="pt").to(device, model.dtype)

    generated_ids = model.generate(**inputs, max_new_tokens=10)
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0].strip()
    print(f"generated_text is : {generated_text}")

    history.append((input, generated_text))
    counter += 1
    return history, history, gr.update(), counter, image_hid
# Page header shown in the left column: title, model description and credits.
HEADER_HTML = """<div style="text-align: center; max-width: 700px; margin: 0 auto;">
<div
style="
display: inline-flex;
align-items: center;
gap: 0.8rem;
font-size: 1.75rem;
"
>
<h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
Bringing Visual Conversations to Life with BLIP2
</h1>
</div>
<p style="margin-bottom: 10px; font-size: 94%">
Blip2 is functioning as an <b>instructed zero-shot image-to-text generation</b> model using OPT-2.7B in this Space.
It shows a wide range of capabilities including visual conversation, visual knowledge reasoning, visual commensense reasoning, storytelling,
personalized image-to-text generation etc.<br>
BLIP-2 by <a href="https://huggingface.co/Salesforce" target="_blank">Salesforce</a> is now available in🤗Transformers!
This model was contributed by <a href="https://twitter.com/NielsRogge" target="_blank">nielsr</a>.
The BLIP-2 model was proposed in <a href="https://arxiv.org/abs/2301.12597" target="_blank">BLIP-2: Bootstrapping Language-Image Pre-training with Frozen Image Encoders and Large Language Models</a>
by Junnan Li, Dongxu Li, Silvio Savarese, Steven Hoi.<br><br>
</p></div>"""

# "Duplicate Space" badge and caption shown under the header.
DUPLICATE_SPACE_HTML = """<a href="https://huggingface.co/spaces/ysharma/InstructPix2Pix_Chatbot?duplicate=true"><img src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>Duplicate Space with GPU Upgrade for fast Inference & no queue<br>"""

# Blocks Layout - leaving this here for moment - "#chatbot-component .overflow-y-auto{height:800px}"
with gr.Blocks(css="#chatbot-component {height: 600px} #input_prompt {position: absolute; bottom: 0;}") as demo:
    with gr.Row():
        # Left column: static information about the demo.
        with gr.Column(scale=1):
            gr.HTML(HEADER_HTML)
            gr.HTML(DUPLICATE_SPACE_HTML)

        # Right column: upload button, prompt box, chat window and hidden state.
        with gr.Column(elem_id="column_container", scale=2):
            btn_upload = gr.UploadButton(
                "Upload image!",
                file_types=["image"],
                file_count="single",
                elem_id="upload_button",
            )
            text_in = gr.Textbox(
                value='',
                placeholder="Type your questions here and press enter",
                elem_id="input_prompt",
                visible=False,
                label='Great! Now you can ask questions to get more information about the image',
            )
            chatbot = gr.Chatbot(elem_id='chatbot-component', label='Converse with Images')
            state_in = gr.State()
            counter_out = gr.Number(visible=False, value=0, precision=0)
            text_out = gr.Textbox(visible=False)  # receives the image name
            image_hid = gr.Image(visible=False)

    # Both the upload and the prompt submission go through predict with the
    # same inputs and outputs, so the lists are defined once.
    predict_inputs = [btn_upload, counter_out, image_hid, text_in, state_in]
    predict_outputs = [chatbot, state_in, text_out, counter_out, image_hid]

    btn_upload.upload(predict, predict_inputs, predict_outputs)
    # Reveal the prompt box after an image has been uploaded.
    btn_upload.upload(fn=update_textbox_config, inputs=text_in, outputs=text_in)
    text_in.submit(predict, predict_inputs, predict_outputs)
    # Hide the upload button whenever the chatbot content changes.
    chatbot.change(fn=upload_button_config, outputs=btn_upload)

demo.queue(concurrency_count=10)
demo.launch(debug=True)