import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from PIL import Image
import base64
from io import BytesIO

# Load the model and tokenizer. The int4 checkpoint is 4-bit quantized, so it
# generally needs the bitsandbytes package and a CUDA device;
# trust_remote_code pulls in the custom MiniCPM-V model class.
model_name = "openbmb/MiniCPM-Llama3-V-2_5-int4"
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)
model.eval()

def encode_image(image):
    """Serialize a PIL image to a base64-encoded PNG string."""
    buffered = BytesIO()
    image.save(buffered, format="PNG")
    return base64.b64encode(buffered.getvalue()).decode('utf-8')

def generate_text(prompt, max_new_tokens=100):
    """Run text-only generation and return the decoded output."""
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        # max_new_tokens bounds only the generated continuation, not the
        # (potentially very long) prompt tokens.
        outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, num_return_sequences=1)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

def predict(image, prompt):
    if image is not None:
        # Make sure image is a PIL Image object (Gradio can pass a file path)
        if isinstance(image, str):
            image = Image.open(image)
        # Encode the image
        encoded_image = encode_image(image)
        # Prepare the input. Caveat: inlining base64 bytes as prompt text never
        # reaches the model's vision encoder; see the chat-based sketch below.
        full_prompt = f"<image>{encoded_image}</image>\n{prompt if prompt else 'Describe this image.'}"
        # Generate text
        result = generate_text(full_prompt)
        return f"Model response: {result}\n\nUser prompt: {prompt}"
    else:
        return "No image uploaded. " + (f"You asked: {prompt}" if prompt else "Please upload an image and optionally provide a prompt.")
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Image(type="pil", label="Upload Image"),
        gr.Textbox(label="Prompt (optional)"),
    ],
    outputs=gr.Textbox(label="Result"),
    title="Image Analysis with MiniCPM-Llama3-V-2_5-int4",
    description="Upload an image and optionally provide a prompt for analysis.",
)

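# Long generations can exceed request timeouts in some deployments; calling
# demo.queue() before launch() is one common way to mitigate that.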
if __name__ == "__main__":
    demo.launch()