bensheng committed
Commit 6ba4ced
1 Parent(s): e3adf3e

use transform

Files changed (1)
  1. app.py +41 -51
app.py CHANGED
@@ -1,64 +1,54 @@
 import gradio as gr
-from huggingface_hub import InferenceClient
+from transformers import AutoTokenizer, AutoModelForCausalLM
+import torch
+from PIL import Image
 import base64
+from io import BytesIO

-client = InferenceClient("openbmb/MiniCPM-Llama3-V-2_5-int4",trust_remote_code=True)
+# Load the model and tokenizer
+model_name = "openbmb/MiniCPM-Llama3-V-2_5-int4"
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True)

-def encode_image(image_path):
-    with open(image_path, "rb") as image_file:
-        return base64.b64encode(image_file.read()).decode('utf-8')
+def encode_image(image):
+    buffered = BytesIO()
+    image.save(buffered, format="PNG")
+    return base64.b64encode(buffered.getvalue()).decode('utf-8')

-def respond(
-    message,
-    image,
-    history,
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for user_msg, bot_msg in history:
-        messages.append({"role": "user", "content": user_msg})
-        messages.append({"role": "assistant", "content": bot_msg})
-
-    if image:
-        base64_image = encode_image(image)
-        image_message = f"<image>{base64_image}</image>"
-        message = image_message + "\n" + message
+def generate_text(prompt, max_length=100):
+    inputs = tokenizer(prompt, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(**inputs, max_length=max_length, num_return_sequences=1)
+    return tokenizer.decode(outputs[0], skip_special_tokens=True)

-    messages.append({"role": "user", "content": message})
-
-    response = ""
-    for message in client.text_generation(
-        prompt=f"{messages}",
-        max_new_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.token.text
-        response += token
-        yield response, history + [(message, response)]
+def predict(image, prompt):
+    if image is not None:
+        # Make sure image is a PIL Image object
+        if isinstance(image, str):
+            image = Image.open(image)
+
+        # Encode the image
+        encoded_image = encode_image(image)
+
+        # Prepare the input
+        full_prompt = f"<image>{encoded_image}</image>\n{prompt if prompt else 'Describe this image.'}"
+
+        # Generate the text
+        result = generate_text(full_prompt)
+
+        return f"Model response: {result}\n\nUser prompt: {prompt}"
+    else:
+        return "No image uploaded. " + (f"You asked: {prompt}" if prompt else "Please upload an image and optionally provide a prompt.")

 demo = gr.Interface(
-    respond,
+    predict,
     inputs=[
-        gr.Textbox(label="Message"),
-        gr.Image(type="filepath", label="Upload Image"),
-        gr.State([]), # for history
-        gr.Textbox(value="You are a friendly AI assistant capable of understanding images and text.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"),
+        gr.Image(type="pil", label="Upload Image"),
+        gr.Textbox(label="Prompt (optional)")
     ],
-    outputs=[
-        gr.Textbox(label="Response"),
-        gr.State() # for updated history
-    ],
-    title="MiniCPM-Llama3-V-2_5 Image and Text Chat",
-    description="Upload an image and ask questions about it, or just chat without an image.",
-    allow_flagging="never"
+    outputs=gr.Textbox(label="Result"),
+    title="Image Analysis with MiniCPM-Llama3-V-2_5-int4",
+    description="Upload an image and optionally provide a prompt for analysis."
 )

 if __name__ == "__main__":
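
A minimal local smoke test of the new predict function, for reference only (not part of the commit). It assumes the updated app.py is importable from the working directory and that a local test image named sample.png exists; loading the module will also load the model.

# Hypothetical smoke test for the updated app.py (assumed importable as "app")
from PIL import Image

from app import predict  # the function added in this commit

image = Image.open("sample.png")                     # any local test image
print(predict(image, "What is in this picture?"))    # image + prompt branch
print(predict(None, "Hello"))                        # no-image branch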