aifeifei798 committed on
Commit
2fc572d
1 Parent(s): cb9a86d

Upload 4 files

Browse files
Files changed (4) hide show
  1. README.md +4 -4
  2. app.py +89 -0
  3. pre-requirements.txt +1 -0
  4. requirements.txt +3 -0
README.md CHANGED
@@ -1,10 +1,10 @@
1
  ---
2
  title: Florence 2 SD3 Captioner
3
- emoji: 📈
4
- colorFrom: purple
5
- colorTo: indigo
6
  sdk: gradio
7
- sdk_version: 4.37.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
1
  ---
2
  title: Florence 2 SD3 Captioner
3
+ emoji:
4
+ colorFrom: blue
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 4.36.1
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import AutoProcessor, AutoModelForCausalLM
3
+ #import spaces
4
+ import re
5
+ from PIL import Image
6
+
7
import os
import subprocess
import sys

# Install flash-attn at startup, before the model's remote code is loaded.
# The child environment is the current environment plus the override:
# passing a bare dict as ``env`` would REPLACE the whole environment
# (dropping PATH, HOME, proxy settings, ...). ``sys.executable -m pip``
# installs into the interpreter that is running this app, and the list
# argv avoids going through a shell.
# NOTE(review): FLASH_ATTENTION_SKIP_CUDA_BUILD presumably makes the
# flash-attn build skip compiling CUDA kernels - confirm against flash-attn.
# The return code is deliberately not checked (best-effort, as before).
subprocess.run(
    [sys.executable, "-m", "pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
)

# Hub repository providing both the model weights and the processor.
MODEL_ID = 'gokaygokay/Florence-2-SD3-Captioner'

# Model is placed on the CPU and switched to eval mode once at import time.
model = AutoModelForCausalLM.from_pretrained(MODEL_ID, trust_remote_code=True).to("cpu").eval()

processor = AutoProcessor.from_pretrained(MODEL_ID, trust_remote_code=True)


# Markdown shown at the top of the demo page.
TITLE = "# [Florence-2 SD3 Long Captioner](https://huggingface.co/gokaygokay/Florence-2-SD3-Captioner/)"
DESCRIPTION = "[Florence-2 Base](https://huggingface.co/microsoft/Florence-2-base-ft) fine-tuned on Long SD3 Prompt and Image pairs. Check above link for datasets that are used for fine-tuning."
17
+
18
# Phrases stripped from generated captions, mapped to their replacement text.
# Keys must be lowercase: matches are lowercased before the lookup.
_CAPTION_REPLACEMENTS = {
    'captured from ': '',
    'captured at ': '',
}

# Compiled once at import time instead of being rebuilt on every call.
_CAPTION_PATTERN = re.compile(
    '|'.join(re.escape(phrase) for phrase in _CAPTION_REPLACEMENTS),
    flags=re.IGNORECASE,
)


def modify_caption(caption: str) -> str:
    """Remove the first "captured from " / "captured at " phrase from a caption.

    The match is case-insensitive and is not anchored to the start of the
    string: the first occurrence found anywhere in ``caption`` is replaced,
    and any later occurrences are left untouched.

    Args:
        caption (str): A string containing a caption.

    Returns:
        str: The caption with the first matching phrase removed, or the
        caption unchanged when no phrase is present.
    """
    return _CAPTION_PATTERN.sub(
        lambda match: _CAPTION_REPLACEMENTS[match.group(0).lower()],
        caption,
        count=1,
    )
45
+
46
+ #@spaces.GPU
47
def run_example(image):
    """Generate a detailed caption for an image with the loaded captioner.

    Args:
        image: Image array as delivered by the Gradio image input (it is
            converted with ``Image.fromarray``), or ``None`` when the user
            pressed Submit without providing an image.

    Returns:
        str: The generated description after ``modify_caption`` clean-up.

    Raises:
        gr.Error: If no image was supplied.
    """
    # Without this guard a missing image would fail inside Image.fromarray
    # with an opaque error instead of a readable message in the UI.
    if image is None:
        raise gr.Error("Please upload an image before submitting.")

    image = Image.fromarray(image)
    task_prompt = "<DESCRIPTION>"
    prompt = task_prompt + "Describe this image in great detail."

    # Ensure the image is in RGB mode
    if image.mode != "RGB":
        image = image.convert("RGB")

    inputs = processor(text=prompt, images=image, return_tensors="pt").to("cpu")
    generated_ids = model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        num_beams=3,
    )
    # Special tokens are kept in the decoded text that is handed to
    # post_process_generation.
    generated_text = processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    parsed_answer = processor.post_process_generation(
        generated_text,
        task=task_prompt,
        image_size=(image.width, image.height),
    )
    # The parsed result is looked up by the same task token used as prompt.
    return modify_caption(parsed_answer[task_prompt])
66
+
67
+
68
# Custom CSS handed to gr.Blocks.
# NOTE(review): no component in the layout below sets elem_id="output", so
# this rule does not appear to match anything - confirm whether the output
# Textbox was meant to carry that id.
css = """
#output {
    height: 500px;
    overflow: auto;
    border: 1px solid #ccc;
}
"""

# Page layout: heading and description on top, then a single tab holding
# the image input with its submit button on the left and the caption
# output on the right.
with gr.Blocks(css=css) as demo:
    gr.Markdown(TITLE)
    gr.Markdown(DESCRIPTION)
    with gr.Tab(label="Florence-2 SD3 Prompts"):
        with gr.Row():
            with gr.Column():
                input_img = gr.Image(label="Input Picture")
                submit_btn = gr.Button(value="Submit")
            with gr.Column():
                output_text = gr.Textbox(label="Output Text")

    # Clicking Submit runs the captioner on the image and fills the textbox.
    submit_btn.click(fn=run_example, inputs=[input_img], outputs=[output_text])

demo.launch(debug=True)
pre-requirements.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ pip>=23.0.0
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ spaces
2
+ transformers
3
+ timm