gokaygokay committed on
Commit
0598d11
1 Parent(s): 6cfd7ba

llm prompt

Browse files
Files changed (2) hide show
  1. app.py +65 -6
  2. llm_inference.py +225 -0
app.py CHANGED
@@ -9,6 +9,7 @@ import numpy as np
9
  import os
10
  import subprocess
11
  from huggingface_hub import hf_hub_download
 
12
 
13
  # Install flash-attn
14
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
@@ -39,6 +40,9 @@ hf_hub_download(
39
  token = huggingface_token
40
  )
41
 
 
 
 
42
  # Florence caption function
43
  @spaces.GPU
44
  def florence_caption(image):
@@ -70,14 +74,19 @@ def enhance_prompt(input_prompt):
70
  return enhanced_text
71
 
72
  @spaces.GPU(duration=60)
73
- def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, negative_prompt="", progress=gr.Progress(track_tqdm=True)):
74
  if image is not None:
75
  # Convert image to PIL if it's not already
76
  if not isinstance(image, Image.Image):
77
  image = Image.fromarray(image)
78
 
79
- prompt = florence_caption(image)
80
- print(prompt)
 
 
 
 
 
81
  else:
82
  prompt = text_prompt
83
 
@@ -101,6 +110,24 @@ def process_workflow(image, text_prompt, use_enhancer, seed, randomize_seed, wid
101
 
102
  return image, prompt, seed
103
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
104
  custom_css = """
105
  .input-group, .output-group {
106
  border: 1px solid #e0e0e0;
@@ -139,6 +166,25 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue", secondar
139
  text_prompt = gr.Textbox(label="Text Prompt (optional, used if no image is uploaded)")
140
  negative_prompt = gr.Textbox(label="Negative Prompt")
141
  use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
143
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
144
  width = gr.Slider(label="Width", minimum=512, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
@@ -154,13 +200,26 @@ with gr.Blocks(css=custom_css, theme=gr.themes.Soft(primary_hue="blue", secondar
154
  final_prompt = gr.Textbox(label="Final Prompt Used")
155
  used_seed = gr.Number(label="Seed Used")
156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
  generate_btn.click(
158
  fn=process_workflow,
159
  inputs=[
160
- input_image, text_prompt, use_enhancer, seed, randomize_seed,
161
- width, height, guidance_scale, num_inference_steps, negative_prompt
162
  ],
163
  outputs=[output_image, final_prompt, used_seed]
164
  )
165
 
166
- demo.launch(debug=True)
 
9
  import os
10
  import subprocess
11
  from huggingface_hub import hf_hub_download
12
+ from llm_inference import LLMInferenceNode
13
 
14
  # Install flash-attn
15
  subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
40
  token = huggingface_token
41
  )
42
 
43
+ # Initialize LLMInferenceNode
44
+ llm_node = LLMInferenceNode()
45
+
46
  # Florence caption function
47
  @spaces.GPU
48
  def florence_caption(image):
 
74
  return enhanced_text
75
 
76
  @spaces.GPU(duration=60)
77
+ def process_workflow(image, text_prompt, use_enhancer, use_llm_generator, llm_provider, llm_model, prompt_type, seed, randomize_seed, width, height, guidance_scale, num_inference_steps, negative_prompt="", progress=gr.Progress(track_tqdm=True)):
78
  if image is not None:
79
  # Convert image to PIL if it's not already
80
  if not isinstance(image, Image.Image):
81
  image = Image.fromarray(image)
82
 
83
+ caption = florence_caption(image)
84
+ print(f"Florence caption: {caption}")
85
+
86
+ if use_llm_generator:
87
+ prompt = generate_llm_prompt(caption, llm_provider, llm_model, prompt_type)
88
+ else:
89
+ prompt = caption
90
  else:
91
  prompt = text_prompt
92
 
 
110
 
111
  return image, prompt, seed
112
 
113
def generate_llm_prompt(input_text, provider, model, prompt_type):
    """Rewrite ``input_text`` into a text-to-image prompt using the LLM node.

    Args:
        input_text: Description to rewrite (here, the Florence caption).
        provider: Provider name forwarded to ``llm_node.generate``.
        model: Model identifier forwarded to ``llm_node.generate``.
        prompt_type: Prompt template name forwarded to ``llm_node.generate``.

    Returns:
        The generated prompt, or ``input_text`` unchanged when generation
        fails for any reason.
    """
    # LLMInferenceNode.generate() does not raise on failure: it catches the
    # exception and *returns* a message starting with this prefix. Without
    # this check the error text would be used as the image prompt.
    error_prefix = "Error occurred while processing the request"

    try:
        result = llm_node.generate(
            input_text=input_text,
            long_talk=True,
            compress=False,
            compression_level="medium",
            poster=False,
            prompt_type=prompt_type,
            provider=provider,
            model=model,
        )
    except Exception as e:
        print(f"An error occurred in generate_llm_prompt: {e}")
        return input_text  # Return original input if there's an error

    if not isinstance(result, str) or not result.strip():
        print("An error occurred in generate_llm_prompt: empty response from the LLM")
        return input_text

    if result.startswith(error_prefix):
        print(f"An error occurred in generate_llm_prompt: {result}")
        return input_text

    return result
130
+
131
  custom_css = """
132
  .input-group, .output-group {
133
  border: 1px solid #e0e0e0;
 
166
  text_prompt = gr.Textbox(label="Text Prompt (optional, used if no image is uploaded)")
167
  negative_prompt = gr.Textbox(label="Negative Prompt")
168
  use_enhancer = gr.Checkbox(label="Use Prompt Enhancer", value=False)
169
+ use_llm_generator = gr.Checkbox(label="Use LLM Prompt Generator", value=False)
170
+ llm_provider = gr.Dropdown(
171
+ choices=["Hugging Face", "SambaNova"],
172
+ label="LLM Provider",
173
+ value="Hugging Face",
174
+ visible=False
175
+ )
176
+ llm_model = gr.Dropdown(
177
+ label="LLM Model",
178
+ choices=["Qwen/Qwen2.5-72B-Instruct", "meta-llama/Meta-Llama-3.1-70B-Instruct", "mistralai/Mixtral-8x7B-Instruct-v0.1", "mistralai/Mistral-7B-Instruct-v0.3"],
179
+ value="Qwen/Qwen2.5-72B-Instruct",
180
+ visible=False
181
+ )
182
+ prompt_type = gr.Dropdown(
183
+ choices=["Random", "Long", "Short", "Medium", "OnlyObjects", "NoFigure", "Landscape", "Fantasy"],
184
+ label="Prompt Type",
185
+ value="Random",
186
+ visible=False
187
+ )
188
  seed = gr.Slider(label="Seed", minimum=0, maximum=MAX_SEED, step=1, value=0)
189
  randomize_seed = gr.Checkbox(label="Randomize Seed", value=True)
190
  width = gr.Slider(label="Width", minimum=512, maximum=MAX_IMAGE_SIZE, step=32, value=1024)
 
200
  final_prompt = gr.Textbox(label="Final Prompt Used")
201
  used_seed = gr.Number(label="Seed Used")
202
 
203
def update_llm_visibility(use_llm):
    """Show or hide the LLM option dropdowns to follow the checkbox value.

    Returns a mapping from each LLM dropdown component to a ``gr.update``
    whose ``visible`` flag equals ``use_llm``.
    """
    llm_controls = (llm_provider, llm_model, prompt_type)
    return {control: gr.update(visible=use_llm) for control in llm_controls}
209
+
210
+ use_llm_generator.change(
211
+ update_llm_visibility,
212
+ inputs=[use_llm_generator],
213
+ outputs=[llm_provider, llm_model, prompt_type]
214
+ )
215
+
216
  generate_btn.click(
217
  fn=process_workflow,
218
  inputs=[
219
+ input_image, text_prompt, use_enhancer, use_llm_generator, llm_provider, llm_model, prompt_type,
220
+ seed, randomize_seed, width, height, guidance_scale, num_inference_steps, negative_prompt
221
  ],
222
  outputs=[output_image, final_prompt, used_seed]
223
  )
224
 
225
+ demo.launch(debug=True)
llm_inference.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random # Import the random module
3
+ from openai import OpenAI
4
+
5
+
6
class LLMInferenceNode:
    """Rewrite a text description into a text-to-image prompt via a chat LLM.

    Two OpenAI-compatible endpoints are supported: "Hugging Face" and
    "SambaNova". Credentials are read from the ``HUGGINGFACE_TOKEN`` and
    ``SAMBANOVA_API_KEY`` environment variables. A provider whose key is
    missing gets no client (the attribute is ``None``) and only fails when
    that provider is actually requested, so one missing key does not prevent
    the node from being constructed.
    """

    # Prefix of the string generate() returns when a request fails.
    ERROR_PREFIX = "Error occurred while processing the request"

    _BASE_URLS = {
        "Hugging Face": "https://api-inference.huggingface.co/v1/",
        "SambaNova": "https://api.sambanova.ai/v1",
    }

    # Model used when the caller does not pass one.
    _DEFAULT_MODELS = {
        "Hugging Face": "meta-llama/Meta-Llama-3.1-70B-Instruct",
        "SambaNova": "Meta-Llama-3.1-70B-Instruct",
    }

    # Short labels some models put in front of the caption ("Caption: ...").
    _OUTPUT_LABELS = ("caption", "description", "prompt", "output")

    _DEFAULT_LONG_PROMPT = """Create a detailed visually descriptive caption of this description,
which will be used as a prompt for a text to image AI system (caption only, no instructions like "create an image").
Remove any mention of digital artwork or artwork style. Give detailed visual descriptions of the character(s), including ethnicity, skin tone, expression etc.
Imagine using keywords for a still for someone who has aphantasia. Describe the image style, e.g., any photographic or art styles/techniques utilized.
Make sure to fully describe all aspects of the cinematography, with abundant technical details and visual descriptions.
If there is more than one image, combine the elements and characters from all of the images creatively into a single
cohesive composition with a single background, inventing an interaction between the characters.
Be creative in combining the characters into a single cohesive scene.
Focus on two primary characters (or one) and describe an interesting interaction between them, such as a hug, a kiss, a fight, giving an object,
an emotional reaction/interaction. If there is more than one background in the images, pick the most appropriate one.
Your output is only the caption itself, no comments or extra formatting.
The caption is in a single long paragraph.
If you feel the images are inappropriate, invent a new scene/characters inspired by these.
Additionally, incorporate a specific movie director's visual style and describe the lighting setup in detail,
including the type, color, and placement of light sources to create the desired mood and atmosphere.
Always frame the scene, including details about the film grain, color grading, and any artifacts or characteristics specific."""

    _DEFAULT_SIMPLE_PROMPT = """Create a brief, straightforward caption for this description, suitable for a text-to-image AI system.
Focus on the main elements, key characters, and overall scene without elaborate details.
Provide a clear and concise description in one or two sentences. Your output is only the caption itself, no comments or extra formatting.
The caption is in a single long paragraph."""

    _POSTER_PROMPT = """Analyze the provided description and extract key information to create a movie poster style description. Format the output as follows:
Title: A catchy, intriguing title that captures the essence of the scene, place the title in "".
Main character: Give a description of the main character.
Background: Describe the background in detail.
Supporting characters: Describe the supporting characters.
Branding type: Describe the branding type.
Tagline: Include a tagline that captures the essence of the movie.
Visual style: Ensure that the visual style fits the branding type and tagline.
You are allowed to make up film and branding names, and do them like 80's, 90's or modern movie posters.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    _ONLY_OBJECTS_PROMPT = """Create a highly detailed and visually rich description focusing solely on inanimate objects,
without including any human or animal figures. Describe the objects' shapes, sizes, colors, textures, and materials in great detail.
Pay attention to their arrangement, positioning, and how they interact with light and shadow. Include information about the setting
or environment these objects are in, such as indoor/outdoor, time of day, weather conditions, and any atmospheric effects.
Mention any unique features, patterns, or imperfections on the objects. Describe the overall composition, perspective, and
any artistic techniques that might be employed to render these objects (e.g., photorealism, impressionistic style, etc.).
Your description should paint a vivid picture that allows someone to imagine the scene without seeing it, focusing on the beauty,
complexity, or significance of everyday objects. Your output is only the caption itself, no comments or extra formatting.
The caption is in a single long paragraph."""

    _NO_FIGURE_PROMPT = """Generate a comprehensive and visually evocative description of a scene
or landscape without including any human or animal figures. Focus on the environment, natural elements, and man-made structures if present.
Describe the topography, vegetation, weather conditions, and time of day in great detail.
Pay attention to colors, textures, and how light interacts with different elements of the scene.
If there are buildings or other structures, describe their architecture, condition, and how they fit into the landscape.
Include sensory details beyond just visual elements - mention sounds, smells, and the overall atmosphere or mood of the scene.
Describe any notable features like bodies of water, geological formations, or sky phenomena.
Consider the perspective from which the scene is viewed and how this affects the composition.
Your description should transport the reader to this location, allowing them to vividly imagine the scene without any living subjects present.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    _LANDSCAPE_PROMPT = """Create an immersive and detailed description of a landscape,
focusing on its natural beauty and geographical features.
Begin with the overall topography - is it mountainous, coastal, forested, desert, or a combination?
Describe the horizon and how land meets sky. Detail the vegetation, noting types of trees, flowers, or grass,
and how they're distributed across the landscape. Include information about any water features -
rivers, lakes, oceans - and how they interact with the land. Describe the sky, including cloud formations,
color gradients, and any celestial bodies visible.
Pay attention to the quality of light, time of day, and season, explaining how these factors affect the colors and shadows in the scene.
Include details about weather conditions and how they impact the landscape.
Mention any geological features like rock formations, cliffs, or unique land patterns.
If there are any distant man-made elements, describe how they integrate with the natural setting.
Your description should capture the grandeur and mood of the landscape,
allowing the reader to feel as if they're standing within this awe-inspiring natural scene.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    _FANTASY_PROMPT = """Craft an extraordinarily detailed and imaginative description of a fantasy scene,
blending elements of magic, otherworldly creatures, and fantastical environments. Begin by setting the overall tone -
is this a dark and foreboding realm, a whimsical fairytale setting, or an epic high-fantasy world?
Describe the landscape, including any impossible or magical geographical features like floating islands,
crystal forests, or rivers of starlight. Detail the flora and fauna,
focusing on fantastical plants and creatures that don't exist in our world.
Include descriptions of any structures or ruins, emphasizing their otherworldly architecture and magical properties.
Describe the sky and any celestial bodies, considering how they might differ from our reality.
Include details about the presence of magic - how it manifests visually,
its effects on the environment, and any magical phenomena occurring in the scene.
If there are characters present, describe their appearance, focusing on non-human features, magical auras, or
fantastical clothing and accessories. Pay attention to colors, textures, and light sources,
especially those that couldn't exist in the real world. Your description should transport the
reader to a realm of pure imagination, where the laws of physics and nature as we know them don't apply.
Your output is only the caption itself, no comments or extra formatting. The caption is in a single long paragraph."""

    # Insertion order matters: "Random" picks from this mapping's keys.
    # NOTE(review): "Medium" maps to the movie-poster template; there is no
    # dedicated medium-length template in this file - confirm this is intended.
    _PROMPT_TYPES = {
        "Long": _DEFAULT_LONG_PROMPT,
        "Short": _DEFAULT_SIMPLE_PROMPT,
        "Medium": _POSTER_PROMPT,
        "OnlyObjects": _ONLY_OBJECTS_PROMPT,
        "NoFigure": _NO_FIGURE_PROMPT,
        "Landscape": _LANDSCAPE_PROMPT,
        "Fantasy": _FANTASY_PROMPT,
    }

    def __init__(self):
        """Read provider credentials from the environment and build clients."""
        self.huggingface_token = os.getenv("HUGGINGFACE_TOKEN")
        self.sambanova_api_key = os.getenv("SAMBANOVA_API_KEY")

        # Each client is None when its key is missing; see _client_for().
        self.huggingface_client = self._make_client("Hugging Face", self.huggingface_token)
        self.sambanova_client = self._make_client("SambaNova", self.sambanova_api_key)

    @classmethod
    def _make_client(cls, provider, api_key):
        """Return an OpenAI-compatible client for ``provider``, or None without a key."""
        if not api_key:
            return None
        return OpenAI(base_url=cls._BASE_URLS[provider], api_key=api_key)

    def _client_for(self, provider, api_key=None):
        """Return the client to use for one request.

        Raises:
            ValueError: if ``provider`` is unknown, or no key is available
                for it (neither passed explicitly nor found at construction).
        """
        if provider not in self._BASE_URLS:
            raise ValueError(f"Unsupported provider: {provider}")
        if api_key:
            # An explicit per-call key takes precedence over the environment.
            return self._make_client(provider, api_key)
        if provider == "Hugging Face":
            client = self.huggingface_client
        else:
            client = self.sambanova_client
        if client is None:
            raise ValueError(f"No API key configured for provider: {provider}")
        return client

    def _select_base_prompt(self, prompt_type, custom_base_prompt):
        """Pick the instruction template for ``prompt_type``.

        "Random" chooses one of the built-in templates. An unknown or empty
        type falls back to ``custom_base_prompt`` when that is non-blank,
        otherwise to the long template.
        """
        print(f"Received prompt_type: '{prompt_type}'")  # Debug print
        if prompt_type == "Random":
            prompt_type = random.choice(list(self._PROMPT_TYPES.keys()))
            print(f"Randomly selected prompt type: {prompt_type}")

        if prompt_type in self._PROMPT_TYPES:
            print(f"Using {prompt_type} prompt")
            return self._PROMPT_TYPES[prompt_type]

        if (custom_base_prompt or "").strip():
            print("Using custom base prompt")
            return custom_base_prompt

        print(f"Warning: Unknown or empty prompt type '{prompt_type}'. Using default long prompt.")
        return self._DEFAULT_LONG_PROMPT

    @classmethod
    def _clean_output(cls, output):
        """Strip a conversational lead-in from the model output.

        Only a *leading* preamble is removed ("Here is the caption: ...",
        "Caption: ..."). A colon that appears later inside the caption is
        left alone, so descriptive text such as "... lighting setup: a warm
        key light ..." is no longer truncated.
        """
        text = output.strip()

        lead, colon, rest = text.partition(":")
        lead_lower = lead.strip().lower()
        is_lead_in = lead_lower.startswith("here") or lead_lower in cls._OUTPUT_LABELS
        if (
            colon
            and rest[:1].isspace()
            and rest.strip()
            and is_lead_in
            and ". " not in lead  # the lead-in must be a single phrase
        ):
            return rest.strip()

        if text.lower().startswith("here"):
            # "Here you go. <caption>" - drop the first sentence.
            sentences = text.split(". ")
            if len(sentences) > 1:
                return ". ".join(sentences[1:]).strip()

        return text

    def generate_prompt(self, dynamic_seed, prompt_type, custom_input):
        """
        Generates a prompt based on the provided seed, prompt type, and custom input.

        Returns ``custom_input`` when it is non-blank, otherwise a
        "Create a random prompt based on ..." request naming ``prompt_type``.
        Note that this reseeds the module-level ``random`` generator with
        ``dynamic_seed``.
        """
        random.seed(dynamic_seed)
        if custom_input and custom_input.strip():
            prompt = custom_input
        else:
            prompt = f"Create a random prompt based on the '{prompt_type}' type."

        # Additional logic can be added here if needed
        print(f"Generated prompt: {prompt}")  # Debug statement
        return prompt

    def generate(
        self,
        input_text,
        long_talk,
        compress,
        compression_level,
        poster,
        prompt_type,
        custom_base_prompt="",
        provider="Hugging Face",
        api_key=None,
        model=None,
    ):
        """Ask the selected provider to turn ``input_text`` into an image prompt.

        Args:
            input_text: Description to rewrite. Text starting with
                "Create a random prompt based on" requests a random
                description instead.
            long_talk: Selects the larger character limits when compressing.
            compress: Append a size-limit instruction (ignored when
                ``poster`` is true).
            compression_level: "soft", "medium" or "hard"; anything else
                uses a 200 character limit.
            poster: When true, disables the compression instruction.
            prompt_type: Template name, or "Random".
            custom_base_prompt: Instructions used when ``prompt_type`` is
                unknown or empty.
            provider: "Hugging Face" or "SambaNova".
            api_key: Optional key for this call; overrides the key read
                from the environment.
            model: Model identifier; a per-provider default is used if falsy.

        Returns:
            The cleaned model output. Failures are not raised: a string
            starting with ``ERROR_PREFIX`` is returned instead.
        """
        try:
            base_prompt = self._select_base_prompt(prompt_type, custom_base_prompt)

            # Handle compression if applicable
            if compress and not poster:
                compression_chars = {
                    "soft": 600 if long_talk else 300,
                    "medium": 400 if long_talk else 200,
                    "hard": 200 if long_talk else 100,
                }
                char_limit = compression_chars.get(compression_level, 200)
                base_prompt += f" Compress the output to be concise while retaining key visual details. MAX OUTPUT SIZE no more than {char_limit} characters."

            # Construct messages for the LLM
            system_message = "You are a helpful assistant. Try your best to give the best response possible to the user."

            if input_text.startswith("Create a random prompt based on"):
                user_message = f"Create a random description based on this\nInstructions: {base_prompt}"
            else:
                user_message = f"{base_prompt}\nDescription: {input_text}"

            # Generate a random seed
            seed = random.randint(0, 10000)
            print(f"Generated seed: {seed}")  # Debug print

            client = self._client_for(provider, api_key)
            request = {
                "model": model or self._DEFAULT_MODELS[provider],
                "max_tokens": 1024,
                "temperature": 1.0,
                "messages": [
                    {"role": "system", "content": system_message},
                    {"role": "user", "content": user_message},
                ],
                "seed": seed,
            }
            if provider == "Hugging Face":
                # Only the Hugging Face request sets nucleus sampling.
                request["top_p"] = 0.95

            response = client.chat.completions.create(**request)
            output = response.choices[0].message.content.strip()

            return self._clean_output(output)

        except Exception as e:
            print(f"An error occurred: {e}")
            return f"{self.ERROR_PREFIX}: {e}"