from vllm import LLM, SamplingParams
import gradio as gr

repo_id = "mistralai/Pixtral-12B-2409"  # Replace with the model you would like to use

# vLLM defaults to max_tokens=16, which truncates answers; raise it explicitly.
sampling_params = SamplingParams(temperature=0.8, top_p=0.95, max_tokens=512)

# Load the model lazily and only once, so each request does not pay the cost
# of re-initializing a 12B-parameter model.
llm = None

# @spaces.GPU  # [uncomment to use ZeroGPU]
def infer(image_url, prompt, progress=gr.Progress(track_tqdm=True)):
    global llm
    if llm is None:
        # Pixtral ships a Mistral-format tokenizer, hence tokenizer_mode="mistral".
        llm = LLM(model=repo_id, tokenizer_mode="mistral")
    # Build an OpenAI-style chat message combining the text prompt and the image URL.
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": image_url}},
            ],
        },
    ]
    outputs = llm.chat(messages, sampling_params=sampling_params)
    text = outputs[0].outputs[0].text
    print(text)
    # Return the generated text (not the raw RequestOutput objects) so it
    # renders in the result Textbox.
    return text

example_images = ["https://picsum.photos/id/237/200/300"]
example_prompts = ["What do you see in this image?"]

css = """
#col-container {
    margin: 0 auto;
    max-width: 640px;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.Markdown("""
        # Pixtral Vision-Language Gradio Template
        """)
        with gr.Row():
            prompt = gr.Text(
                label="Prompt",
                show_label=False,
                max_lines=2,
                placeholder="Enter your prompt",
                container=False,
            )
            image_url = gr.Text(
                label="Image URL",
                show_label=False,
                max_lines=1,
                placeholder="Enter your image URL",
                container=False,
            )
            run_button = gr.Button("Run", scale=0)
        result = gr.Textbox(show_label=False)
        gr.Examples(examples=example_images, inputs=[image_url])
        gr.Examples(examples=example_prompts, inputs=[prompt])
    gr.on(
        triggers=[run_button.click, image_url.submit, prompt.submit],
        fn=infer,
        inputs=[image_url, prompt],
        outputs=[result],
    )

demo.queue().launch()
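
# A minimal sketch of how to try the demo locally, assuming this file is saved
# as app.py (the filename is an assumption, not part of the original script),
# vLLM and Gradio are installed (e.g. `pip install vllm gradio`), and a GPU
# with enough memory for a 12B model is available:
#
#   python app.py
#
# Gradio serves the UI at http://127.0.0.1:7860 by default; calling
# demo.queue().launch(share=True) instead yields a temporary public link.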