hgdgng committed
Commit d989353
1 Parent(s): 17e6d32

Update app.py

Files changed (1):
  1. app.py +23 -29
app.py CHANGED
@@ -1,34 +1,28 @@
- import gradio as gr
- from transformers import pipeline
-
- # Load the large language model (LLM)
- try:
-     # Load model directly
-     from transformers import AutoProcessor, AutoModelForPreTraining
-
-     processor = AutoProcessor.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")
-     model = AutoModelForPreTraining.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")  # You can use a different model here
-     print("Model loaded successfully!")
- except Exception as e:
-     print(f"Error loading model: {e}")
-     llm_pipeline = None
-
- # Define the function to generate text based on input prompt
- def generate_text(prompt):
-     if llm_pipeline is None:
-         return "Error: Model not loaded."
-     result = llm_pipeline(prompt, max_length=100, num_return_sequences=1)
-     return result[0]['generated_text']
-
- # Create the Gradio interface
- interface = gr.Interface(
-     fn=generate_text,
-     inputs=gr.Textbox(lines=7, label="Input Prompt"),
-     outputs="text",
-     title="Large Language Model Text Generation",
-     description="Enter a prompt to generate text using a large language model."
- )
-
- print("Launching the Gradio interface...")
- # Launch the interface
- interface.launch()
+ import requests
+ import torch
+ from PIL import Image
+ from transformers import MllamaForConditionalGeneration, AutoProcessor
+
+ model_id = "meta-llama/Llama-3.2-11B-Vision-Instruct"
+
+ model = MllamaForConditionalGeneration.from_pretrained(
+     model_id,
+     torch_dtype=torch.bfloat16,
+     device_map="auto",
+ )
+ processor = AutoProcessor.from_pretrained(model_id)
+
+ url = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg"
+ image = Image.open(requests.get(url, stream=True).raw)
+
+ messages = [
+     {"role": "user", "content": [
+         {"type": "image"},
+         {"type": "text", "text": "If I had to write a haiku for this one, it would be: "}
+     ]}
+ ]
+ input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+ inputs = processor(image, input_text, return_tensors="pt").to(model.device)
+
+ output = model.generate(**inputs, max_new_tokens=30)
+ print(processor.decode(output[0]))