# Molmo-7B-D-0924 / app.py
from transformers import AutoModelForCausalLM, AutoProcessor, GenerationConfig
from PIL import Image
import torch
USE_GPU = True
device = torch.device("cuda" if USE_GPU and torch.cuda.is_available() else "cpu")
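# Load the Molmo processor; it handles both image preprocessing and tokenization.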
processor = AutoProcessor.from_pretrained(
    'allenai/MolmoE-1B-0924',
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto' if USE_GPU else None,
    cache_dir="./models/molmo1"
)
# Load the model directly. Wrapping from_pretrained in accelerate's
# init_empty_weights() would instantiate parameters on the meta device with
# no weights actually loaded, breaking generation, so that context manager
# is dropped; device_map='auto' already handles GPU placement, and
# from_pretrained ties weights itself.
model = AutoModelForCausalLM.from_pretrained(
    'allenai/MolmoE-1B-0924',
    trust_remote_code=True,
    torch_dtype='auto',
    device_map='auto' if USE_GPU else None,
    cache_dir="./models/molmo1",
    attn_implementation="eager"
)
if not USE_GPU:
    model.to(device)
image_path = "./public/image.jpg" # Replace with your image file path
image = Image.open(image_path)
image = image.convert("RGB")
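# Pack the image and the text prompt into model inputs.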
inputs = processor.process(
    images=[image],
    text="Extract text"
)
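# Move inputs to the model's device and add a batch dimension.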
inputs = {k: v.to(model.device).unsqueeze(0) for k, v in inputs.items()}
print('STARTED')
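# Greedy decoding by default (do_sample is unset); generation halts at the
# end-of-text marker given via stop_strings.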
output = model.generate_from_batch(
    inputs,
    GenerationConfig(
        max_new_tokens=2000,
        # temperature=0.1,
        # top_p=top_p,
        stop_strings="<|endoftext|>"
    ),
    tokenizer=processor.tokenizer
)
# Only get generated tokens; decode them to text
generated_tokens = output[0, inputs['input_ids'].size(1):]
generated_text = processor.tokenizer.decode(generated_tokens, skip_special_tokens=True)
print(generated_text)
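# A minimal reusable sketch built from the same calls above: it wraps the
# preprocess / generate / decode steps into one helper so other images or
# prompts can be run without repeating the boilerplate. The function name and
# its defaults are illustrative assumptions, not part of the original script.
def extract_text(path, prompt="Extract text"):
    img = Image.open(path).convert("RGB")
    batch = processor.process(images=[img], text=prompt)
    # Same device move and batch dimension as in the main script.
    batch = {k: v.to(model.device).unsqueeze(0) for k, v in batch.items()}
    out = model.generate_from_batch(
        batch,
        GenerationConfig(max_new_tokens=2000, stop_strings="<|endoftext|>"),
        tokenizer=processor.tokenizer
    )
    # Decode only the newly generated tokens, skipping the prompt.
    tokens = out[0, batch['input_ids'].size(1):]
    return processor.tokenizer.decode(tokens, skip_special_tokens=True)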