wjbmattingly committed on
Commit
e1ce3ad
1 Parent(s): 02dda57

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +68 -0
README.md ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ datasets:
3
+ - scta/scta-htr-training-data
4
+ base_model:
5
+ - Qwen/Qwen2-VL-2B-Instruct
6
+ ---
7
+ import torch
8
+ from transformers import Qwen2VLForConditionalGeneration, AutoTokenizer, AutoProcessor
9
+ from qwen_vl_utils import process_vision_info
10
+
11
+
12
+ device = "cuda" if torch.cuda.is_available() else "cpu"
13
+
14
+ model_dir = "medieval-data/qwen2-vl-2b-scta"
15
+
16
+
17
+
18
+ model = Qwen2VLForConditionalGeneration.from_pretrained(
19
+ model_dir, torch_dtype="auto", device_map="auto"
20
+ )
21
+
22
+ processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-2B-Instruct")
23
+ image_url ="""https://loris2.scta.info/lon/L28v.jpg/full/full/0/default.jpg"""
24
+ messages = [
25
+ {
26
+ "role": "user",
27
+ "content": [
28
+ {
29
+ "type": "image",
30
+ "image": image_url,
31
+ },
32
+ {"type": "text", "text": "Convert this image to text."},
33
+ ],
34
+ }
35
+ ]
36
+
37
+ # Preparation for inference
38
+ text = processor.apply_chat_template(
39
+ messages, tokenize=False, add_generation_prompt=True
40
+ )
41
+ image_inputs, video_inputs = process_vision_info(messages)
42
+ inputs = processor(
43
+ text=[text],
44
+ images=image_inputs,
45
+ videos=video_inputs,
46
+ padding=True,
47
+ return_tensors="pt",
48
+ )
49
+ inputs = inputs.to(device)
50
+
51
+ # Inference: Generation of the output
52
+ generated_ids = model.generate(**inputs, max_new_tokens=4000)
53
+ generated_ids_trimmed = [
54
+ out_ids[len(in_ids) :] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
55
+ ]
56
+ output_text = processor.batch_decode(
57
+ generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
58
+ )
59
+
60
+ print(output_text)
61
+ # Import the IPython helpers used below to display the source image
62
+ from IPython.display import display, Image
63
+
64
+ # Display the output text
65
+ print(output_text)
66
+
67
+ # Display the image
68
+ display(Image(url=image_url))