sadjava committed on
Commit
e80e8f4
1 Parent(s): b333832
Files changed (7) hide show
  1. 1.jpg +0 -0
  2. 2.jpg +0 -0
  3. 3.jpg +0 -0
  4. __pycache__/model.cpython-39.pyc +0 -0
  5. app.py +72 -0
  6. model.pt +3 -0
  7. model.py +29 -0
1.jpg ADDED
2.jpg ADDED
3.jpg ADDED
__pycache__/model.cpython-39.pyc ADDED
Binary file (1.12 kB). View file
 
app.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: ../app.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['device', 'model', 'MEAN', 'STD', 'transform', 'image', 'label', 'examples', 'intf', 'to_img', 'draw_image_with_bbox',
5
+ 'localize_dog']
6
+
7
+ # %% ../app.ipynb 3
8
# The exported module contained no import statements, so every name used in
# this file was unresolved at import time. Aliases follow how the names are
# used below (np.array, gr.Interface, transforms.Compose, cv2.resize, ...).
import cv2
import gradio as gr
import numpy as np
import torch
from PIL import Image, ImageDraw
from torchvision import transforms

from model import Model

# Run on the GPU when PyTorch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

model = Model()
# map_location remaps the stored tensors onto `device`, so a checkpoint that
# was saved from a CUDA process still loads on a CPU-only host.
model.load_state_dict(torch.load('model.pt', map_location=device))
model = model.to(device)
# Inference only: switch layers such as batch norm to evaluation behaviour.
model.eval()
14
+
15
+ # %% ../app.ipynb 4
16
# Per-channel mean / standard deviation used by the Normalize step below and
# reversed again in to_img().
MEAN = [0.485, 0.456, 0.406]
STD = [0.229, 0.224, 0.225]

# %% ../app.ipynb 5
# Preprocessing applied to every input image: resize to 224x224, convert to a
# tensor, then normalise each channel with MEAN / STD.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=MEAN, std=STD),
    ]
)
25
+
26
+ # %% ../app.ipynb 6
27
def to_img(inp):
    """Undo the MEAN/STD normalisation and rescale the result to 0-255.

    ``inp`` is multiplied by STD and shifted by MEAN, clipped to [0, 1] and
    finally multiplied by 255. No integer cast happens here; the caller is
    expected to convert the dtype if it needs one.
    """
    denormalized = np.array(STD) * inp + np.array(MEAN)
    return np.clip(denormalized, 0, 1) * 255
33
+
34
+
35
def draw_image_with_bbox(im, shape, pred_bbox=None, pred_obj=1):
    """Rebuild a displayable image from a normalised tensor and outline the box.

    Args:
        im: Tensor exposing ``.numpy()``; it is transposed with ``(1, 2, 0)``,
            so it is presumably channel-first (C, H, W) - confirm with caller.
        shape: Target size handed to ``cv2.resize`` as ``dsize``. ``shape[0]``
            scales x coordinates and ``shape[1]`` scales y coordinates.
        pred_bbox: ``(xc, yc, w, h)`` box, multiplied by ``shape`` to obtain
            pixel coordinates. ``None`` means "no box" and nothing is drawn.
        pred_obj: Score compared against 0.5; the box is drawn only above it.

    Returns:
        A PIL image of size ``shape`` with a red rectangle when a box is drawn.
    """
    im = im.numpy().transpose((1, 2, 0))
    im = cv2.resize(im, dsize=shape)
    image_with_bbox = Image.fromarray(to_img(im).astype(np.uint8))

    # The declared default for pred_bbox is None; unpacking it used to raise
    # TypeError, so treat it (and a low score) as "return the plain image".
    if pred_bbox is None or not pred_obj > 0.5:
        return image_with_bbox

    xc, yc, w, h = pred_bbox
    xmin = (xc - w / 2) * shape[0]
    ymin = (yc - h / 2) * shape[1]
    w = w * shape[0]
    h = h * shape[1]
    xmin, ymin, w, h = map(int, [xmin, ymin, w, h])

    # Keep the outline one pixel inside the image border.
    left = max(xmin, 1)
    top = max(ymin, 1)
    right = min(xmin + w, shape[0] - 1)
    bottom = min(ymin + h, shape[1] - 1)

    # A box predicted outside the image clamps to inverted corners, which
    # recent Pillow releases reject with ValueError; skip drawing instead.
    if right >= left and bottom >= top:
        ImageDraw.Draw(image_with_bbox).rectangle(
            (left, top, right, bottom), outline='red'
        )

    return image_with_bbox
52
+
53
+ # %% ../app.ipynb 7
54
def localize_dog(im):
    """Run the localisation model on one image and return the annotated image.

    Args:
        im: Input image exposing ``.size`` and ``.convert('RGB')`` (the Gradio
            input in this file is configured with ``type="pil"``).

    Returns:
        The image produced by ``draw_image_with_bbox`` at the original size.
    """
    shape = im.size[:2]
    im = transform(im.convert('RGB'))

    # Inference only: skip autograd bookkeeping so no graph is kept alive
    # for every request.
    with torch.no_grad():
        pred_label, pred_bbox = model(im.unsqueeze(0).to(device))

    # Bring the single-sample outputs back to the CPU before the drawing code
    # converts them to plain Python numbers.
    return draw_image_with_bbox(im, shape, pred_bbox[0].cpu(), pred_label[0].cpu())
61
+
62
+ # %% ../app.ipynb 9
63
# Gradio wiring: a PIL image goes in, the annotated PIL image comes out.
examples = ['1.jpg', '2.jpg', '3.jpg']
image = gr.inputs.Image(type="pil")
label = gr.outputs.Image(type="pil")

intf = gr.Interface(
    fn=localize_dog,
    inputs=image,
    outputs=label,
    title='Dog localization',
    examples=examples,
)
# Launch the interface as soon as this module is executed.
intf.launch()
model.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ae751730cd1867ed8266116a6e5ebcb0e0052fb2fed2cd58c2f7e76d05b08f5
3
+ size 46900993
model.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch.nn as nn
2
+ from torchvision.models import resnet18, ResNet18_Weights
3
+ import torch.nn.functional as F
4
+ import torch
5
+
6
class Model(nn.Module):
    """ResNet-18 feature extractor with two linear heads.

    The heads share the backbone features and produce:
      * one sigmoid-activated value per sample (object presence score), and
      * four raw values per sample (bounding box). When the score is below
        the caller's threshold the four box values carry no meaning.
    """

    def __init__(self):
        super().__init__()
        # Pass an explicit weights member; the enum class itself is not a
        # documented value for `weights`.
        # NOTE(review): IMAGENET1K_V1 is assumed to be what the class argument
        # resolved to - confirm against the installed torchvision.
        self.feature_extractor = resnet18(weights=ResNet18_Weights.IMAGENET1K_V1)
        in_channels = self.feature_extractor.fc.in_features
        # Drop the classification layer so the backbone returns its features.
        self.feature_extractor.fc = nn.Identity()
        # Layer order inside each head is kept as-is so existing checkpoints
        # (state-dict keys) continue to load.
        self.fc_prob = nn.Sequential(
            nn.Linear(in_channels, 512),
            nn.Linear(512, 1),
        )
        self.fc_bbox = nn.Sequential(
            nn.Linear(in_channels, 512),
            nn.Linear(512, 4),
        )

    def forward(self, x):
        """Return ``(pred_prob, pred_bbox)`` for the batch ``x``."""
        # Run the backbone once and feed both heads from the same features;
        # previously the full backbone was evaluated twice per call.
        features = self.feature_extractor(x)
        pred_prob = torch.sigmoid(self.fc_prob(features))
        pred_bbox = self.fc_bbox(features)
        return (pred_prob, pred_bbox)