File size: 2,837 Bytes
fca2efd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import argparse
import glob
import os

import numpy as np

from inference_utils import (
    YOLO,
    ModelsPath,
    Style,
    center,
    clean_videos,
    draw,
    euclidean_distance,
    iou,
    yolo_detections_to_norfair_detections,
)
from norfair.norfair import Paths, Tracker, Video
from norfair.norfair.camera_motion import (
    HomographyTransformationGetter,
    MotionEstimator,
)

# Tracker matching thresholds: bbox mode uses an IoU-based distance,
# centroid mode uses Euclidean pixel distance between box centers.
DISTANCE_THRESHOLD_BBOX: float = 3.33
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10000

parser = argparse.ArgumentParser(description="Track objects in a video.")
# FIX: defaults were the strings "720" / "0.45"; argparse happens to pass
# string defaults through `type`, but numeric literals are the correct idiom.
parser.add_argument("--img-size", type=int, default=720, help="YOLOv7 inference size (pixels)")
parser.add_argument(
    "--iou-threshold", type=float, default=0.45, help="YOLOv7 IOU threshold for NMS"
)
parser.add_argument(
    "--classes", nargs="+", type=int, help="Filter by class: --classes 0, or --classes 0 2 3"
)
args = parser.parse_args()


def inference(
    input_video: str,
    model: str,
    motion_estimation: bool,
    drawing_paths: bool,
    track_points: str,
    model_threshold: float,
):
    """Run YOLO detection plus Norfair tracking over a video file.

    Args:
        input_video: Path to the video to process.
        model: Key into ``ModelsPath`` selecting the YOLO weights.
        motion_estimation: If True, compensate for camera motion with a
            homography-based motion estimator.
        drawing_paths: If True, draw accumulated object paths on each frame.
        track_points: Key into ``Style`` selecting "bbox" or centroid tracking.
        model_threshold: YOLO confidence threshold.

    Returns:
        Path of the written output video. NOTE(review): the expression drops
        the first character of ``input_video`` and the ".mp4" suffix, which
        assumes a leading "." or "/" in the path — confirm against callers.
    """
    clean_videos("tmp")

    coord_transformations = None
    paths_drawer = None
    track_points = Style[track_points].value
    model = YOLO(ModelsPath[model].value, device="cuda")
    video = Video(input_path=input_video, output_path="tmp")

    motion_estimator = None
    if motion_estimation:
        motion_estimator = MotionEstimator(
            max_points=500,
            min_distance=7,
            transformations_getter=HomographyTransformationGetter(),
            draw_flow=True,
        )

    # bbox mode matches detections by IoU distance; centroid mode by the
    # Euclidean distance between box centers.
    distance_function = iou if track_points == "bbox" else euclidean_distance
    distance_threshold = (
        DISTANCE_THRESHOLD_BBOX if track_points == "bbox" else DISTANCE_THRESHOLD_CENTROID
    )
    tracker = Tracker(
        distance_function=distance_function,
        distance_threshold=distance_threshold,
    )

    if drawing_paths:
        paths_drawer = Paths(center, attenuation=0.01)

    for frame in video:
        yolo_detections = model(
            frame,
            conf_threshold=model_threshold,
            iou_threshold=args.iou_threshold,
            # BUG FIX: was hard-coded to 720, silently ignoring --img-size.
            image_size=args.img_size,
            classes=args.classes,
        )

        if motion_estimation:
            # Full-frame mask: no region is excluded from motion estimation.
            # (Hoisted under the if — the mask is only used here.)
            mask = np.ones(frame.shape[:2], frame.dtype)
            coord_transformations = motion_estimator.update(frame, mask)

        detections = yolo_detections_to_norfair_detections(
            yolo_detections, track_points=track_points
        )

        tracked_objects = tracker.update(
            detections=detections, coord_transformations=coord_transformations
        )

        frame = draw(paths_drawer, track_points, frame, detections, tracked_objects)
        video.write(frame)

    return f"{input_video[1:-4]}_out.mp4"