import argparse
import glob
import os

import numpy as np
from inference_utils import (
    YOLO,
    ModelsPath,
    Style,
    center,
    clean_videos,
    draw,
    euclidean_distance,
    iou,
    yolo_detections_to_norfair_detections,
)

from norfair.norfair import Paths, Tracker, Video
from norfair.norfair.camera_motion import (
    HomographyTransformationGetter,
    MotionEstimator,
)

# Tracker distance thresholds; which one applies depends on the tracking
# style resolved inside `inference` ("bbox" vs. anything else).
DISTANCE_THRESHOLD_BBOX: float = 3.33
DISTANCE_THRESHOLD_CENTROID: int = 30
MAX_DISTANCE: int = 10000

parser = argparse.ArgumentParser(description="Track objects in a video.")
parser.add_argument(
    "--img-size", type=int, default=720, help="YOLOv7 inference size (pixels)"
)
parser.add_argument(
    "--iou-threshold", type=float, default=0.45, help="YOLOv7 IOU threshold for NMS"
)
parser.add_argument(
    "--classes",
    nargs="+",
    type=int,
    help="Filter by class: --classes 0, or --classes 0 2 3",
)
# NOTE(review): arguments are parsed at import time, so importing this module
# reads the importing process's sys.argv -- confirm this is intended.
args = parser.parse_args()


def inference(
    input_video: str,
    model: str,
    motion_estimation: bool,
    drawing_paths: bool,
    track_points: str,
    model_threshold: str,
) -> str:
    """Run detection and tracking over a video and write the annotated frames.

    Every frame of ``input_video`` is passed to the YOLO model, the resulting
    detections are fed to a Norfair ``Tracker``, and the frame returned by
    ``draw`` is written through the ``Video`` object, whose output path is
    ``"tmp"``.

    Args:
        input_video: Path of the video to process.
        model: Name of a ``ModelsPath`` member; its value is handed to ``YOLO``.
        motion_estimation: When true, a ``MotionEstimator`` is updated on each
            frame and its result is passed to the tracker as
            ``coord_transformations``; otherwise ``None`` is passed.
        drawing_paths: When true, a ``Paths`` drawer is created and handed to
            ``draw``; otherwise ``draw`` receives ``None``.
        track_points: Name of a ``Style`` member. Its value selects the
            distance function: ``"bbox"`` uses ``iou``, anything else uses
            ``euclidean_distance``.
        model_threshold: Forwarded unchanged to the model as
            ``conf_threshold``.

    Returns:
        ``input_video`` with its first character and last four characters
        removed, followed by ``"_out.mp4"``.
    """
    # Called before any output is produced; presumably clears earlier results
    # from "tmp" -- verify against inference_utils.clean_videos.
    clean_videos("tmp")

    track_points = Style[track_points].value
    model = YOLO(ModelsPath[model].value, device="cuda")
    video = Video(input_path=input_video, output_path="tmp")

    motion_estimator = None
    if motion_estimation:
        motion_estimator = MotionEstimator(
            max_points=500,
            min_distance=7,
            transformations_getter=HomographyTransformationGetter(),
            draw_flow=True,
        )

    if track_points == "bbox":
        distance_function = iou
        distance_threshold = DISTANCE_THRESHOLD_BBOX
    else:
        distance_function = euclidean_distance
        distance_threshold = DISTANCE_THRESHOLD_CENTROID

    tracker = Tracker(
        distance_function=distance_function,
        distance_threshold=distance_threshold,
    )

    paths_drawer = Paths(center, attenuation=0.01) if drawing_paths else None

    # Stays None for the whole run when motion estimation is disabled.
    coord_transformations = None

    for frame in video:
        yolo_detections = model(
            frame,
            conf_threshold=model_threshold,
            iou_threshold=args.iou_threshold,
            # Honour --img-size; its default (720) matches the value that was
            # previously hard-coded here.
            image_size=args.img_size,
            classes=args.classes,
        )

        if motion_estimator is not None:
            # The all-ones mask is only consumed by the motion estimator, so
            # it is built only when one exists.
            mask = np.ones(frame.shape[:2], frame.dtype)
            coord_transformations = motion_estimator.update(frame, mask)

        detections = yolo_detections_to_norfair_detections(
            yolo_detections, track_points=track_points
        )
        tracked_objects = tracker.update(
            detections=detections, coord_transformations=coord_transformations
        )

        frame = draw(paths_drawer, track_points, frame, detections, tracked_objects)
        video.write(frame)

    return f"{input_video[1:-4]}_out.mp4"