# -*- coding: utf-8 -*-
"""YOLOS minimal inference example.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/YOLOS/YOLOS_minimal_inference_example.ipynb

## Set-up environment

First, we install the HuggingFace Transformers library (from source for now, as the
model was only recently added to the library and is not yet part of a PyPI release).
"""

# In Colab: !pip install -q git+https://github.com/huggingface/transformers.git

import os

import cv2
import matplotlib.pyplot as plt
import torch
from PIL import Image
from transformers import AutoFeatureExtractor, YolosForObjectDetection

# Colors for visualization (cycled across the detected boxes).
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]


def plot_results(pil_img, prob, boxes, count):
    """Draw the predicted boxes and class labels on a frame and save it as a PNG."""
    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=3))
        cl = p.argmax()
        text = f'{model.config.id2label[cl.item()]}: {p[cl]:0.2f}'
        ax.text(xmin, ymin, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    # Zero-pad the frame index so the file names sort lexicographically later.
    plt.savefig('exp2/frame%04d.png' % count)
    plt.close()


model = YolosForObjectDetection.from_pretrained("hustvl/yolos-small")
feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-small")

os.makedirs('exp2', exist_ok=True)

vidcap = cv2.VideoCapture('/content/2022-08-10_ППП-стоянки_кам-3_191356 (online-video-cutter.com).mp4')
success, image = vidcap.read()
count = 0

# Run detection on every 10th frame of the video.
while success:
    success, image = vidcap.read()
    if not success:
        break
    count += 1
    if count % 10 == 0:
        # OpenCV decodes frames as BGR; convert to RGB before handing them to PIL.
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        pixel_values = feature_extractor(image, return_tensors="pt").pixel_values

        with torch.no_grad():
            outputs = model(pixel_values)

        # Keep only predictions of queries with confidence above 0.8
        # (excluding the no-object class, which is the last logit).
        probas = outputs.logits.softmax(-1)[0, :, :-1]
        keep = probas.max(-1).values > 0.8

        # Rescale the bounding boxes to the original image size.
        target_sizes = torch.tensor(image.size[::-1]).unsqueeze(0)
        postprocessed_outputs = feature_extractor.post_process(outputs, target_sizes)
        bboxes_scaled = postprocessed_outputs[0]['boxes']

        plot_results(image, probas[keep], bboxes_scaled[keep], count)
        print('Processed frame:', count)

"""## Build a video from the annotated frames

Set the directory parameters and stitch the saved frames back into a video:
"""

image_folder = '/content/exp2'
video_name = 'video.avi'

# The zero-padded frame names sort correctly with a plain lexicographic sort.
images = sorted(img for img in os.listdir(image_folder) if img.endswith(".png"))

frame = cv2.imread(os.path.join(image_folder, images[0]))
height, width, layers = frame.shape

video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'XVID'), 5, (width, height))
for image in images:
    video.write(cv2.imread(os.path.join(image_folder, image)))
video.release()
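"""The resulting `video.avi` may not play back directly in a browser or in the Colab UI.
Assuming `ffmpeg` is available (it is preinstalled on Colab), the file can be re-encoded
as MP4 for easier playback:
"""

# In Colab: !ffmpeg -y -i video.avi -vcodec libx264 video.mp4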
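"""## Post-processing on newer Transformers releases

The `feature_extractor.post_process` call above matches the Transformers version this
notebook was written against. On recent releases that API is deprecated: vision models
use an image processor instead of a feature extractor, and confidence thresholding plus
box rescaling are handled by `post_process_object_detection`. A minimal sketch of the
equivalent call, assuming a recent `transformers` release and the same `model` and PIL
`image` as above:
"""

from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-small")

inputs = image_processor(image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# post_process_object_detection applies the confidence threshold and rescales the
# boxes to the original image size in a single step.
target_sizes = torch.tensor([image.size[::-1]])
results = image_processor.post_process_object_detection(
    outputs, threshold=0.8, target_sizes=target_sizes)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(model.config.id2label[label.item()], round(score.item(), 2), box.tolist())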