# -*- coding: utf-8 -*-
"""YOLOS minimal inference example.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/github/NielsRogge/Transformers-Tutorials/blob/master/YOLOS/YOLOS_minimal_inference_example.ipynb

## Set-up environment

First, we install the HuggingFace Transformers library (from source for now, as the
model was only recently added to the library and is not yet part of a PyPI release).
"""

# In Colab: !pip install -q git+https://github.com/huggingface/transformers.git

import os

import cv2
import matplotlib.pyplot as plt
import torch
from PIL import Image
from transformers import AutoFeatureExtractor, YolosForObjectDetection

# Colors for visualization (cycled across the detected boxes).
COLORS = [[0.000, 0.447, 0.741], [0.850, 0.325, 0.098], [0.929, 0.694, 0.125],
          [0.494, 0.184, 0.556], [0.466, 0.674, 0.188], [0.301, 0.745, 0.933]]


def plot_results(pil_img, prob, boxes, count):
    """Draw the predicted boxes and class labels on a frame and save it as a PNG."""
    plt.figure(figsize=(16, 10))
    plt.imshow(pil_img)
    ax = plt.gca()
    colors = COLORS * 100
    for p, (xmin, ymin, xmax, ymax), c in zip(prob, boxes.tolist(), colors):
        ax.add_patch(plt.Rectangle((xmin, ymin), xmax - xmin, ymax - ymin,
                                   fill=False, color=c, linewidth=3))
        cl = p.argmax()
        text = f'{model.config.id2label[cl.item()]}: {p[cl]:0.2f}'
        ax.text(xmin, ymin, text, fontsize=15,
                bbox=dict(facecolor='yellow', alpha=0.5))
    plt.axis('off')
    # Zero-pad the frame index so the file names sort lexicographically later.
    plt.savefig('exp2/frame%04d.png' % count)
    plt.close()


model = YolosForObjectDetection.from_pretrained("hustvl/yolos-small")
feature_extractor = AutoFeatureExtractor.from_pretrained("hustvl/yolos-small")

os.makedirs('exp2', exist_ok=True)

vidcap = cv2.VideoCapture('/content/2022-08-10_ППП-стоянки_кам-3_191356 (online-video-cutter.com).mp4')
success, image = vidcap.read()
count = 0

# Run detection on every 10th frame of the video.
while success:
    success, image = vidcap.read()
    if not success:
        break
    count += 1
    if count % 10 == 0:
        # OpenCV decodes frames as BGR; convert to RGB before handing them to PIL.
        image = Image.fromarray(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
        pixel_values = feature_extractor(image, return_tensors="pt").pixel_values

        with torch.no_grad():
            outputs = model(pixel_values)

        # Keep only predictions of queries with confidence above 0.8
        # (excluding the no-object class, which is the last logit).
        probas = outputs.logits.softmax(-1)[0, :, :-1]
        keep = probas.max(-1).values > 0.8

        # Rescale the bounding boxes to the original image size.
        target_sizes = torch.tensor(image.size[::-1]).unsqueeze(0)
        postprocessed_outputs = feature_extractor.post_process(outputs, target_sizes)
        bboxes_scaled = postprocessed_outputs[0]['boxes']

        plot_results(image, probas[keep], bboxes_scaled[keep], count)
        print('Processed frame:', count)

"""## Build a video from the annotated frames

Set the directory parameters and stitch the saved frames back into a video:
"""

image_folder = '/content/exp2'
video_name = 'video.avi'

# The zero-padded frame names sort correctly with a plain lexicographic sort.
images = sorted(img for img in os.listdir(image_folder) if img.endswith(".png"))

frame = cv2.imread(os.path.join(image_folder, images[0]))
height, width, layers = frame.shape

video = cv2.VideoWriter(video_name, cv2.VideoWriter_fourcc(*'XVID'), 5, (width, height))
for image in images:
    video.write(cv2.imread(os.path.join(image_folder, image)))
video.release()
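"""The resulting `video.avi` may not play back directly in a browser or in the Colab UI.
Assuming `ffmpeg` is available (it is preinstalled on Colab), the file can be re-encoded
as MP4 for easier playback:
"""

# In Colab: !ffmpeg -y -i video.avi -vcodec libx264 video.mp4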
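"""## Post-processing on newer Transformers releases

The `feature_extractor.post_process` call above matches the Transformers version this
notebook was written against. On recent releases that API is deprecated: vision models
use an image processor instead of a feature extractor, and confidence thresholding plus
box rescaling are handled by `post_process_object_detection`. A minimal sketch of the
equivalent call, assuming a recent `transformers` release and the same `model` and PIL
`image` as above:
"""

from transformers import AutoImageProcessor

image_processor = AutoImageProcessor.from_pretrained("hustvl/yolos-small")

inputs = image_processor(image, return_tensors="pt")
with torch.no_grad():
    outputs = model(**inputs)

# post_process_object_detection applies the confidence threshold and rescales the
# boxes to the original image size in a single step.
target_sizes = torch.tensor([image.size[::-1]])
results = image_processor.post_process_object_detection(
    outputs, threshold=0.8, target_sizes=target_sizes)[0]

for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
    print(model.config.id2label[label.item()], round(score.item(), 2), box.tolist())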