# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.

import logging

import torch as th
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import vgg19

logger = logging.getLogger(__name__)


class Vgg19(nn.Module):
    """Pretrained VGG-19 feature extractor exposing five intermediate outputs.

    The ``features`` stack of torchvision's pretrained VGG-19 is cut into five
    consecutive slices covering layer indices 0-1, 2-6, 7-11, 12-20 and 21-29.
    ``forward`` chains the slices and returns the output of each one.

    Args:
        requires_grad: When False (the default) every parameter of the
            network is frozen (``requires_grad = False``).
    """

    # Half-open (start, stop) layer-index ranges into ``vgg19().features``,
    # one entry per slice, in order slice1 .. slice5.
    _SLICE_BOUNDS = ((0, 2), (2, 7), (7, 12), (12, 21), (21, 30))

    def __init__(self, requires_grad=False):
        super().__init__()
        features = self._load_pretrained_features()

        for slice_no, (start, stop) in enumerate(self._SLICE_BOUNDS, start=1):
            block = nn.Sequential()
            for layer_idx in range(start, stop):
                # Submodules keep their original layer index as name so that
                # state_dict keys stay "slice<N>.<layer_idx>.*".
                block.add_module(str(layer_idx), features[layer_idx])
            setattr(self, f"slice{slice_no}", block)

        if not requires_grad:
            for param in self.parameters():
                param.requires_grad = False

    @staticmethod
    def _load_pretrained_features():
        """Return the ``features`` module of an ImageNet-pretrained VGG-19."""
        try:
            # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of
            # explicit weight enums; IMAGENET1K_V1 is what ``pretrained=True``
            # used to load.
            from torchvision.models import VGG19_Weights
        except ImportError:
            # Older torchvision without the weights enum.
            return vgg19(pretrained=True).features
        return vgg19(weights=VGG19_Weights.IMAGENET1K_V1).features

    def forward(self, X):
        """Run ``X`` through the five slices.

        Args:
            X: Input batch fed to the first VGG layer.

        Returns:
            A list with the output of slice1 .. slice5, in that order.
        """
        outputs = []
        hidden = X
        for block in (
            self.slice1,
            self.slice2,
            self.slice3,
            self.slice4,
            self.slice5,
        ):
            hidden = block(hidden)
            outputs.append(hidden)
        return outputs


class VGGLossMasked(nn.Module):
    """Masked, weighted L1 distance between VGG-19 features of two images.

    Args:
        weights: Sequence of per-level weights, indexed by VGG output level
            (``Vgg19`` produces five levels). Defaults to
            ``[20.0, 5.0, 0.9, 0.5, 0.5]``.
    """

    def __init__(self, weights=None):
        super().__init__()
        self.vgg = Vgg19()
        if weights is None:
            self.weights = [20.0, 5.0, 0.9, 0.5, 0.5]
        else:
            self.weights = weights

    def normalize(self, batch):
        """Scale a 0-255 image batch to [0, 1] and standardize per channel.

        Args:
            batch: Tensor with 3 channels on dim 1 (it is broadcast against
                constants of shape ``(1, 3, 1, 1)``), values on a 0-255 scale.

        Returns:
            ``(clamp(batch / 255, 0, 1) - mean) / std`` with the per-channel
            mean/std constants defined below.
        """
        scaled = (batch / 255.0).clamp(0.0, 1.0)
        # Build the constants from the scaled (floating point) tensor rather
        # than from ``batch``: for integer inputs ``batch.new_tensor`` would
        # truncate mean/std to 0 and the division below would blow up.
        mean = scaled.new_tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1)
        std = scaled.new_tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1)
        return (scaled - mean) / std

    def forward(self, x_rgb, y_rgb, mask):
        """Compute the masked VGG feature loss between ``x_rgb`` and ``y_rgb``.

        Args:
            x_rgb: First image batch, passed through ``normalize``.
            y_rgb: Second image batch, passed through ``normalize``.
            mask: If a tensor, it is bilinearly resized to the spatial size
                of each feature level (so it must be a 4-D floating tensor
                that broadcasts against the feature maps) and detached.
                Any other value is multiplied into the features as-is.

        Returns:
            Sum over feature levels of
            ``weights[i] * mean(|x_feat * m - y_feat * m|)``.
        """
        x_feats = self.vgg(self.normalize(x_rgb))
        y_feats = self.vgg(self.normalize(y_rgb))
        resize_mask = isinstance(mask, th.Tensor)

        loss = 0
        for level, (x_feat, y_feat) in enumerate(zip(x_feats, y_feats)):
            if resize_mask:
                # align_corners=False is the default bilinear behaviour;
                # stating it keeps results stable and silences the warning
                # emitted by older PyTorch versions.
                m = F.interpolate(
                    mask,
                    size=tuple(x_feat.shape[-2:]),
                    mode="bilinear",
                    align_corners=False,
                ).detach()
            else:
                m = mask
            level_diff = (x_feat * m - y_feat * m).abs().mean()
            loss = loss + self.weights[level] * level_diff
        return loss