model:
  transformer_config:
    # checkpoint_path: ../../../checkpoints/kny_video_full_gpt2_xl/checkpoint
    remaining_frames_method: "own_embeddings"
    transformer_type: "gpt2-xl"
  first_stage_config:
    checkpoint_path: ../../../checkpoints/kny_image_full_vgg19/checkpoint
    vqvae_config:
      beta: 0.25
      num_embeddings: 50257
      embedding_dim: 128
    autoencoder_config:
      z_channels: 512
      channels: 32
      channels_multiplier:
        - 2
        - 4
        - 8
        - 8
      num_res_blocks: 1
      attention_resolution:
        - 16
      resolution: 128
      dropout: 0.0
    discriminator_config:
      num_layers: 3
      filters: 64
    loss_config:
      discriminator:
        loss: "hinge"
        factor: 1.0
        iter_start: 16200
        weight: 0.3
      vqvae:
        codebook_weight: 1.0
        perceptual_weight: 4.0
        perceptual_loss: "vgg19"
train:
  batch_size: 64
  accumulation_size: 1
  n_epochs: 500
  len_x_train: 28213
  warmup_epoch_percentage: 0.15
  lr_start: 5e-6
  lr_max: 1e-4
  perceptual_loss_weight: 1.0
  n_frames_before: 1
  stop_ground_truth_after_epoch: 200
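For orientation, the sketch below shows how a few of the `train` values combine in practice. It is a minimal illustration, not the repository's actual training code: it loads the config with PyYAML and derives the step counts behind the warmup settings. The file name `config.yaml` and the helper `lr_at_step` are hypothetical, and the post-warmup cosine decay is an assumption — the config itself only fixes `lr_start`, `lr_max`, and the warmup fraction.

```python
import math
import yaml  # PyYAML; pip install pyyaml

with open("config.yaml") as f:  # hypothetical file name for this config
    cfg = yaml.safe_load(f)

train = cfg["train"]
steps_per_epoch = math.ceil(train["len_x_train"] / train["batch_size"])  # 28213 / 64 -> 441
total_steps = steps_per_epoch * train["n_epochs"]                        # 441 * 500 = 220500
warmup_steps = int(total_steps * train["warmup_epoch_percentage"])       # 15% -> 33075

# PyYAML parses "5e-6" (no decimal point) as a string, so coerce explicitly.
lr_start, lr_max = float(train["lr_start"]), float(train["lr_max"])

def lr_at_step(step: int) -> float:
    """Linear warmup from lr_start to lr_max, then cosine decay to zero.
    The cosine shape after warmup is an assumption, not taken from the config."""
    if step < warmup_steps:
        return lr_start + (lr_max - lr_start) * step / max(1, warmup_steps)
    progress = (step - warmup_steps) / max(1, total_steps - warmup_steps)
    return lr_max * 0.5 * (1.0 + math.cos(math.pi * progress))

print(f"{steps_per_epoch=} {warmup_steps=} "
      f"lr@0={lr_at_step(0):.2e} lr@warmup={lr_at_step(warmup_steps):.2e}")
```

Note also that `num_embeddings: 50257` matches the GPT-2 vocabulary size, presumably so that first-stage codebook indices map one-to-one onto token ids for the `gpt2-xl` transformer.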