{ "image_size_encoder": 256, "triplane_scaling_divider": 0.96806, "diffusion_input_size": 32, "trainer_name": "flow_matching", "use_amp": false, "clip_denoised": false, "num_samples": 1, "num_instances": 10, "use_ddim": false, "ddpm_model_path": "", "cldm_model_path": "", "rec_model_path": "", "logdir": "./", "data_dir": "NONE", "eval_data_dir": "/cpfs01/user/lanyushi.p/Repo/eccv24/open-source/InstantMesh/test_dir", "eval_batch_size": 1, "num_workers": 0, "overfitting": false, "image_size": 256, "iterations": 5000001, "schedule_sampler": "uniform", "anneal_lr": false, "lr": 2e-05, "weight_decay": 0.05, "lr_anneal_steps": 0, "batch_size": 1, "microbatch": 1, "ema_rate": "0.9999", "log_interval": 50, "eval_interval": 5000, "save_interval": 10000, "resume_checkpoint": "checkpoints/objaverse/objaverse-dit/i23d/model_joint_denoise_rec_model2990000.safetensors", "resume_cldm_checkpoint": "", "resume_checkpoint_EG3D": "", "use_fp16": false, "fp16_scale_growth": 0.001, "load_submodule_name": "", "ignore_resume_opt": false, "freeze_ae": false, "denoised_ae": true, "prompt": "a red chair", "interval": 5, "save_img": false, "use_train_trajectory": false, "unconditional_guidance_scale": 6.5, "use_eos_feature": false, "export_mesh": false, "cond_key": "img", "allow_tf32": true, "num_channels": 320, "num_res_blocks": 2, "num_heads": 8, "num_heads_upsample": -1, "num_head_channels": -1, "attention_resolutions": "4,2,1", "channel_mult": "", "dropout": 0.0, "class_cond": false, "use_checkpoint": false, "use_scale_shift_norm": true, "resblock_updown": false, "use_new_attention_order": false, "denoise_in_channels": 4, "denoise_out_channels": 4, "create_controlnet": false, "create_dit": true, "i23d": true, "create_unet_with_hint": false, "dit_model_arch": "DiT-PixArt-L/2", "use_spatial_transformer": true, "transformer_depth": 1, "context_dim": 1024, "pooling_ctx_dim": 768, "roll_out": true, "n_embed": null, "legacy": true, "mixing_logit_init": -6, "hint_channels": 3, "learn_sigma": false, "diffusion_steps": 1000, "noise_schedule": "linear", "standarization_xt": false, "timestep_respacing": "", "use_kl": false, "predict_xstart": false, "predict_v": true, "rescale_timesteps": false, "rescale_learned_sigmas": false, "mixed_prediction": false, "dino_version": "mv-sd-dit-dynaInp-trilatent", "encoder_in_channels": 10, "img_size": [ 256 ], "patch_size": 14, "in_chans": 384, "num_classes": 0, "embed_dim": 384, "depth": 6, "mlp_ratio": 4.0, "qkv_bias": false, "qk_scale": null, "drop_rate": 0.1, "attn_drop_rate": 0.0, "drop_path_rate": 0.0, "norm_layer": "nn.LayerNorm", "cls_token": false, "encoder_cls_token": false, "decoder_cls_token": false, "sr_kwargs": {}, "sr_ratio": 2, "use_clip": false, "arch_encoder": "vits", "arch_decoder": "vitb", "load_pretrain_encoder": false, "encoder_lr": 1e-05, "encoder_weight_decay": 0.001, "no_dim_up_mlp": true, "dim_up_mlp_as_func": false, "decoder_load_pretrained": false, "uvit_skip_encoder": true, "vae_p": 2, "ldm_z_channels": 4, "ldm_embed_dim": 4, "use_conf_map": false, "sd_E_ch": 64, "z_channels": 12, "sd_E_num_res_blocks": 1, "num_frames": 6, "arch_dit_decoder": "DiT2-L/2", "return_all_dit_layers": false, "lrm_decoder": false, "plane_n": 3, "gs_rendering": false, "decomposed": true, "triplane_fg_bg": false, "cfg": "objverse_tuneray_aug_resolution_64_64_auto", "density_reg": 0.0, "density_reg_p_dist": 0.004, "reg_type": "l1", "triplane_decoder_lr": 5e-05, "super_resolution_lr": 5e-05, "c_scale": 1, "nsr_lr": 0.02, "triplane_size": 224, "decoder_in_chans": 32, "triplane_in_chans": 32, "decoder_output_dim": 3, "out_chans": 96, "c_dim": 25, "ray_start": 0.6, "ray_end": 1.8, "rendering_kwargs": { "image_resolution": 256, "disparity_space_sampling": false, "clamp_mode": "softplus", "c_gen_conditioning_zero": true, "c_scale": 1, "superresolution_noise_mode": "none", "density_reg": 0.0, "density_reg_p_dist": 0.004, "reg_type": "l1", "decoder_lr_mul": 1, "decoder_activation": "sigmoid", "sr_antialias": true, "return_triplane_features": false, "return_sampling_details_flag": true, "superresolution_module": "utils.torch_utils.components.NearestConvSR", "depth_resolution": 64, "depth_resolution_importance": 64, "ray_start": "auto", "ray_end": "auto", "box_warp": 0.9, "white_back": true, "radius_range": [ 1.5, 2 ], "sampler_bbox_min": -0.45, "sampler_bbox_max": 0.45, "filter_out_of_bbox": true, "PatchRaySampler": true, "patch_rendering_resolution": 45, "z_near": 1.05, "z_far": 2.45 }, "sr_training": false, "bcg_synthesis": false, "bcg_synthesis_kwargs": {}, "patch_rendering_resolution": 45, "vit_decoder_lr": 1e-05, "vit_decoder_wd": 0.001, "ae_classname": "vit.vit_triplane.RodinSR_256_fusionv6_ConvQuant_liteSR_dinoInit3DAttn_SD_B_3L_C_withrollout_withSD_D_ditDecoder", "color_criterion": "mse", "l2_lambda": 1.0, "lpips_lambda": 0.8, "lpips_delay_iter": 0, "sr_delay_iter": 0, "kl_anneal": false, "latent_lambda": 0.0, "latent_criterion": "mse", "kl_lambda": 0.0, "ssim_lambda": 0.0, "l1_lambda": 0.0, "id_lambda": 0.0, "depth_lambda": 0.0, "alpha_lambda": 1.0, "fg_mse": false, "bg_lamdba": 0.01, "density_reg_every": 4, "shape_uniform_lambda": 0.005, "shape_importance_lambda": 0.01, "shape_depth_lambda": 0.0, "rec_cvD_lambda": 0.01, "nvs_cvD_lambda": 0.025, "patchgan_disc_factor": 0.01, "patchgan_disc_g_weight": 0.2, "r1_gamma": 1.0, "sds_lamdba": 1.0, "nvs_D_lr_mul": 1, "cano_D_lr_mul": 1, "ce_balanced_kl": 1.0, "p_eps_lambda": 1, "symmetry_loss": false, "depth_smoothness_lambda": 0.0, "ce_lambda": 0.5, "negative_entropy_lambda": 0.5, "grad_clip": true, "online_mask": false, "sde_time_eps": 0.01, "sde_beta_start": 0.1, "sde_beta_end": 20.0, "sde_sde_type": "vpsde", "sde_sigma2_0": 0.0, "iw_sample_p": "drop_sigma2t_iw", "iw_sample_q": "ll_iw", "iw_subvp_like_vp_sde": false, "train_vae": false, "pred_type": "v", "p_rendering_loss": false, "unfix_logit": false, "loss_type": "eps", "loss_weight": "simple", "diffusion_ce_anneal": true, "enable_mixing_normal": false, "only_mid_control": false, "control_key": "img", "normalize_clip_encoding": true, "scale_clip_encoding": 1.0, "cfg_dropout_prob": 0.1, "use_lmdb": false, "use_wds": false, "use_lmdb_compressed": false, "compile": false, "objv_dataset": true, "decode_encode_img_only": false, "load_wds_diff": true, "load_wds_latent": false, "eval_load_wds_instance": true, "shards_lst": "", "eval_shards_lst": "", "mv_input": true, "duplicate_sample": true, "orthog_duplicate": false, "split_chunk_input": false, "load_real": true, "four_view_for_latent": false, "single_view_for_i23d": false, "shuffle_across_cls": true, "load_extra_36_view": false, "mv_latent_dir": "", "append_depth": false, "plucker_embedding": true, "gs_cam_format": false, "split_chunk_size": 8, "path_type": "Linear", "prediction": "velocity", "sample_eps": null, "train_eps": null, "snr_type": "lognorm", "local_rank": 0, "gpus": 1 }