multimodalart/dreambooth-training · Adding base inpainting model... error... help

Hi there,

First off, thank you for this Space! I've used it several times and it's been great!

Recently I was trying to use a custom model for inpainting and found out the hard way that, in order to do that, the base model itself has to be an inpainting model too. I guess it has to do with how the weights are calculated for inpainting?

Anyway, I modified app.py and added runwayml's inpainting model (https://huggingface.co/runwayml/stable-diffusion-inpainting/tree/main) like so:

# Fetch a snapshot of every selectable base model plus the safety checker.
# All of these names are bound only when `is_gpu_associated` is truthy, so
# code that reads them must be guarded by the same flag.
# NOTE(review): `snapshot_download` is presumably huggingface_hub's helper,
# which returns the local directory of the downloaded snapshot - confirm
# against the file's imports.
if(is_gpu_associated):
    model_v1 = snapshot_download(repo_id="multimodalart/sd-fine-tunable")
    # Inpainting variant of the v1 base; files matching *.ckpt are excluded.
    model_v1_inpainting = snapshot_download(repo_id="runwayml/stable-diffusion-inpainting", ignore_patterns=["*.ckpt"])
    # The v2 repos exclude both *.ckpt and *.safetensors files.
    model_v2 = snapshot_download(repo_id="stabilityai/stable-diffusion-2-1", ignore_patterns=["*.ckpt", "*.safetensors"])
    model_v2_512 = snapshot_download(repo_id="stabilityai/stable-diffusion-2-1-base", ignore_patterns=["*.ckpt", "*.safetensors"])
    safety_checker = snapshot_download(repo_id="multimodalart/sd-sc")
    # Default selection; swap_base_model() rebinds this global later.
    model_to_load = model_v1

def swap_base_model(selected_model):
    """Rebind the module-level ``model_to_load`` to the chosen base model.

    Does nothing when ``is_gpu_associated`` is falsy. The values "v1-5",
    "v1-5-inpainting" and "v2-1-768" select their matching snapshot; any
    other value selects ``model_v2_512``.
    """
    global model_to_load

    # The model snapshots are only bound when a GPU is associated.
    if not is_gpu_associated:
        return

    known_models = {
        "v1-5": model_v1,
        "v1-5-inpainting": model_v1_inpainting,
        "v2-1-768": model_v2,
    }
    # Unrecognised names fall back to the 512px v2 base.
    model_to_load = known_models.get(selected_model, model_v2_512)

Made some changes in other places too:

    if(is_spaces):
        if(selected_model == "v1-5"):
            its = 1.1 if which_gpu == "T4" else 1.8
            if(experimental_faces):
                its = 1
        elif(selected_model == "v1-5-inpainting"):
            its = 1.1 if which_gpu == "T4" else 1.8
            if(experimental_faces):
                its = 1

and here:

    with gr.Row() as what_are_you_training:
        type_of_thing = gr.Dropdown(label="What would you like to train?", choices=["object", "person", "style"], value="object", interactive=True)
        with gr.Column():
            base_model_to_use = gr.Dropdown(label="Which base model would you like to use?", choices=["v1-5", "v1-5-inpainting", "v2-1-512", "v2-1-768"], value="v1-5", interactive=True)

So... when I train, I keep getting an error like the one below, and I don't know what to make of it anymore. It's also hard to debug in the Space because I don't get a persistent environment where I can inspect what's going on.

Here's the error:


To create a public link, set `share=True` in `launch()`.
Starting single training...
Namespace(Session_dir='', adam_beta1=0.9, adam_beta2=0.999, adam_epsilon=1e-08, adam_weight_decay=0.01, cache_latents=True, center_crop=False, class_data_dir=None, class_prompt='', dump_only_text_encoder=False, gradient_accumulation_steps=1, gradient_checkpointing=True, hub_model_id=None, hub_token=None, image_captions_filename=True, instance_data_dir='instance_images', instance_prompt='', learning_rate=2e-06, local_rank=-1, logging_dir='logs', lr_scheduler='polynomial', lr_warmup_steps=0, max_grad_norm=1.0, max_train_steps=4050, mixed_precision='fp16', num_class_images=100, num_train_epochs=1, output_dir='output_model', pretrained_model_name_or_path='/home/user/.cache/huggingface/hub/models--runwayml--stable-diffusion-inpainting/snapshots/caac1048f28756b68042add4670bec6f4ae314f8', prior_loss_weight=1.0, push_to_hub=False, resolution=512, sample_batch_size=4, save_n_steps=0, save_starting_step=1, scale_lr=False, seed=42, stop_text_encoder_training=1215, tokenizer_name=None, train_batch_size=1, train_only_unet=False, train_text_encoder=True, use_8bit_adam=True, with_prior_preservation=False)
Traceback (most recent call last):
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/gradio/routes.py", line 337, in run_predict
    output = await app.get_blocks().process_api(
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/gradio/blocks.py", line 1015, in process_api
    result = await self.call_function(
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/gradio/blocks.py", line 833, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/anyio/_backends/_asyncio.py", line 867, in run
    result = context.run(func, *args)
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/gradio/helpers.py", line 584, in tracked_fn
    response = fn(*args)
  File "app.py", line 351, in train
    push(model_name, where_to_upload, hf_token, which_model, True)
  File "app.py", line 371, in push
    convert("output_model", "model.ckpt")
  File "/home/user/app/convertosd.py", line 270, in convert
    unet_state_dict = torch.load(unet_path, map_location="cpu")
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/torch/serialization.py", line 699, in load
    with _open_file_like(f, 'rb') as opened_file:
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/torch/serialization.py", line 230, in _open_file_like
    return _open_file(name_or_buffer, mode)
  File "/home/user/.pyenv/versions/3.8.9/lib/python3.8/site-packages/torch/serialization.py", line 211, in __init__
    super(_open_file, self).__init__(open(name, mode))
FileNotFoundError: [Errno 2] No such file or directory: 'output_model/unet/diffusion_pytorch_model.bin'

It seems to be complaining that output_model/unet/diffusion_pytorch_model.bin does not exist when convertosd.py tries to load it during the push step.

Not sure what to do...any advice?