{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "TPU" }, "cells": [ { "cell_type": "code", "source": [ "!pip install git+https://github.com/huggingface/diffusers.git\n", "!pip install -U -r requirements.txt\n", "!pip install huggingface\n", "!pip install diffusers[training]\n", "!pip install diffusers\n", "!pip install torch\n", "!sudo apt -qq install git-lfs\n", "!git config --global credential.helper store\n", "!pip install tqdm" ], "metadata": { "id": "aE5NZ-XcU7bC" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from dataclasses import dataclass\n", "\n", "\n", "@dataclass\n", "class TrainingConfig:\n", " image_size = 128 # the generated image resolution\n", " train_batch_size = 16\n", " eval_batch_size = 16 # how many images to sample during evaluation\n", " num_epochs = 50\n", " gradient_accumulation_steps = 1\n", " learning_rate = 1e-4\n", " lr_warmup_steps = 500\n", " save_image_epochs = 10\n", " save_model_epochs = 10\n", " mixed_precision = \"fp16\" # `no` for float32, `fp16` for automatic mixed precision\n", " output_dir = \"ddpm-butterflies-128\" # the model name locally and on the HF Hub\n", "\n", " push_to_hub = True # whether to upload the saved model to the HF Hub\n", " hub_private_repo = False\n", " overwrite_output_dir = True # overwrite the old model when re-running the notebook\n", " seed = 0\n", "\n", "\n", "config = TrainingConfig()" ], "metadata": { "id": "faBx8T9NV1Xv" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "d5jOnnaPSKZx" }, "outputs": [], "source": [ "from datasets import load_dataset\n", "\n", "config.dataset_name = \"Drozdik/tattoo_v0\"\n", "dataset = load_dataset(config.dataset_name, split=\"train\")" ] }, { "cell_type": "code", "source": [ "def transform(examples):\n", " images = [preprocess(image.convert(\"RGB\")) for image in examples[\"image\"]]\n", " return {\"images\": images}\n", "\n" ], "metadata": { "id": "CvUPjQmqXsG1" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from diffusers import DDPMPipeline\n", "import math\n", "import os\n", "\n", "def make_grid(images, rows, cols):\n", " w, h = images[0].size\n", " grid = Image.new(\"RGB\", size=(cols * w, rows * h))\n", " for i, image in enumerate(images):\n", " grid.paste(image, box=(i % cols * w, i // cols * h))\n", " return grid\n", "\n", "\n", "def evaluate(config, epoch, pipeline):\n", " images = pipeline(\n", " batch_size=config.eval_batch_size,\n", " generator=torch.manual_seed(config.seed),\n", " ).images\n", "\n", " image_grid = make_grid(images, rows=4, cols=4)\n", "\n", " test_dir = os.path.join(config.output_dir, \"samples\")\n", " os.makedirs(test_dir, exist_ok=True)\n", " image_grid.save(f\"{test_dir}/{epoch:04d}.png\")\n", "\n" ], "metadata": { "id": "p6tO2qgGx-m3" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from accelerate import Accelerator\n", "from tqdm.auto import tqdm\n", "from pathlib import Path\n", "import os\n", "\n", "def train_loop(config, model, noise_scheduler, optimizer, train_dataloader, lr_scheduler):\n", " accelerator = Accelerator(\n", " mixed_precision=config.mixed_precision,\n", " gradient_accumulation_steps=config.gradient_accumulation_steps,\n", " log_with=\"tensorboard\",\n", " project_dir=os.path.join(config.output_dir, \"logs\"),\n", " )\n", "\n", " if accelerator.is_main_process:\n", " os.makedirs(config.output_dir,exist_ok=True)\n", " accelerator.init_trackers(\"train_example\")\n", "\n", " model, optimizer, train_dataloader, lr_scheduler = accelerator.prepare(model, optimizer, train_dataloader, lr_scheduler)\n", "\n", " global_step = 0\n", "\n", " for epoch in range(config.num_epochs):\n", " progress_bar = tqdm(total=len(train_dataloader), disable=not accelerator.is_local_main_process)\n", " progress_bar.set_description(f\"Epoch {epoch}\")\n", "\n", " for step, batch in enumerate(train_dataloader):\n", " clean_images = batch[\"images\"]\n", "\n", " noise = torch.randn(clean_images.shape).to(clean_images.device)\n", " bs = clean_images.shape[0]\n", "\n", " timesteps = torch.randint(\n", " 0, noise_scheduler.config.num_train_timesteps, (bs,), device=clean_images.device\n", " ).long()\n", "\n", " noisy_images = noise_scheduler.add_noise(clean_images, noise, timesteps)\n", "\n", " with accelerator.accumulate(model):\n", " noise_pred = model(noisy_images, timesteps, return_dict=False)[0]\n", " loss = F.mse_loss(noise_pred,noise)\n", " accelerator.backward(loss)\n", "\n", " accelerator.clip_grad_norm_(model.parameters(),1.0)\n", " optimizer.step()\n", " lr_scheduler.step()\n", " optimizer.zero_grad()\n", "\n", " progress_bar.update(1)\n", " logs = {\"loss\": loss.detach().item(), \"lr\": lr_scheduler.get_last_lr()[0], \"step\": global_step}\n", " progress_bar.set_postfix(**logs)\n", " accelerator.log(logs, step=global_step)\n", " global_step += 1\n", "\n", " if accelerator.is_main_process:\n", " pipeline = DDPMPipeline(unet=accelerator.unwrap_model(model), scheduler=noise_scheduler)\n", "\n", " if (epoch + 1) % config.save_image_epochs == 0 or epoch == config.num_epochs - 1:\n", " evaluate(config, epoch, pipeline)\n", " if (epoch + 1) % config.save_model_epochs == 0 or epoch == config.num_epochs - 1:\n", " pipeline.save_pretrained(config.output_dir)\n", "\n", "\n", "\n", " upload_folder(\n", " repo_id=repo_id,\n", " folder_path=args.output_dir,\n", " commit_message=\"End of training\",\n", " ignore_patterns=[\"step_*\", \"epoch_*\"],\n", " )\n", "\n" ], "metadata": { "id": "Ae7g7TaCsnh7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from accelerate import notebook_launcher\n", "import torch.nn.functional as F\n", "from diffusers.optimization import get_cosine_schedule_with_warmup\n", "import torch\n", "from PIL import Image\n", "from diffusers import DDPMScheduler\n", "from diffusers import UNet2DModel\n", "import torch\n", "from torchvision import transforms\n", "\n", "\n", "\n", "\n", "preprocess = transforms.Compose(\n", " [\n", " transforms.Resize((config.image_size, config.image_size)),\n", " transforms.RandomHorizontalFlip(),\n", " transforms.ToTensor(),\n", " transforms.Normalize([.5],[.5]),\n", " ]\n", ")\n", "\n", "dataset.set_transform(transform)\n", "\n", "train_dataloader = torch.utils.data.DataLoader(dataset, batch_size=config.train_batch_size, shuffle=True)\n", "\n", "model = UNet2DModel(sample_size=config.image_size,in_channels=3, out_channels=3, layers_per_block=2, block_out_channels=(128,128,256,256,512,512), down_block_types=(\"DownBlock2D\",\"DownBlock2D\",\"DownBlock2D\",\"DownBlock2D\",\"AttnDownBlock2D\",\"DownBlock2D\"), up_block_types=(\"UpBlock2D\",\"AttnUpBlock2D\",\"UpBlock2D\",\"UpBlock2D\",\"UpBlock2D\",\"UpBlock2D\"), )\n", "\n", "sample_image = dataset[0][\"images\"].unsqueeze(0)\n", "print(\"Input shape:\", sample_image.shape)\n", "\n", "print(\"Output shape:\", model(sample_image, timestep=0).sample.shape)\n", "\n", "noise_scheduler = DDPMScheduler(num_train_timesteps=1000)\n", "noise = torch.randn(sample_image.shape)\n", "time_steps = torch.LongTensor([50])\n", "noisy_image = noise_scheduler.add_noise(sample_image, noise, time_steps)\n", "Image.fromarray(((noisy_image.permute(0, 2, 3, 1) + 1.0) * 127.5).type(torch.uint8).numpy()[0])\n", "\n", "\n", "\n", "\n", "optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)\n", "lr_scheduler = get_cosine_schedule_with_warmup(\n", " optimizer=optimizer,\n", " num_warmup_steps=config.lr_warmup_steps,\n", " num_training_steps=(len(train_dataloader)*config.num_epochs),\n", ")\n", "\n", "\n", "\n", "args = (config, model, noise_scheduler, optimizer, train_dataloader, lr_scheduler)\n", "\n", "notebook_launcher(train_loop, args, num_processes=1)" ], "metadata": { "id": "FnPpL7H2yT8O" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "model = UNet2DModel.from_pretrained(config.output_dir, subfolder=\"unet\")\n", "optimizer = torch.optim.AdamW(model.parameters(), lr=config.learning_rate)\n", "lr_scheduler = get_cosine_schedule_with_warmup(\n", " optimizer=optimizer,\n", " num_warmup_steps=config.lr_warmup_steps,\n", " num_training_steps=(len(train_dataloader)*config.num_epochs),\n", ")\n", "args = (config, model, noise_scheduler, optimizer, train_dataloader, lr_scheduler)\n", "notebook_launcher(train_loop, args, num_processes=1)" ], "metadata": { "id": "K22cx-8snBIV" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!nvidia-smi" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Rqv9HTR22qXe", "outputId": "9480fd9d-5545-4ef8-f91c-f1dc8a02573a" }, "execution_count": null, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Sun Aug 6 08:13:38 2023 \n", "+-----------------------------------------------------------------------------+\n", "| NVIDIA-SMI 525.105.17 Driver Version: 525.105.17 CUDA Version: 12.0 |\n", "|-------------------------------+----------------------+----------------------+\n", "| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n", "| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n", "| | | MIG M. |\n", "|===============================+======================+======================|\n", "| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n", "| N/A 77C P0 34W / 70W | 10807MiB / 15360MiB | 0% Default |\n", "| | | N/A |\n", "+-------------------------------+----------------------+----------------------+\n", " \n", "+-----------------------------------------------------------------------------+\n", "| Processes: |\n", "| GPU GI CI PID Type Process name GPU Memory |\n", "| ID ID Usage |\n", "|=============================================================================|\n", "+-----------------------------------------------------------------------------+\n" ] } ] } ] }