hyeonsieun
diff --git a/‎celeba_filenames_test.pickle
80.7 KB b/‎celeba_filenames_test.pickle
80.7 KB
diff --git a/‎celeba_filenames_train.pickle
347 KB b/‎celeba_filenames_train.pickle
347 KB
diff --git a/‎dataloader.py
Lines changed: 10 additions & 0 deletions b/‎dataloader.py
Lines changed: 10 additions & 0 deletions
diff --git a/‎fix_seed.py
Lines changed: 19 additions & 0 deletions b/‎fix_seed.py
Lines changed: 19 additions & 0 deletions
diff --git a/‎generate_image.py
Lines changed: 57 additions & 0 deletions b/‎generate_image.py
Lines changed: 57 additions & 0 deletions
diff --git a/‎main.ipynb
Lines changed: 160 additions & 0 deletions b/‎main.ipynb
Lines changed: 160 additions & 0 deletions
diff --git a/‎main.py
Lines changed: 38 additions & 0 deletions b/‎main.py
Lines changed: 38 additions & 0 deletions
@@ -0,0 +1,10 @@
+from torch.utils.data import DataLoader, RandomSampler, SequentialSampler
+
+def get_dataloader(args, dataset, is_train=True):
+    """Wrap ``dataset`` in a ``DataLoader`` batched by ``args.batch_size``.
+
+    Args:
+        args: Object exposing a ``batch_size`` attribute.
+        dataset: Dataset handed to both the sampler and the loader.
+        is_train: When truthy the loader draws through ``RandomSampler``;
+            otherwise it uses ``SequentialSampler``.
+
+    Returns:
+        The configured ``DataLoader``.
+    """
+    sampler_cls = RandomSampler if is_train else SequentialSampler
+    return DataLoader(
+        dataset=dataset,
+        sampler=sampler_cls(dataset),
+        batch_size=args.batch_size,
+    )
@@ -0,0 +1,19 @@
+import torch
+import numpy as np
+import random
+
+def seed_fix(int):
+    """Seed Python's ``random``, NumPy and PyTorch with the same value.
+
+    Also sets ``torch.backends.cudnn.deterministic = True`` and
+    ``torch.backends.cudnn.benchmark = False``.
+
+    Args:
+        int: Seed value forwarded to every generator. The name shadows the
+            builtin inside this function; it is kept unchanged so existing
+            callers are unaffected.
+    """
+    seed = int
+
+    # Python and NumPy generators
+    random.seed(seed)
+    np.random.seed(seed)
+
+    # PyTorch: global seed, current CUDA device, then all CUDA devices
+    # (the last one covers multi-GPU runs)
+    torch_seeders = (
+        torch.manual_seed,
+        torch.cuda.manual_seed,
+        torch.cuda.manual_seed_all,
+    )
+    for apply_seed in torch_seeders:
+        apply_seed(seed)
+
+    # CuDNN flags
+    cudnn = torch.backends.cudnn
+    cudnn.deterministic = True
+    cudnn.benchmark = False
@@ -0,0 +1,57 @@
+import warnings
+warnings.filterwarnings(action="ignore")
+
+import argparse
+import click
+import os
+import torch
+import torchvision
+import clip
+from fix_seed import seed_fix
+from pathlib import Path
+from network import Generator, Discriminator
+from train_utils import *
+
+
+@torch.no_grad()
+def main():
+    """Generate images for one text prompt from a saved checkpoint.
+
+    Reads ``hyperparameter.pt`` from ``--checkpoint_path``, builds the
+    generator and one discriminator per stage from those hyperparameters,
+    loads the weights saved for ``--load_epoch``, encodes ``--prompt`` with
+    CLIP, and writes the last three generator outputs to ``result_64.png``,
+    ``result_128.png`` and ``result_256.png`` (paths relative to the current
+    working directory).
+
+    Command-line options:
+        --prompt: Text to condition the generator on (required).
+        --load_epoch: Epoch whose checkpoint is loaded (required).
+        --checkpoint_path: Directory holding the checkpoint files (required).
+        --show_hyp: Print the stored hyperparameters before generating.
+        --clip_model: CLIP ViT variant, one of ``B/32``, ``L/14``, ``B/16``.
+        --seed: Optional integer; when given, ``seed_fix`` is called with it
+            before anything is sampled.
+    """
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--prompt', type=str, required=True)
+    parser.add_argument('--load_epoch', type=int, required=True)
+    parser.add_argument('--checkpoint_path', type=Path, required=True)
+    parser.add_argument('--show_hyp', action='store_true')
+    # Use argparse's own `choices` so an invalid value is reported as a normal
+    # usage error instead of an uncaught exception from a click type.
+    parser.add_argument('--clip_model', choices=['B/32', 'L/14', 'B/16'], default='B/32')
+    parser.add_argument('--seed', type=int, default=None)
+    args = parser.parse_args()
+
+    if args.seed is not None:
+        seed_fix(args.seed)
+
+    # NOTE: torch.load unpickles the file; only point this at checkpoints
+    # that come from a trusted source.
+    hyp = torch.load(os.path.join(args.checkpoint_path, "hyperparameter.pt"), map_location='cpu')
+    if args.show_hyp:
+        print_hyp(hyp)
+
+    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+    clip_model, _ = clip.load(f"ViT-{args.clip_model}", device=device)
+    G = Generator(
+        hyp['clip_embedding_dim'],
+        hyp['projection_dim'],
+        hyp['noise_dim'],
+        hyp['g_in_chans'],
+        hyp['g_out_chans'],
+        hyp['num_stage'],
+        device,
+    ).to(device)
+    D_lst = [
+        Discriminator(
+            hyp['projection_dim'],
+            hyp['g_out_chans'],
+            hyp['d_in_chans'],
+            hyp['d_out_chans'],
+            hyp['clip_embedding_dim'],
+            curr_stage,
+            device,
+        ).to(device)
+        for curr_stage in range(hyp['num_stage'])
+    ]
+    load_checkpoint(G, D_lst, args.checkpoint_path, args.load_epoch)
+
+    prompt = clip.tokenize([args.prompt]).to(device)
+    txt_feature = clip_model.encode_text(prompt)
+    # One noise vector per encoded prompt.
+    z = torch.randn(txt_feature.shape[0], hyp['noise_dim']).to(device)
+    txt_feature = normalize(txt_feature.to(device)).type(torch.float32)
+
+    fake_images, _, _ = G(txt_feature, z)
+    # Save the last three generator outputs under the fixed file names the
+    # notebook displays afterwards.
+    for index, label in zip((-3, -2, -1), (64, 128, 256)):
+        image = denormalize_image(fake_images[index].detach().cpu())
+        torchvision.utils.save_image(image, f"result_{label}.png")
+
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,160 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "-ksoUw_9uFDY"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Setup (Do NOT modify)\n",
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')\n",
+        "%cd /content/drive/MyDrive/final\n",
+        "!pip install openai-clip"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "ZSaektzjZbKG"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Data preprocessing (Train)\n",
+        "!python preproc_datasets_celeba_zip_train.py --source=./multimodal_celeba_hq.zip --dest train_data_6cap.zip --emb_dim 512 --transform=center-crop --width=256 --height=256"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "Svp2gXHa4UDb"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Data preprocessing (Test)\n",
+        "!python preproc_datasets_celeba_zip_test.py --source=./multimodal_celeba_hq.zip --dest test_data_6cap.zip --emb_dim 512 --transform=center-crop --width=256 --height=256"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "pQY41tvBuJVO"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Train\n",
+        "\n",
+        "train_data = \"sample_train.zip\"#@param {\"type\": \"string\"}\n",
+        "batch_size = 24 #@param {\"type\": \"integer\"}\n",
+        "num_epochs = 10 #@param {\"type\": \"integer\"}\n",
+        "learning_rate = 0.0002 #@param {\"type\": \"number\"}\n",
+        "report_interval = 50 #@param {\"type\": \"integer\"}\n",
+        "noise_dim = 100 #@param {\"type\": \"integer\"}\n",
+        "projection_dim = 128 #@param {\"type\": \"integer\"}\n",
+        "clip_embedding_dim = 512 #@param {\"type\": \"integer\"}\n",
+        "checkpoint_path = \"model_exp1\" #@param {\"type\": \"string\"}\n",
+        "result_path = \"images_exp1\" #@param {\"type\": \"string\"}\n",
+        "use_uncond_loss = True #@param {\"type\": \"boolean\"}\n",
+        "use_contrastive_loss = True #@param {\"type\": \"boolean\"}\n",
+        "num_stage = 3 #@param {\"type\": \"integer\"}\n",
+        "resume_checkpoint_path = \"None\" #@param {\"type\": \"string\"}\n",
+        "resume_epoch = -1 #@param {\"type\": \"integer\"}\n",
+        "\n",
+        "test_cmd = f'''python main.py \\\n",
+        "    --train_data \"{train_data}\" \\\n",
+        "    --batch_size {batch_size} \\\n",
+        "    --num_epochs {num_epochs} \\\n",
+        "    --learning_rate {learning_rate} \\\n",
+        "    --report_interval {report_interval} \\\n",
+        "    --noise_dim {noise_dim} \\\n",
+        "    --projection_dim {projection_dim} \\\n",
+        "    --clip_embedding_dim {clip_embedding_dim} \\\n",
+        "    --checkpoint_path \"{checkpoint_path}\" \\\n",
+        "    --result_path \"{result_path}\" \\\n",
+        "    --num_stage {num_stage} \\\n",
+        "    --resume_epoch {resume_epoch} \\\n",
+        "    '''\n",
+        "if use_uncond_loss:\n",
+        "    test_cmd += \"--use_uncond_loss \"\n",
+        "if use_contrastive_loss:\n",
+        "    test_cmd += \"--use_contrastive_loss \"\n",
+        "if resume_checkpoint_path != \"None\":\n",
+        "    test_cmd += f'''--resume_checkpoint_path \"{resume_checkpoint_path}\"'''\n",
+        "\n",
+        "with open('./train_script.sh', 'w') as file:\n",
+        "    file.write(test_cmd)\n",
+        "\n",
+        "!bash train_script.sh"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "cellView": "form",
+        "id": "S7ueWJp1t-Zi"
+      },
+      "outputs": [],
+      "source": [
+        "#@title Test (Generate image)\n",
+        "\n",
+        "prompt = \"The woman is young and has blond hair, and arched eyebrows.\"#@param {\"type\": \"string\"}\n",
+        "load_epoch = 10 #@param {\"type\": \"integer\"}\n",
+        "checkpoint_path = \"model_exp1\" #@param {\"type\": \"string\"}\n",
+        "\n",
+        "test_cmd = f'''python generate_image.py \\\n",
+        "    --prompt \"{prompt}\" \\\n",
+        "    --load_epoch {load_epoch} \\\n",
+        "    --checkpoint_path \"{checkpoint_path}\"\n",
+        "    '''\n",
+        "\n",
+        "with open('./test_script.sh', 'w') as file:\n",
+        "    file.write(test_cmd)\n",
+        "\n",
+        "!bash test_script.sh\n",
+        "\n",
+        "\n",
+        "from IPython.display import Image\n",
+        "import os\n",
+        "img_64 = Image(os.path.join(\"result_64.png\"))\n",
+        "display(img_64)\n",
+        "img_128 = Image(os.path.join(\"result_128.png\"))\n",
+        "display(img_128)\n",
+        "img_256 = Image(os.path.join(\"result_256.png\"))\n",
+        "display(img_256)"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "tH5gJKdVyUuu"
+      },
+      "outputs": [],
+      "source": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "machine_shape": "hm",
+      "provenance": []
+    },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
@@ -0,0 +1,38 @@
+import warnings
+warnings.filterwarnings(action="ignore")
+
+import argparse
+import click
+import os
+from fix_seed import seed_fix
+from train import train
+from pathlib import Path
+from typing import Union
+
+def _build_parser():
+    """Return the argument parser holding every training option."""
+    parser = argparse.ArgumentParser()
+    add = parser.add_argument
+
+    # Data and optimisation settings
+    add('--train_data', default='sample_train.zip', type=Path, help="path of directory containing training dataset")
+    add('--batch_size', type=int, default=64, help='Batch size')
+    add('--num_epochs', type=int, default=50, help='Number of epochs')
+    add('--learning_rate', type=float, default=2e-4, help='Learning rate')
+    add('--report_interval', type=int, default=100, help='Report interval')
+
+    # Model dimensions
+    add('--noise_dim', type=int, default=100, help='Input noise dimension to Generator')
+    add('--projection_dim', type=int, default=128, help='Noise projection dimension')
+    add('--clip_embedding_dim', type=int, default=512, help='CLIP embedding vector dimension')
+
+    # Output locations
+    add('--checkpoint_path', type=Path, default='model_exp1', help='Checkpoint path')
+    add('--result_path', type=Path, default='images_exp1', help='Generated image path')
+
+    # Loss switches, stage count and resume options
+    add('--use_uncond_loss', action="store_true")
+    add('--use_contrastive_loss', action="store_true")
+    add('--num_stage', type=int, default=1)
+    add('--resume_checkpoint_path', default=None)
+    add('--resume_epoch', type=int, default=-1)
+    return parser
+
+
+def main():
+    """Parse the command line, create the output directories and train.
+
+    The checkpoint and result directories are created if missing, the
+    random generators are seeded with 0 through ``seed_fix``, and the parsed
+    arguments are handed to ``train``.
+    """
+    args = _build_parser().parse_args()
+
+    for directory in (args.checkpoint_path, args.result_path):
+        os.makedirs(directory, exist_ok=True)
+
+    seed_fix(0)
+    train(args)
+
+
+if __name__ == "__main__":
+    main()