
Commit eaf9766

update
1 parent d96f1f2 commit eaf9766

21 files changed: +280 / -195 lines

README.md

Lines changed: 54 additions & 1 deletion
@@ -80,4 +80,57 @@ Google drive: https://drive.google.com/drive/folders/1abNP4QKGbNnymjn8607BF0cwxX
 
 ## Training
 
-Soon ...
+We provide scripts for training & evaluating DDIM and DiffAE (including the latent DPM) on the following datasets: FFHQ128, FFHQ256, Bedroom128, Horse128, Celeba64 (D2C's crop).
+Usually, the evaluation results (FIDs) will be available in the `evals` directory.
+
+Note: Most experiments require at least 4x V100s to train the DPM models, while training the accompanying latent DPM requires only 1x 2080Ti.
+
+**FFHQ128**
+```
+# diffae
+python run_ffhq128.py
+# ddim
+python run_ffhq128_ddim.py
+```
+
+**FFHQ256**
+
+We trained only the DiffAE due to the high computation cost.
+This requires 8x V100s.
+```
+sbatch run_ffhq256.sh
+```
+
+After the job finishes, train the latent DPM (this requires only 1x 2080Ti):
+```
+python run_ffhq256_latent.py
+```
+
+**Bedroom128**
+
+```
+# diffae
+python run_bedroom128.py
+# ddim
+python run_bedroom128_ddim.py
+```
+
+**Horse128**
+
+```
+# diffae
+python run_horse128.py
+# ddim
+python run_horse128_ddim.py
+```
+
+**Celeba64**
+
+This experiment can be run on 2080Tis.
+
+```
+# diffae
+python run_celeba64.py
+```

evals/ffhq128_autoenc_130M.txt

Lines changed: 1 addition & 0 deletions
@@ -0,0 +1 @@
{}
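
The README above says the FID results land in the `evals` directory; the file added in this commit is still the empty JSON dict `{}`. A minimal sketch of reading such a file once an evaluation has run, assuming (not confirmed by this diff) that it is a JSON object mapping metric names to numbers:

```
import json

# Assumption: evals/*.txt files are JSON dicts of metric name -> value.
# The file added in this commit is still {}.
with open('evals/ffhq128_autoenc_130M.txt') as f:
    results = json.load(f)

for metric, value in results.items():
    print(f'{metric}: {value}')
```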

experiment.py

Lines changed: 11 additions & 32 deletions
@@ -235,7 +235,6 @@ def is_last_accum(self, batch_idx):
         return (batch_idx + 1) % self.conf.accum_batches == 0
 
     def infer_whole_dataset(self,
-                            both_flips=False,
                             with_render=False,
                             T_render=None,
                             render_save_path=None):
@@ -247,25 +246,14 @@ def infer_whole_dataset(self,
             with_render: whether to also render the images corresponding to that latent
             render_save_path: lmdb output for the rendered images
         """
-        if both_flips:
-            # both original pose and its flipped version
-            data_a = self.conf.make_dataset()
-            assert not (isinstance(data_a, CelebAlmdb) and data_a.crop_d2c
-                        ), "doesn't support celeba dataset with d2c crop"
-            data_a.transform = make_transform(self.conf.img_size, flip_prob=0)
-            data_b = self.conf.make_dataset()
-            data_b.transform = make_transform(self.conf.img_size, flip_prob=1)
-            data = ConcatDataset([data_a, data_b])
+        data = self.conf.make_dataset()
+        if isinstance(data, CelebAlmdb) and data.crop_d2c:
+            # special case where we need the d2c crop
+            data.transform = make_transform(self.conf.img_size,
+                                            flip_prob=0,
+                                            crop_d2c=True)
         else:
-            data = self.conf.make_dataset()
-            if isinstance(data, CelebAlmdb) and data.crop_d2c:
-                # special case where we need the d2c crop
-                data.transform = make_transform(self.conf.img_size,
-                                                flip_prob=0,
-                                                crop_d2c=True)
-            else:
-                data.transform = make_transform(self.conf.img_size,
-                                                flip_prob=0)
+            data.transform = make_transform(self.conf.img_size, flip_prob=0)
 
         # data = SubsetDataset(data, 21)
 
@@ -690,24 +678,15 @@ def test_step(self, batch, *args, **kwargs):
 
         # it will run only one step!
         print('global step:', self.global_step)
-        # score = evaluate_lpips(sampler=self.eval_sampler,
-        #                        model=self.ema_model,
-        #                        conf=self.conf,
-        #                        device=self.device,
-        #                        val_data=self.val_data)
-        # self.log('lpips', score)
         """
         "infer" = predict the latent variables using the encoder on the whole dataset
         """
-        if 'infer' in self.conf.eval_programs or 'inferflip' in self.conf.eval_programs:
+        if 'infer' in self.conf.eval_programs:
             if 'infer' in self.conf.eval_programs:
                 print('infer ...')
-                conds = self.infer_whole_dataset(both_flips=False).float()
-                save_path = f'latent_infer/{self.conf.name}.pkl'
-            elif 'inferflip' in self.conf.eval_programs:
-                print('infer both ...')
-                conds = self.infer_whole_dataset(both_flips=True).float()
-                save_path = f'latent_infer_flip/{self.conf.name}.pkl'
+                conds = self.infer_whole_dataset().float()
+                # NOTE: always use this path for the latent.pkl files
+                save_path = f'checkpoints/{self.conf.name}/latent.pkl'
         else:
             raise NotImplementedError()

main.py

Lines changed: 0 additions & 103 deletions
This file was deleted.

manipulate.ipynb

Lines changed: 16 additions & 15 deletions
Large diffs are not rendered by default.

run_bedroom128.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # train the autoenc model
    # this requires V100s.
    gpus = [0, 1, 2, 3]
    conf = bedroom128_autoenc()
    train(conf, gpus=gpus)

    # infer the latents for training the latent DPM
    # NOTE: not gpu heavy, but more gpus can be of use!
    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['infer']
    train(conf, gpus=gpus, mode='eval')

    # train the latent DPM
    # NOTE: only needs a single gpu
    gpus = [0]
    conf = bedroom128_autoenc_latent()
    train(conf, gpus=gpus)

    # unconditional sampling score
    # NOTE: more gpus can speed up this process
    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['fid(10,10)']
    train(conf, gpus=gpus, mode='eval')
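
The script above runs DiffAE training, latent inference, latent-DPM training, and the FID evaluation end to end. If the checkpoints from the earlier steps already exist, only the final evaluation needs to be repeated; a minimal sketch that reuses just the last step (assuming `train(..., mode='eval')` picks up the existing checkpoints):

```
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # re-run only the unconditional-sampling FID; assumes the DiffAE and
    # latent DPM checkpoints from the full script already exist
    gpus = [0, 1, 2, 3]
    conf = bedroom128_autoenc_latent()
    conf.eval_programs = ['fid(10,10)']
    train(conf, gpus=gpus, mode='eval')
```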

run_bedroom128_ddim.py

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    gpus = [0, 1, 2, 3]
    conf = bedroom128_ddpm()
    train(conf, gpus=gpus)

    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['fid10']
    train(conf, gpus=gpus, mode='eval')

run_celeba64.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # train the autoenc model
    # this can be run on 2080Tis.
    gpus = [0, 1, 2, 3]
    conf = celeba64d2c_autoenc()
    train(conf, gpus=gpus)

    # infer the latents for training the latent DPM
    # NOTE: not gpu heavy, but more gpus can be of use!
    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['infer']
    train(conf, gpus=gpus, mode='eval')

    # train the latent DPM
    # NOTE: only needs a single gpu
    gpus = [0]
    conf = celeba64d2c_autoenc_latent()
    train(conf, gpus=gpus)

    # unconditional sampling score
    # NOTE: more gpus can speed up this process
    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['fid(10,10)']
    train(conf, gpus=gpus, mode='eval')

run_ffhq128.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # train the autoenc model
    # this requires V100s.
    gpus = [0, 1, 2, 3]
    conf = ffhq128_autoenc_130M()
    train(conf, gpus=gpus)

    # infer the latents for training the latent DPM
    # NOTE: not gpu heavy, but more gpus can be of use!
    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['infer']
    train(conf, gpus=gpus, mode='eval')

    # train the latent DPM
    # NOTE: only needs a single gpu
    gpus = [0]
    conf = ffhq128_autoenc_latent()
    train(conf, gpus=gpus)

    # unconditional sampling score
    # NOTE: more gpus can speed up this process
    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['fid(10,10)']
    train(conf, gpus=gpus, mode='eval')

run_ffhq128_ddim.py

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    gpus = [0, 1, 2, 3]
    conf = ffhq128_ddpm_130M()
    train(conf, gpus=gpus)

    gpus = [0, 1, 2, 3]
    conf.eval_programs = ['fid10']
    train(conf, gpus=gpus, mode='eval')

run_ffhq256.py

Lines changed: 10 additions & 0 deletions
@@ -0,0 +1,10 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # 256 requires 8x V100s, in our case on two nodes.
    # do not run this directly; use `sbatch run_ffhq256.sh` to spawn the srun properly.
    gpus = [0, 1, 2, 3]
    nodes = 2
    conf = ffhq256_autoenc()
    train(conf, gpus=gpus, nodes=nodes)

run_ffhq256.sh

Lines changed: 13 additions & 0 deletions
@@ -0,0 +1,13 @@
#!/bin/sh
#SBATCH --gres=gpu:4
#SBATCH --cpus-per-gpu=8
#SBATCH --mem-per-gpu=32GB
#SBATCH --nodes=2
#SBATCH --ntasks=8
#SBATCH --partition=gpu-cluster
#SBATCH --time=72:00:00

export NCCL_DEBUG=INFO
export PYTHONFAULTHANDLER=1

srun python run_ffhq256.py

run_ffhq256_latent.py

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # run run_ffhq256.py (via `sbatch run_ffhq256.sh`) before using this file to train the latent DPM

    # infer the latents for training the latent DPM
    # NOTE: not gpu heavy, but more gpus can be of use!
    gpus = [0, 1, 2, 3]
    conf = ffhq256_autoenc()
    conf.eval_programs = ['infer']
    train(conf, gpus=gpus, mode='eval')

    # train the latent DPM
    # NOTE: only needs a single gpu
    gpus = [0]
    conf = ffhq256_autoenc_latent()
    train(conf, gpus=gpus)
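
Unlike the other run scripts in this commit, this one stops after training the latent DPM and does not compute the unconditional-sampling FID. Purely by analogy with those scripts (whether `fid(10,10)` is appropriate at 256 resolution is an assumption), the corresponding evaluation step would look like:

```
from templates import *
from templates_latent import *

if __name__ == '__main__':
    # hypothetical FID step, mirroring the other run_*.py scripts in this commit
    gpus = [0, 1, 2, 3]
    conf = ffhq256_autoenc_latent()
    conf.eval_programs = ['fid(10,10)']
    train(conf, gpus=gpus, mode='eval')
```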
