
Commit 33dfc8e

update week 15
1 parent 29b5e44 commit 33dfc8e

File tree

8 files changed: +2092 −45 lines

doc/pub/week15/html/week15-bs.html: 392 additions & 1 deletion
doc/pub/week15/html/week15-reveal.html: 360 additions & 1 deletion
doc/pub/week15/html/week15-solarized.html: 382 additions & 1 deletion
doc/pub/week15/html/week15.html: 382 additions & 1 deletion
Binary file (0 bytes) not shown
doc/pub/week15/ipynb/week15.ipynb: 379 additions & 40 deletions
doc/pub/week15/pdf/week15.pdf: binary file (100 KB) not shown
doc/src/week15/week15.do.txt: 197 additions & 1 deletion
@@ -345,12 +345,208 @@ estimated using only one random variable per timestep. As it is
computed by summing up $T-1$ consistency terms, the final estimated
value may have high variance for large $T$ values.

!split
===== PyTorch implementation of a Denoising Diffusion Probabilistic Model (DDPM) trained on the MNIST dataset =====

The code covers:
o Model definition (a simple U-Net-style convolutional network)
o Forward diffusion (adding noise over $T$ timesteps)
o Reverse denoising process
o Training loop
o Sampling from the trained model

This example is adapted from several open-source tutorials and
implementations, demonstrating how to build a diffusion model from
scratch in under 200 lines of PyTorch. I have borrowed extensively from
o Jackson-Kang’s PyTorch diffusion tutorial, see URL:"https://github.com/Jackson-Kang/Pytorch-Diffusion-Model-Tutorial" and
o awjuliani’s PyTorch DDPM implementation, see URL:"https://github.com/awjuliani/pytorch-diffusion"

!split
===== How diffusion models work =====

Diffusion models gradually corrupt data by adding Gaussian noise over
a sequence of timesteps and then learn to reverse this noising process
with a neural network.

The corruption schedule is typically linear or cosine in variance; a
sketch of a cosine schedule is given at the end of this section.

During training, the network is optimized to predict the noise that
was added at each timestep, using a mean-squared error loss.

At inference, one starts from random noise and iteratively applies the
learned denoising steps to generate new samples.
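
The implementation below uses a linear schedule. As a minimal sketch
of the cosine alternative just mentioned (following the
parameterization of Nichol and Dhariwal, 2021; this function is my
addition and not part of the borrowed implementations), the betas can
be derived from a squared-cosine decay of the cumulative product
$\bar\alpha_t$:

!bc pycod
import math
import torch

def cosine_beta_schedule(T, s=0.008):
    """Cosine variance schedule: bar-alpha follows a squared cosine."""
    steps = torch.arange(T + 1, dtype=torch.float64)
    f = torch.cos(((steps / T) + s) / (1 + s) * math.pi / 2) ** 2
    alphas_cumprod = f / f[0]
    betas = 1 - alphas_cumprod[1:] / alphas_cumprod[:-1]
    # clip to avoid a degenerate step at the very end of the schedule
    return betas.clamp(max=0.999).float()
!ec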

!split
===== Imports and Utilities =====

!bc pycod
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision   # needed for torchvision.utils.make_grid when sampling
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
import math
!ec

!split
===== Hyperparameters and schedules =====

!bc pycod
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Training settings
batch_size = 128
epochs = 5
lr = 2e-4
img_size = 28
channels = 1

# Diffusion hyperparameters
T = 300  # number of diffusion steps
beta_start, beta_end = 1e-4, 0.02
betas = torch.linspace(beta_start, beta_end, T, device=device)  # linear schedule
alphas = 1. - betas
alphas_cumprod = torch.cumprod(alphas, dim=0)
!ec
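
As a quick sanity check (my addition, not part of the original code),
one can verify that the fraction of the original signal variance
surviving at the final step is small, so that $x_T$ is dominated by
noise; for this linear schedule with $T=300$ it is roughly $0.05$:

!bc pycod
# cumulative signal coefficient at the last timestep
print(alphas_cumprod[-1].item())  # ~0.05
!ec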

!split
===== Data Loading =====

The normalization maps the MNIST pixel values to $[-1,1]$, which
matches the clamping applied to the generated samples below.

!bc pycod
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

train_ds = datasets.MNIST('.', train=True, download=True, transform=transform)
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
!ec

!split
===== Model definition =====

We present a lightweight U-Net inspired model for noise prediction.
Note that the time embedding must produce 128 channels to match the
output of the second encoder convolution before it is added to the
feature map:
!bc pycod
class SimpleUNet(nn.Module):
    def __init__(self, c):
        super().__init__()
        self.enc1 = nn.Conv2d(c, 64, 3, padding=1)
        self.enc2 = nn.Conv2d(64, 128, 3, padding=1)
        self.dec1 = nn.ConvTranspose2d(128, 64, 3, padding=1)
        self.dec2 = nn.ConvTranspose2d(64, c, 3, padding=1)
        self.act = nn.ReLU()
        # timestep embedding to condition on t
        self.time_mlp = nn.Sequential(
            nn.Linear(1, 64),
            nn.ReLU(),
            nn.Linear(64, 128),
        )

    def forward(self, x, t):
        # x: [B, C, H, W], t: [B] with normalized values in [0, 1]
        h = self.act(self.enc1(x))
        h = self.act(self.enc2(h))
        # add time embedding, broadcast over the spatial dimensions
        t = t.unsqueeze(-1)
        temb = self.time_mlp(t)
        temb = temb.view(-1, 128, 1, 1)
        h = h + temb
        h = self.act(self.dec1(h))
        return self.dec2(h)
!ec
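
A quick shape check (my addition) confirms that the network maps a
batch of images to a noise prediction of the same shape:

!bc pycod
net = SimpleUNet(1)
x = torch.randn(8, 1, 28, 28)  # a batch of 8 MNIST-sized images
t = torch.rand(8)              # normalized timesteps in [0, 1]
print(net(x, t).shape)         # torch.Size([8, 1, 28, 28])
!ec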

!split
===== Forward Diffusion $q(x_t\vert x_0)$ =====

The function below implements the closed-form reparameterization
$x_t = \sqrt{\bar\alpha_t}\,x_0 + \sqrt{1-\bar\alpha_t}\,\epsilon$
with $\epsilon\sim\mathcal{N}(0,I)$, so a noisy sample at any
timestep $t$ can be drawn in a single step:

!bc pycod
def q_sample(x0, t, noise=None):
    """Add noise to x0 at timestep t."""
    if noise is None:
        noise = torch.randn_like(x0)
    sqrt_acp = alphas_cumprod[t]**0.5
    sqrt_1macp = (1 - alphas_cumprod[t])**0.5
    return sqrt_acp.view(-1,1,1,1)*x0 + sqrt_1macp.view(-1,1,1,1)*noise
!ec
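
To see the forward process in action, one can noise a single training
image at a few increasing timesteps (a small illustration of my own,
using the data loader defined above):

!bc pycod
x0, _ = train_ds[0]
x0 = x0.unsqueeze(0).to(device)  # shape [1, 1, 28, 28]
fig, axes = plt.subplots(1, 4, figsize=(8, 2))
for ax, step in zip(axes, [0, 100, 200, 299]):
    t = torch.tensor([step], device=device)
    xt = q_sample(x0, t)
    ax.imshow(xt.squeeze().cpu(), cmap='gray')
    ax.set_title(f"t={step}")
    ax.axis('off')
plt.show()
!ec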

!split
===== Cost/Loss function =====

!bc pycod
def diffusion_loss(model, x0):
    """Compute MSE between predicted noise and true noise."""
    B = x0.size(0)
    # draw an independent random timestep for each image in the batch
    t = torch.randint(0, T, (B,), device=device).long()
    noise = torch.randn_like(x0)
    x_noisy = q_sample(x0, t, noise)
    # the network receives the timestep normalized to [0, 1]
    pred_noise = model(x_noisy, t.float()/T)
    return F.mse_loss(pred_noise, noise)
!ec

!split
===== Training loop =====

!bc pycod
model = SimpleUNet(channels).to(device)
opt = torch.optim.Adam(model.parameters(), lr=lr)

for epoch in range(epochs):
    total_loss = 0
    for x, _ in train_loader:
        x = x.to(device)
        loss = diffusion_loss(model, x)
        opt.zero_grad()
        loss.backward()
        opt.step()
        total_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss/len(train_loader):.4f}")
!ec
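
To avoid retraining before every sampling run, the weights can be
saved and reloaded with the standard PyTorch pattern (my addition;
the file name is hypothetical):

!bc pycod
torch.save(model.state_dict(), "ddpm_mnist.pt")  # hypothetical path

# later, to reload before sampling:
model = SimpleUNet(channels).to(device)
model.load_state_dict(torch.load("ddpm_mnist.pt", map_location=device))
!ec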

!split
===== Sampling (Reverse Diffusion) =====

Each reverse step computes the DDPM posterior mean
$x_{t-1} = \frac{1}{\sqrt{\alpha_t}}\left(x_t - \frac{\beta_t}{\sqrt{1-\bar\alpha_t}}\,\epsilon_\theta(x_t,t)\right) + \sigma_t z$
with $\sigma_t = \sqrt{\beta_t}$ and $z\sim\mathcal{N}(0,I)$; no noise
is added at the final step:

!bc pycod
@torch.no_grad()
def p_sample_loop(model, shape):
    x = torch.randn(shape, device=device)
    for i in reversed(range(T)):
        t = torch.full((shape[0],), i, device=device).float()/T
        eps_pred = model(x, t)
        beta_t = betas[i]
        alpha_t = alphas[i]
        acp_t = alphas_cumprod[i]
        coef1 = 1 / alpha_t.sqrt()
        coef2 = beta_t / ( (1 - acp_t).sqrt() )
        x = coef1*(x - coef2*eps_pred)
        if i > 0:
            z = torch.randn_like(x)
            sigma = beta_t.sqrt()
            x = x + sigma*z
    return x

# Generate and display a 4x4 grid of samples
samples = p_sample_loop(model, (16, channels, img_size, img_size))
samples = samples.clamp(-1,1).cpu()
grid = torchvision.utils.make_grid(samples, nrow=4, normalize=True)
plt.figure(figsize=(5,5))
plt.imshow(grid.permute(1,2,0))
plt.axis('off')
plt.show()
!ec

!split
===== More details =====

For more details and implementations, see Calvin Luo at URL:"https://arxiv.org/abs/2208.11970"