In [1]:
import torch
import torch.nn as nn
import torchvision
from torchvision import models,transforms

In [2]:
# Load a ready-made CNN (VGG-16) from torchvision's model zoo.
# `VGG16_Weights.DEFAULT` is the enum member that the string 'DEFAULT' resolves
# to; the pretrained checkpoint is downloaded and cached on first use.
model_vgg = models.vgg16(weights=models.VGG16_Weights.DEFAULT)

# Show the layer-by-layer structure of the network.
print(model_vgg)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:04<00:00, 120MB/s] 


VGG(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): ReLU(inplace=True)
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (6): ReLU(inplace=True)
    (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (8): ReLU(inplace=True)
    (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (13): ReLU(inplace=True)
    (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (15): ReLU(inplace=True)
    (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1

In [3]:
# Creating a custom DL architecture (here a simple linear model...)
class NewModel(nn.Module):
    """Minimal custom architecture: a single fully-connected (affine) layer.

    Args:
        in_features: Size of the last dimension of the input. Defaults to 3.
        out_features: Size of the last dimension of the output. Defaults to 2.
    """

    def __init__(self, in_features: int = 3, out_features: int = 2):
        super().__init__()
        # Sub-modules assigned as attributes are registered by nn.Module, so
        # their weights are exposed through `self.parameters()`.
        self.lin = nn.Linear(in_features, out_features)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to `x`.

        Args:
            x: Tensor whose last dimension has size `in_features`.

        Returns:
            Tensor with the same leading dimensions as `x` and a last
            dimension of size `out_features`.
        """
        return self.lin(x)

In [5]:
# Using backward for the computation of the gradient.
# Build the input directly as a leaf tensor that tracks gradients.
# `torch.tensor` is the recommended factory (the `torch.Tensor(...)` constructor
# is a legacy alias); float literals keep the data in float32.
x = torch.tensor([1.0, 0.0, 1.0], requires_grad=True)

# The linear layer is randomly initialised, so the gradient values printed
# below differ from one run to the next.
model = NewModel()

# Reduce the model output to a scalar so that `backward()` can be called
# without an explicit gradient argument.
y = torch.sum(model(x))

# No backward pass has run yet, so nothing is stored in `x.grad`.
print("Before backprop:", x.grad)

# Back-propagate from the scalar `y`; this fills `x.grad` with dy/dx.
y.backward()

print("After backprop:", x.grad)

Before backprop: None
After backprop: tensor([-0.1076,  0.5554, -0.5603])


## Example of a typical training pipeline

In [None]:
### PARAMETERS
# Train on the first CUDA GPU when one is available, otherwise on the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Number of full passes over the dataset.
num_epochs = 100

### DATA, MODEL, LOSS, OPTIMIZER AND SCHEDULER
# NOTE: this cell is a template. Every `...` below is a placeholder that must
# be replaced by a real object before the cell can be executed.
dataloader = ... # YOUR DATA: an iterable yielding (inputs, targets) mini-batches
model = ... # YOUR MODEL: called as model(inputs) in the loop
criterion = ... # YOUR LOSS FUNCTION: called as criterion(outputs, targets)
optimizer = ... # YOUR OPTIMIZER: must provide zero_grad() and step()
scheduler = ... # YOUR SCHEDULER: must provide step(), called once per epoch

### TRAINING LOOP
# Move the model to the chosen device and switch it to training mode
# (assuming an nn.Module: the mode matters for layers such as dropout or
# batch normalization, which behave differently in training and evaluation).
model.to(device)
model.train()

for epoch in range(num_epochs):
    # One training epoch over the whole dataset
    for inputs, targets in dataloader:
        # One mini-batch, put on the desired device (cpu or gpu)
        inputs, targets = inputs.to(device), targets.to(device)

        # Reset the gradients before any computation, so that those of the
        # previous mini-batch are not carried over into this one
        optimizer.zero_grad()

        # Computation of the model's output and loss on the mini-batch
        outputs = model(inputs)
        loss = criterion(outputs, targets)

        # Computation of the gradient on the mini-batch
        loss.backward()

        # One iteration of the optimizer: update of the model parameters
        # (the step-size itself is updated by the scheduler, once per epoch,
        # after this inner loop)
        optimizer.step()

        # Then we can compute statistics and store loss values
        # (placeholder: to be filled in)
        ...

    # Update of the step-size, once per epoch
    scheduler.step()
    # Report the epoch's loss and accuracy; the two `...` are placeholders for
    # the statistics gathered in the inner loop
    print('Loss: {:.4f} Acc: {:.4f}'.format(..., ...))