PyTorch Essentials

What

The dominant deep learning framework. Its core data structure is the tensor — like a NumPy array, but with GPU support and automatic differentiation.

Core concepts

Tensors

# Creating tensors — the fundamental PyTorch data structure.
import torch

t = torch.tensor([1.0, 2.0, 3.0])  # from a Python list
t = torch.zeros(3, 4)              # 3x4 tensor of zeros
t = torch.randn(3, 4)              # random normal
t = torch.from_numpy(np_array)     # from NumPy

# Inspecting and placing tensors.
t.shape       # size
t.dtype       # data type
t.device      # cpu or cuda
t.to("cuda")  # move to GPU — returns a NEW tensor; t itself is not modified

Autograd — automatic differentiation

# requires_grad=True tells autograd to record every operation on x.
x = torch.tensor(3.0, requires_grad=True)
y = x ** 2 + 2 * x + 1   # y = x² + 2x + 1 (forward pass builds the graph)
y.backward()                # compute dy/dx and store it in x.grad
x.grad                      # tensor(8.) — derivative at x=3: 2*3 + 2 = 8

Building a model

import torch.nn as nn
 
class SimpleNet(nn.Module):
    """A two-layer fully connected network: Linear -> ReLU -> Linear.

    The final layer is a plain Linear, so forward() returns raw
    (unnormalized) scores.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Assemble the layers in order, then wrap them in a single
        # Sequential container so forward() is one call.
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run x through the whole stack and return the output scores."""
        return self.net(x)

Training loop

# Standard supervised training loop.
model = SimpleNet(784, 128, 10)   # e.g. 28*28 flattened inputs, 10 classes
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
loss_fn = nn.CrossEntropyLoss()   # takes raw logits, not softmax probabilities

for epoch in range(num_epochs):
    for X_batch, y_batch in dataloader:
        logits = model(X_batch)            # forward pass
        loss = loss_fn(logits, y_batch)

        optimizer.zero_grad()              # clear old gradients (they accumulate otherwise)
        loss.backward()                    # backprop: compute gradients
        optimizer.step()                   # update parameters

DataLoader

# Wrap tensors in a Dataset, then let DataLoader handle batching/shuffling.
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(X_tensor, y_tensor)                # pairs X_tensor[i] with y_tensor[i]
loader = DataLoader(dataset, batch_size=32, shuffle=True)  # reshuffles each epoch