Logistic regression on MNIST digits (IDX format)

library(rTorch)

# Short aliases for the Python submodules used throughout this script
nn <- torch$nn
transforms <- torchvision$transforms

# Make torch$float the default dtype for newly created floating-point tensors
torch$set_default_dtype(torch$float)

Hyperparameters

# Hyper-parameters (integers carry the L suffix so Python receives ints)
# Number of input features per image; the loops reshape each image to 28 * 28
input_size <- 784L
# Number of output classes (labels run from 0 to 9)
num_classes <- 10L
# Number of passes over the training batches
num_epochs <- 5L
# Number of samples per mini-batch
batch_size <- 100L
# Step size passed to the SGD optimizer
learning_rate <- 0.001

Read datasets

# MNIST dataset (images and labels), read from IDX-format files.
# The training split is fetched into `root` when it is not already there.
train_dataset <- torchvision$datasets$MNIST(
    root = '../inst/raw_data',
    train = TRUE,
    transform = transforms$ToTensor(),
    download = TRUE
)

# Test split: same root and transform, no download request.
test_dataset <- torchvision$datasets$MNIST(
    root = '../inst/raw_data',
    train = FALSE,
    transform = transforms$ToTensor()
)

# Data loaders (input pipeline): training batches are shuffled,
# test batches keep the dataset order.
train_loader <- torch$utils$data$DataLoader(
    dataset = train_dataset,
    batch_size = batch_size,
    shuffle = TRUE
)

test_loader <- torch$utils$data$DataLoader(
    dataset = test_dataset,
    batch_size = batch_size,
    shuffle = FALSE
)

# Inspect the loader object as seen from R
class(train_loader)
#> [1] "torch.utils.data.dataloader.DataLoader"
#> [2] "python.builtin.object"
length(train_loader)
#> [1] 2

Define the model

# Logistic regression model: a single linear layer mapping the
# input_size features to num_classes scores
model <- nn$Linear(input_size, num_classes)

# Loss function; nn.CrossEntropyLoss() computes softmax internally,
# so the model itself outputs raw scores
criterion <- nn$CrossEntropyLoss()

# Plain SGD over the model's parameters
optimizer <- torch$optim$SGD(model$parameters(), lr = learning_rate)

print(model)
#> Linear(in_features=784, out_features=10, bias=True)

Training

# Train the model
# NOTE(review): iterate() appears to drain the DataLoader once into an R
# collection (its length is used as the step count), so every epoch below
# replays the same batches in the same order -- confirm this is intended.
iter_train_loader <- iterate(train_loader)
total_step <- length(iter_train_loader)
for (epoch in seq_len(num_epochs)) {
    # 0-based step counter within the current epoch
    i <- 0
    for (obj in iter_train_loader) {

        images <- obj[[1]]   # image batch; presumably (batch_size, 1, 28, 28)
        labels <- obj[[2]]   # label batch; presumably (batch_size), labels from 0 to 9
        # cat(i, "\t"); print(images$shape)

        # Reshape images to (batch_size, input_size)
        images <- images$reshape(-1L, 28L*28L)
        # images <- torch$as_tensor(images$reshape(-1L, 28L*28L), dtype=torch$double)

        # Forward pass
        outputs <- model(images)
        loss <- criterion(outputs, labels)

        # Backward and optimize
        optimizer$zero_grad()
        loss$backward()
        optimizer$step()

        # Report progress every 100 steps
        if ((i + 1) %% 100 == 0) {
            # epoch is already 1-based in R, so it is printed as-is;
            # only the step counter i needs the +1
            cat(sprintf('Epoch [%d/%d], Step [%d/%d], Loss: %f \n',
                epoch, num_epochs, i + 1, total_step, loss$item()))
        }
        i <- i + 1
    }
}
#> Epoch [1/5], Step [100/600], Loss: 2.193622 
#> Epoch [1/5], Step [200/600], Loss: 2.095055 
#> Epoch [1/5], Step [300/600], Loss: 1.993158 
#> Epoch [1/5], Step [400/600], Loss: 1.914482 
#> Epoch [1/5], Step [500/600], Loss: 1.828060 
#> Epoch [1/5], Step [600/600], Loss: 1.821115 
#> Epoch [2/5], Step [100/600], Loss: 1.732071 
#> Epoch [2/5], Step [200/600], Loss: 1.653721 
#> Epoch [2/5], Step [300/600], Loss: 1.592439 
#> Epoch [2/5], Step [400/600], Loss: 1.525097 
#> Epoch [2/5], Step [500/600], Loss: 1.436987 
#> Epoch [2/5], Step [600/600], Loss: 1.502717 
#> Epoch [3/5], Step [100/600], Loss: 1.446447 
#> Epoch [3/5], Step [200/600], Loss: 1.366274 
#> Epoch [3/5], Step [300/600], Loss: 1.341085 
#> Epoch [3/5], Step [400/600], Loss: 1.280446 
#> Epoch [3/5], Step [500/600], Loss: 1.181919 
#> Epoch [3/5], Step [600/600], Loss: 1.288031 
#> Epoch [4/5], Step [100/600], Loss: 1.259706 
#> Epoch [4/5], Step [200/600], Loss: 1.173801 
#> Epoch [4/5], Step [300/600], Loss: 1.176926 
#> Epoch [4/5], Step [400/600], Loss: 1.122017 
#> Epoch [4/5], Step [500/600], Loss: 1.010820 
#> Epoch [4/5], Step [600/600], Loss: 1.138838 
#> Epoch [5/5], Step [100/600], Loss: 1.130953 
#> Epoch [5/5], Step [200/600], Loss: 1.039641 
#> Epoch [5/5], Step [300/600], Loss: 1.063141 
#> Epoch [5/5], Step [400/600], Loss: 1.013657 
#> Epoch [5/5], Step [500/600], Loss: 0.890745 
#> Epoch [5/5], Step [600/600], Loss: 1.030636

Prediction

# Evaluate the trained model on the test batches.
# The whole loop runs inside torch$no_grad(), since no backward pass is made.
iter_test_loader <- iterate(test_loader)

with(torch$no_grad(), {
    correct <- 0
    total <- 0
    for (obj in iter_test_loader) {
        labels <- obj[[2]]   # label batch; labels from 0 to 9
        # Flatten each image in the batch to a row of 28 * 28 values
        images <- obj[[1]]$reshape(-1L, 28L*28L)
        # images <- torch$as_tensor(images$reshape(-1L, 28L*28L), dtype=torch$double)
        outputs <- model(images)
        # torch.max over dim 1 yields a (values, indices) pair;
        # presumably [1L] selects the indices, i.e. the predicted class
        .predicted <- torch$max(outputs$data, 1L)
        predicted <- .predicted[1L]
        # Element-wise comparison of predictions and labels for this batch
        hits <- predicted$numpy() == labels$numpy()
        correct <- correct + sum(hits)
        total <- total + labels$size(0L)
    }
    cat(sprintf('Accuracy of the model on the 10000 test images: %f %%', (100 * correct / total)))

})
#> Accuracy of the model on the 10000 test images: 83.730000 %

Save the model

# Save the model checkpoint: only the state dict (the model's parameters)
# is written, to 'model.ckpt' in the current working directory
model_state <- model$state_dict()
torch$save(model_state, 'model.ckpt')