Logistic Regression - rTorch

Alfonso R. Reyes

2018-01-22

Import libraries

library(rTorch)

Variable   <- import("torch.autograd")$Variable
np         <- import("numpy")
optim      <- import("torch.optim") 
py         <- import_builtins()

Load the data

# reproducible
torch$manual_seed(42L)
#> <torch._C.Generator>

# load or download MNIST dataset
mnist <- dataset_mnist(onehot = FALSE)

trX <- mnist[[1]]; teX <- mnist[[2]]; trY <- mnist[[3]]; teY <- mnist[[4]]

trX <- torch$from_numpy(trX)$float()      # FloatTensor
teX <- torch$from_numpy(teX)$float()      # FloatTensor
trY <- torch$from_numpy(trY)$long()       # LongTensor
teY <- torch$from_numpy(teY)$long()       # LongTensor
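
As a quick sanity check (a sketch; the exact printed form depends on the installed PyTorch version), we can confirm that the images arrive as flattened float vectors and the labels as integer class indices:

trX$size()    # expected: torch.Size([60000, 784]) for the standard MNIST training split
teX$size()    # expected: torch.Size([10000, 784])
trY$size()    # expected: torch.Size([60000])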

Model parameters

# in Python this was: n_examples, n_features = trX.size()
# here we use the rTorch function torch_size() to get the tensor dimensions
n_examples    <- torch_size(trX$size())[1]
n_features    <- torch_size(trX$size())[2]

learning_rate <- 0.01
momentum      <- 0.9
n_classes     <- 10L
batch_size    <- 100L
epochs        <- 5
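
With the standard MNIST training split of 60,000 examples and a batch size of 100, each epoch is split into 600 mini-batches; the same quantity is computed again inside the training loop below.

n_examples %/% batch_size    # mini-batches per epoch, e.g. 600 for 60,000 examples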

Build the model

build_model <- function(input_dim, output_dim) {
    # We don't need a softmax layer here because CrossEntropyLoss
    # already applies log-softmax internally.
    model <- torch$nn$Sequential()
    model$add_module("linear",
                     torch$nn$Linear(input_dim, output_dim, bias = FALSE))
    return(model)
}
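
The resulting model is a single linear map from the pixel features to the 10 digit classes. A quick way to confirm this is to build a throwaway instance `m` and print it (a sketch; the printed representation varies between PyTorch versions):

m <- build_model(n_features, n_classes)
print(m)    # a Sequential holding one Linear layer mapping 784 -> 10, without a bias term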

train <- function(model, loss, optimizer, x, y) {
    # wrap the tensors in Variables; no gradients are needed for inputs or targets
    x <- Variable(x, requires_grad = FALSE)
    y <- Variable(y, requires_grad = FALSE)
    
    # reset gradient
    optimizer$zero_grad()
    
    # forward
    fx     <- model$forward(x)
    output <- loss$forward(fx, y)
    
    # backward
    output$backward()
    
    # update parameters
    optimizer$step()
    
    return(output$data$index(0L))
}

predict <- function(model, x) {
    xvar   <- Variable(x, requires_grad = FALSE)
    output <- model$forward(xvar)
    # return the class with the highest score for each example
    return(np$argmax(output$data, axis = 1L))
}

batching <- function(k) {
    k <- k - 1                             # Python indexing starts at 0
    start <- as.integer(k * batch_size)
    end   <- as.integer((k + 1) * batch_size)
    
    cost  <- train(model, loss, optimizer,
                       trX$narrow(0L, start, end-start),
                       trY$narrow(0L, start, end-start))
    
    # accumulate the cost across batches; beware of the <<- assignment to the
    # enclosing environment. $numpy() converts the cost to a plain number
    # because a `+` method for tensors is not yet available in rTorch.
    ccost <<- ccost + cost$numpy()
    return(list(model = model, cost = ccost))
}
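
The slicing above relies on torch's narrow(dim, start, length), which returns a view of `length` rows along dimension `dim`, starting at index `start`. A minimal sketch of what one mini-batch looks like:

first_batch <- trX$narrow(0L, 0L, batch_size)   # rows 0..99, i.e. the first mini-batch
first_batch$size()                              # expected: torch.Size([100, 784])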


model     <- build_model(n_features, n_classes)
loss      <- torch$nn$CrossEntropyLoss(size_average = TRUE)
optimizer <- optim$SGD(model$parameters(), lr = learning_rate, momentum = momentum)
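
Before looping over full epochs, a single call to train() on the first mini-batch shows all the pieces working together (a sketch with throwaway objects x0, y0 and cost0; running it performs a real SGD step, so the epochs below would start from a slightly different point):

x0    <- trX$narrow(0L, 0L, batch_size)
y0    <- trY$narrow(0L, 0L, batch_size)
cost0 <- train(model, loss, optimizer, x0, y0)  # one forward/backward pass plus an SGD update
cost0$numpy()                                   # cross-entropy of the batch, roughly log(10) ~ 2.3 before training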

Perform optimization

# loop through epochs
for (i in seq(1, epochs)) {
    ccost <- 0.0
    num_batches <- n_examples %/% batch_size
    
    # run all the batches with lapply and keep the result of the last one
    batch_li <- lapply(seq(1, num_batches), batching)[[num_batches]]
    ccost    <- batch_li$cost
    predY    <- predict(batch_li$model, teX)
    cat(sprintf("Epoch = %3d, cost = %f, acc = %.2f%% \n",
              i, ccost / num_batches, 100 * mean(predY$numpy() == teY$numpy())))
}
#> Epoch =   1, cost = 0.547787, acc = 90.15% 
#> Epoch =   2, cost = 0.365290, acc = 90.95% 
#> Epoch =   3, cost = 0.338373, acc = 91.26% 
#> Epoch =   4, cost = 0.324327, acc = 91.48% 
#> Epoch =   5, cost = 0.315265, acc = 91.73%

# Results for different total numbers of epochs (first and last epoch of each run):
#
#  total epochs   epoch      cost      accuracy
#             5       1    0.547787     90.15%
#             5       5    0.315265     91.73%
#            50       1    0.547787     90.15%
#            50      50    0.261484     92.42%
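
Since the assignment inside the for loop persists in the workspace, predY still holds the predictions from the last epoch, and a quick cross-tabulation against the true test labels shows which digits the model confuses (a sketch using base R):

# rows: true digit, columns: predicted digit
table(truth = teY$numpy(), predicted = predY$numpy())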