classification: convolutional architecture

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

The goal of this vignette is to demonstrate how to use LBBNNs with a convolutional architecture. We only use a dummy dataset here, but the torchvision package can be used to try the model on real datasets such as MNIST or KMNIST.

Demonstration of how to download KMNIST using torchvision

# torchvision is only needed for the KMNIST download; install it on demand.
if (!requireNamespace("torchvision")) {
  install.packages("torchvision")
}

# Seed the torch RNG before any loaders are created.
torch::torch_manual_seed(42)

# Root directory passed to the KMNIST dataset constructors.
dir <- "./dataset/kmnist"
# Convert one KMNIST item to a tensor, adding a channel axis when the raw
# input looks like a stack of square images.
#
# x: the raw object handed to the transform by the dataset.
# Returns the tensor produced by torchvision::transform_to_tensor(), with an
# extra dimension inserted at position 2 when the stacked-image layout was
# detected on the raw input.
kmnist_transform <- function(x) {
  shape <- dim(x)

  # Rank-3 input with equal first two extents and more than one slice along
  # the third is treated as [height, width, batch]. The original author's note
  # reports this layout on Windows/Linux and the plain layout on macOS.
  # NOTE(review): platform behaviour is unverified here.
  stacked <- length(shape) == 3 && shape[3] > 1 && shape[1] == shape[2]

  out <- torchvision::transform_to_tensor(x)
  if (stacked) {
    # Presumably [batch, 28, 28] at this point; insert the channel axis to get
    # [batch, 1, 28, 28] - TODO confirm against transform_to_tensor().
    out <- out$unsqueeze(2)
  }
  out
}
# Build the KMNIST training and test datasets with torchvision. Only the
# training call requests a download; both use the same transform.
train_ds <- torchvision::kmnist_dataset(dir,
                                        download = TRUE,
                                        transform = kmnist_transform)

test_ds <- torchvision::kmnist_dataset(dir,
                                       train = FALSE,
                                       transform = kmnist_transform)

# Wrap both datasets in loaders with batches of 100; only the training loader
# shuffles.
train_loader_kmnist <- torch::dataloader(
  train_ds,
  batch_size = 100,
  shuffle = TRUE
)
test_loader_kmnist <- torch::dataloader(
  test_ds,
  batch_size = 100
)

Create dummy dataset with the same shape as KMNIST

# Reseed so the dummy data below is reproducible.
torch::torch_manual_seed(42)

# 200 random inputs shaped like single-channel 28x28 images.
x <- torch::torch_randn(200, 1, 28, 28)

# 200 random labels. Presumably integers 1..10 (upper bound exclusive), to
# match the 10 outputs of the final layer - confirm against torch_randint().
# NOTE(review): no dtype is given; check that the loss accepts these targets.
y <- torch::torch_randint(1, 11, size = 200)

# One loader over the dummy data, 100 samples per batch (two batches).
dataset <- torch::tensor_dataset(x, y)
train_loader <- torch::dataloader(
  dataset,
  batch_size = 100
)

Create the layers that define the architecture of our convolutional network

Here we manually define the layers, as we will use both convolutional and feed-forward layers. They are defined as follows:

# All layers are created on the CPU.
device <- "cpu"

# First convolution: 1 input channel -> 32 feature maps, 5x5 kernel.
conv_layer_1 <- lbbnn_conv2d(
  in_channels = 1,
  out_channels = 32,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# Second convolution: 32 -> 64 feature maps, 5x5 kernel.
conv_layer_2 <- lbbnn_conv2d(
  in_channels = 32,
  out_channels = 64,
  kernel_size = 5,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device
)

# First dense layer. 1024 = 64 channels * 4 * 4, assuming stride-1, unpadded
# convolutions on 28x28 input: 28 -> 24 -> pool 12 -> 8 -> pool 4.
# NOTE(review): confirm the stride/padding defaults of lbbnn_conv2d.
linear_layer_1 <- lbbnn_linear(
  in_features = 1024,
  out_features = 300,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-10, 10),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)

# Output layer: 300 hidden units -> 10 classes.
linear_layer_2 <- lbbnn_linear(
  in_features = 300,
  out_features = 10,
  prior_inclusion = 0.5,
  standard_prior = 1,
  density_init = c(-5, 15),
  num_transforms = 2,
  flow = FALSE,
  hidden_dims = c(200, 200),
  device = device,
  bias_inclusion_prob = FALSE,
  conv_net = TRUE
)

Define the model object

We apply a max-pooling layer after each of the two convolutional layers.

# Convolutional LBBNN classifier: two LBBNN convolutional layers, each followed
# by a leaky-ReLU activation and max pooling (kernel size 2), then two LBBNN
# linear layers.
#
# Constructor arguments:
#   conv1, conv2  convolutional layers; invoked as layer(x, MPM)
#   fc1, fc2      linear layers; invoked as layer(x, MPM)
#   device        accepted so callers can pass it positionally; not used inside
#                 initialize(). The default is a literal rather than
#                 `device = device`, which is a self-referential default that
#                 errors if it is ever evaluated.
LBBNN_ConvNet <- torch::nn_module(
  "LBBNN_ConvNet",

  initialize = function(conv1, conv2, fc1, fc2, device = "cpu") {
    # Metadata fields; presumably read by train_lbbnn()/validate_lbbnn() -
    # confirm against the package.
    self$problem_type <- "multiclass classification"
    self$input_skip <- FALSE

    self$conv1 <- conv1
    self$conv2 <- conv2
    self$fc1 <- fc1
    self$fc2 <- fc2

    self$pool <- torch::nn_max_pool2d(2)
    self$act <- torch::nn_leaky_relu()
    # Log-probabilities pair with the summed NLL loss; plain softmax is used
    # when forward() is called with predict = TRUE.
    self$out <- torch::nn_log_softmax(dim = 2)
    self$pout <- torch::nn_softmax(dim = 2)
    self$loss_fn <- torch::nn_nll_loss(reduction = "sum")
  },

  # Forward pass.
  #   x        input batch
  #   MPM      flag forwarded unchanged to every LBBNN layer
  #   predict  FALSE -> log-softmax output, TRUE -> softmax output
  # The result is returned visibly (the original ended in an assignment, whose
  # value is invisible).
  forward = function(x, MPM = FALSE, predict = FALSE) {
    x <- self$pool(self$act(self$conv1(x, MPM)))
    x <- self$pool(self$act(self$conv2(x, MPM)))
    # Flatten everything after the first dimension before the dense layers.
    x <- torch::torch_flatten(x, start_dim = 2)
    x <- self$act(self$fc1(x, MPM))
    logits <- self$fc2(x, MPM)
    if (predict) {
      self$pout(logits)
    } else {
      self$out(logits)
    }
  },

  # Sum of the KL-divergence terms reported by the four LBBNN layers.
  kl_div = function() {
    self$conv1$kl_div() + self$conv2$kl_div() +
      self$fc1$kl_div() + self$fc2$kl_div()
  },

  # Fraction of `alpha` entries above 0.5, pooled over all four layers.
  # NOTE(review): as.numeric() on `alpha` may need the values on the CPU -
  # confirm before running on a GPU device.
  density = function() {
    alphas <- c(
      as.numeric(self$conv1$alpha),
      as.numeric(self$conv2$alpha),
      as.numeric(self$fc1$alpha),
      as.numeric(self$fc2$alpha)
    )
    mean(alphas > 0.5)
  },

  # Stub: always returns NULL for this architecture.
  compute_paths = function() {
    NULL
  },

  # Stub: always returns NA for this architecture.
  density_active_path = function() {
    NA
  }
)

# Assemble the network from the four layers defined above and move it to the
# chosen device.
model_conv <- LBBNN_ConvNet(
  conv1 = conv_layer_1,
  conv2 = conv_layer_2,
  fc1 = linear_layer_1,
  fc2 = linear_layer_2,
  device = device
)
model_conv$to(device = device)

Train and validate the model

These functions work the same as with feed-forward architectures. Training can be accelerated on GPU where available.

# Train for two epochs on the dummy loader with learning rate 0.01.
train_lbbnn(
  epochs = 2,
  LBBNN = model_conv,
  lr = 0.01,
  train_dl = train_loader,
  device = device
)
#> 
#> Epoch 1, training: loss = 892055.75000, acc = 0.08500, density = 0.51523
#> 
#> Epoch 2, training: loss = 888615.37500, acc = 0.11000, density = 0.51430
# Validate with num_samples = 2; the dummy training loader is reused as the
# test loader here.
validate_lbbnn(
  model_conv,
  num_samples = 2,
  test_dl = train_loader,
  device = device
)
#> $accuracy_full_model
#> [1] 0.13
#> 
#> $accuracy_sparse
#> [1] 0.13
#> 
#> $density
#> [1] 0.5138763
#> 
#> $density_active_path
#> [1] NA

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.