The hardware and bandwidth for this mirror are donated by dogado GmbH, the web-hosting and full-service cloud provider. Check out our WordPress tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Keras-like API in ggmlR

ggmlR provides a high-level Keras-like interface for building and training neural networks on top of the ggml C tensor library. This vignette walks through the main APIs using the built-in iris and mtcars datasets.

Installation

# install.packages("ggmlR")   # once on CRAN
library(ggmlR)   # attach ggmlR; all ggml_* and ag_* functions below come from it

1. Sequential API

The sequential API mirrors keras.Sequential: stack layers with |>, compile, fit, evaluate, predict.

1.1 Prepare data — iris (3-class classification)

data(iris)
set.seed(42)

# Feature matrix: four standardized iris measurements [150 x 4].
x <- scale(as.matrix(iris[, 1:4]))

# One-hot encode the Species factor -> [150 x 3] indicator matrix.
y <- model.matrix(~ Species - 1, iris)

# Shuffled 120/30 train/validation split.
idx   <- sample(nrow(x))
n_tr  <- 120L
tr_id <- idx[seq_len(n_tr)]
va_id <- idx[-seq_len(n_tr)]
x_tr  <- x[tr_id, ]
y_tr  <- y[tr_id, ]
x_val <- x[va_id, ]
y_val <- y[va_id, ]

1.2 Build and compile

# Stack layers with the native pipe, then compile in one chain.
model <- ggml_model_sequential() |>
  # 4 input features -> 32-unit hidden layer
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_batch_norm() |>
  # stochastic = TRUE: dropout mask is resampled during training
  ggml_layer_dropout(0.3, stochastic = TRUE) |>
  ggml_layer_dense(16L, activation = "relu") |>
  # 3 softmax outputs, one per iris species
  ggml_layer_dense(3L,  activation = "softmax") |>
  ggml_compile(
    optimizer = "adam",
    loss      = "categorical_crossentropy",
    metrics   = c("accuracy")
  )

print(model)   # layer-by-layer summary of the compiled model

1.3 Train

# Train on the 120-row training split; ggml_fit returns the updated model.
model <- ggml_fit(
  model,
  x_tr, y_tr,
  epochs           = 100L,
  batch_size       = 32L,
  validation_split = 0.0,   # no internal split; the held-out (x_val, y_val) set is scored in 1.4
  verbose          = 1L
)

1.4 Evaluate and predict

# Score the held-out set, then turn row-major probabilities into hard labels.
score <- ggml_evaluate(model, x_val, y_val, batch_size = 32L)
cat(sprintf("Val loss: %.4f   Val accuracy: %.4f\n", score$loss, score$accuracy))

probs   <- ggml_predict(model, x_val, batch_size = 32L)
classes <- apply(probs, 1, which.max)   # arg-max per row (one row per sample)
true    <- apply(y_val, 1, which.max)   # one-hot [30 x 3] -> integer label
cat("Confusion matrix:\n")
print(table(true = true, predicted = classes))

1.5 Save and reload

# Round-trip the fitted model through an .rds file and confirm the
# reloaded copy scores the validation set the same way.
path <- tempfile(fileext = ".rds")
ggml_save_model(model, path)
cat(sprintf("Saved: %s (%.1f KB)\n", path, file.size(path) / 1024))

model2 <- ggml_load_model(path)
score2 <- ggml_evaluate(model2, x_val, y_val, batch_size = 32L)
cat(sprintf("Reloaded model accuracy: %.4f\n", score2$accuracy))

2. Functional API

The functional API lets you build DAGs with multiple inputs/branches/outputs. Use ggml_input() to declare inputs, pipe through layers, then call ggml_model().

2.1 Single-input DAG — feed-forward network on iris

# Declare a symbolic 4-feature input node, pipe it through layers, then
# wrap the input/output nodes into a trainable model.
inp  <- ggml_input(shape = 4L)

h    <- inp |> ggml_layer_dense(32L, activation = "relu") |> ggml_layer_batch_norm()
h    <- h   |> ggml_layer_dense(16L, activation = "relu")
out  <- h   |> ggml_layer_dense(3L,  activation = "softmax")

model_fn <- ggml_model(inputs = inp, outputs = out) |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy",
               metrics = c("accuracy"))

# Same fit/evaluate workflow as the sequential API (verbose = 0 silences it).
model_fn <- ggml_fit(model_fn, x_tr, y_tr,
                     epochs = 100L, batch_size = 32L, verbose = 0L)

score_fn <- ggml_evaluate(model_fn, x_val, y_val, batch_size = 32L)
cat(sprintf("Functional model accuracy: %.4f\n", score_fn$accuracy))

2.2 Multi-input model — mtcars regression

Predict mpg from two input groups: engine features and transmission features.

data(mtcars)
set.seed(7)

# Two feature groups: engine-related and transmission-related columns.
engine_cols <- c("disp", "hp", "wt")
trans_cols  <- c("cyl", "gear", "carb", "am")

engine <- as.matrix(scale(mtcars[, engine_cols]))
trans  <- as.matrix(scale(mtcars[, trans_cols]))

# Standardised target as a plain [32 x 1] matrix (scale() attributes stripped).
y_mpg  <- matrix(scale(mtcars$mpg), ncol = 1L)

# Tiny dataset: train and evaluate on the full data for demonstration.
x1 <- engine
x2 <- trans

# Two symbolic inputs, one per feature group.
inp1 <- ggml_input(shape = 3L, name = "engine")
inp2 <- ggml_input(shape = 4L, name = "transmission")

# Each branch maps its group to a 16-unit representation so the
# element-wise merge below sees matching shapes.
branch1 <- inp1 |> ggml_layer_dense(16L, activation = "relu")
branch2 <- inp2 |> ggml_layer_dense(16L, activation = "relu")

merged  <- ggml_layer_add(list(branch1, branch2))        # element-wise add
out_reg <- merged |>
  ggml_layer_dense(8L, activation = "relu") |>
  ggml_layer_dense(1L, activation = "linear")   # single linear output for regression

model_reg <- ggml_model(inputs = list(inp1, inp2), outputs = out_reg) |>
  ggml_compile(optimizer = "adam", loss = "mse")

# Multi-input fit/predict take x as a list, one element per declared input.
model_reg <- ggml_fit(model_reg,
                      x = list(x1, x2), y = y_mpg,
                      epochs = 200L, batch_size = 16L, verbose = 0L)

preds <- ggml_predict(model_reg, x = list(x1, x2), batch_size = 16L)
cat(sprintf("Pearson r (scaled mpg): %.4f\n", cor(preds, y_mpg)))

3. Callbacks

Callbacks plug into ggml_fit() via the callbacks argument.

3.1 Early stopping

# Stop when val_loss has not improved by more than min_delta for
# `patience` consecutive epochs, then roll back to the best weights seen.
cb_stop <- callback_early_stopping(
  monitor   = "val_loss",
  patience  = 15L,
  min_delta = 1e-4,
  restore_best_weights = TRUE
)

model_cb <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L,  activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")

# validation_split carves validation data out of the training set so the
# callback has a val_loss to monitor.
model_cb <- ggml_fit(model_cb, x_tr, y_tr,
                     epochs           = 300L,
                     batch_size       = 32L,
                     validation_split = 0.1,
                     callbacks        = list(cb_stop),
                     verbose          = 1L)

3.2 Learning-rate schedulers

# Cosine annealing
cb_cosine <- callback_lr_cosine(T_max = 100L, lr_min = 1e-5)

# Step decay: halve LR every 30 epochs
cb_step <- callback_lr_step(step_size = 30L, gamma = 0.5)

# Reduce on plateau
cb_plateau <- callback_reduce_lr_on_plateau(
  monitor  = "val_loss",
  factor   = 0.5,
  patience = 10L,
  min_lr   = 1e-6
)

model_lr <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L,  activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy",
               lr = 1e-3)   # starting LR that the scheduler anneals from

# Only cb_cosine is attached here; cb_step / cb_plateau above are
# alternatives — pass whichever scheduler fits your run.
model_lr <- ggml_fit(model_lr, x_tr, y_tr,
                     epochs           = 150L,
                     batch_size       = 32L,
                     validation_split = 0.1,
                     callbacks        = list(cb_cosine),
                     verbose          = 0L)

4. Autograd Engine (ag_*)

The autograd engine provides PyTorch-style dynamic computation graphs. Tensors are column-major: shape is [features, batch].

4.1 Prepare data (iris, col-major)

# transpose: rows = features, cols = samples
# (the ag_* engine is column-major, unlike the row-major matrices above)
x_tr_ag  <- t(x_tr)    # [4, 120]
y_tr_ag  <- t(y_tr)    # [3, 120]
x_val_ag <- t(x_val)   # [4, 30]
y_val_ag <- t(y_val)   # [3, 30]

4.2 Build a model with ag_sequential

# Same architecture as the sequential-API example, expressed as ag_* modules.
ag_mod <- ag_sequential(
  ag_linear(4L,  32L, activation = "relu"),
  ag_batch_norm(32L),
  ag_dropout(0.3),
  ag_linear(32L, 16L, activation = "relu"),
  ag_linear(16L,  3L)   # raw logits; softmax is folded into the loss below
)

# Collect trainable parameters and hand them to an Adam optimizer.
params <- ag_mod$parameters()
opt    <- optimizer_adam(params, lr = 1e-3)

4.3 Training loop

BS <- 32L                # mini-batch size
n  <- ncol(x_tr_ag)      # number of training samples (columns)

ag_train(ag_mod)         # training mode (pairs with ag_eval() before inference)
set.seed(42)

for (ep in seq_len(150L)) {
  perm <- sample(n)      # reshuffle sample order each epoch
  for (b in seq_len(ceiling(n / BS))) {
    # column indices of the b-th mini-batch (the last batch may be short)
    idx <- perm[seq((b-1L)*BS + 1L, min(b*BS, n))]
    xb  <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb  <- y_tr_ag[, idx, drop = FALSE]

    # record the forward pass so backward() can compute gradients
    with_grad_tape({
      loss <- ag_softmax_cross_entropy_loss(ag_mod$forward(xb), yb)
    })
    grads <- backward(loss)
    opt$step(grads)
    opt$zero_grad()
  }

  # log the last mini-batch loss every 50 epochs
  if (ep %% 50L == 0L)
    cat(sprintf("epoch %d  loss %.4f\n", ep, loss$data[1]))
}

4.4 Inference and accuracy

ag_eval(ag_mod)   # inference mode (counterpart of ag_train() used above)

# Forward the model over column-major input in chunks and return class
# probabilities via a numerically stable softmax.
#
# model : object with a $forward(tensor) method whose result's $data is a
#         [n_classes, batch] logits matrix (column-major).
# x_cm  : numeric matrix [n_features, n_samples].
# chunk : number of samples (columns) per forward pass.
# Returns a [n_classes, n_samples] matrix whose columns each sum to 1.
ag_predict_cm <- function(model, x_cm, chunk = 64L) {
  n   <- ncol(x_cm)
  # probe with one sample to learn the output row count
  out <- matrix(0.0, nrow(model$forward(ag_tensor(x_cm[, 1, drop = FALSE]))$data), n)
  for (s in seq(1L, n, by = chunk)) {
    e  <- min(s + chunk - 1L, n)
    lg <- model$forward(ag_tensor(x_cm[, s:e, drop = FALSE]))$data
    # Subtract each column's max before exp (stability). The original
    # `lg - apply(lg, 2, max)` recycled the per-column maxima DOWN the rows
    # (column-major recycling), mis-aligning them whenever the chunk width
    # differs from nrow(lg); sweep() applies them column-wise as intended.
    ev <- exp(sweep(lg, 2, apply(lg, 2, max)))
    # Normalise each column to sum to 1 — again column-wise via sweep();
    # `ev / colSums(ev)` had the same recycling bug.
    out[, s:e] <- sweep(ev, 2, colSums(ev), "/")
  }
  out
}

probs_ag <- ag_predict_cm(ag_mod, x_val_ag)          # [3, 30]
pred_ag  <- apply(probs_ag, 2, which.max)            # predicted class per column
# y_val_ag is column-major ([3, 30]): one column per sample, so take the
# arg-max over columns (MARGIN = 2). The original used MARGIN = 1, which
# returns only 3 values (one per class row) and silently recycles against
# the 30 predictions, producing a meaningless accuracy.
true_ag  <- apply(y_val_ag, 2, which.max)
acc_ag   <- mean(pred_ag == true_ag)
cat(sprintf("Autograd val accuracy: %.4f\n", acc_ag))

4.5 LR scheduler + gradient clipping

# A wider variant trained with cosine LR annealing and gradient clipping.
ag_mod2 <- ag_sequential(
  ag_linear(4L,  64L, activation = "relu"),
  ag_batch_norm(64L),
  ag_dropout(0.3),
  ag_linear(64L, 32L, activation = "relu"),
  ag_linear(32L,  3L)
)
params2 <- ag_mod2$parameters()
opt2    <- optimizer_adam(params2, lr = 1e-3)
sch2    <- lr_scheduler_cosine(opt2, T_max = 150L, lr_min = 1e-5)   # anneal over all 150 epochs

ag_train(ag_mod2)
set.seed(42)

for (ep in seq_len(150L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b-1L)*BS + 1L, min(b*BS, n))]
    xb  <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb  <- y_tr_ag[, idx, drop = FALSE]

    with_grad_tape({
      loss2 <- ag_softmax_cross_entropy_loss(ag_mod2$forward(xb), yb)
    })
    grads2 <- backward(loss2)
    # clip gradients before the optimizer step
    clip_grad_norm(params2, grads2, max_norm = 5.0)
    opt2$step(grads2)
    opt2$zero_grad()
  }
  sch2$step()   # advance the LR schedule once per epoch
}

ag_eval(ag_mod2)   # back to inference mode before predicting
probs2 <- ag_predict_cm(ag_mod2, x_val_ag)
acc2   <- mean(apply(probs2, 2, which.max) == true_ag)
cat(sprintf("ag + cosine + clip  val accuracy: %.4f\n", acc2))

4.6 Manual model from raw ag_param

For full control, build a network directly from parameters:

# Build a tiny two-layer MLP directly from raw ag_param tensors.
# Weights get He-style initialisation (sd = sqrt(2 / fan_in)); biases start
# at zero. Returns a list exposing $forward(x) and $parameters().
make_net <- function(n_in, n_hidden, n_out) {
  # Parameter creation order (W1, b1, W2, b2) is preserved so seeded runs
  # draw the same rnorm() values as before.
  W1 <- ag_param(matrix(rnorm(n_hidden * n_in) * sqrt(2 / n_in), n_hidden, n_in))
  b1 <- ag_param(matrix(0.0, n_hidden, 1L))
  W2 <- ag_param(matrix(rnorm(n_out * n_hidden) * sqrt(2 / n_hidden), n_out, n_hidden))
  b2 <- ag_param(matrix(0.0, n_out, 1L))

  # hidden = relu(W1 x + b1); output = W2 hidden + b2 (raw linear logits)
  forward_fn <- function(x) {
    hidden <- ag_relu(ag_add(ag_matmul(W1, x), b1))
    ag_add(ag_matmul(W2, hidden), b2)
  }

  list(
    forward    = forward_fn,
    parameters = function() list(W1 = W1, b1 = b1, W2 = W2, b2 = b2)
  )
}

set.seed(1)
net    <- make_net(4L, 32L, 3L)
opt_r  <- optimizer_adam(net$parameters(), lr = 1e-3)

# Same mini-batch loop as section 4.3, driving the hand-built network.
for (ep in seq_len(200L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b-1L)*BS+1L, min(b*BS, n))]
    xb  <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb  <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({ loss_r <- ag_softmax_cross_entropy_loss(net$forward(xb), yb) })
    gr <- backward(loss_r)
    opt_r$step(gr);  opt_r$zero_grad()
  }
}

# ag_predict_cm only needs a $forward() method, so it works on the raw net too.
probs_r <- ag_predict_cm(net, x_val_ag)
acc_r   <- mean(apply(probs_r, 2, which.max) == true_ag)
cat(sprintf("Raw ag_param val accuracy: %.4f\n", acc_r))

5. GPU / device selection

# Use Vulkan GPU if available, fall back to CPU
device <- tryCatch({
  ag_device("gpu")   # errors when no GPU backend is available
  "gpu"
}, error = function(e) "cpu")

cat("Running on:", device, "\n")

# Mixed precision (f16 on GPU, f32 on CPU)
ag_dtype(if (device == "gpu") "f16" else "f32")

After calling ag_device() and ag_dtype(), all subsequent ag_param and ag_tensor calls use the selected device and dtype. The sequential API (ggml_fit) picks up the backend automatically.


API summary

Task Sequential / Functional Autograd
Build model ggml_model_sequential() + ggml_layer_* ag_sequential() or raw ag_param
Functional DAG ggml_input() + ggml_model() —
Compile ggml_compile() manual optimizer
Train ggml_fit() with_grad_tape + backward + opt$step
Evaluate ggml_evaluate() manual loop
Predict ggml_predict() manual forward
Save / load ggml_save_model() / ggml_load_model()
Callbacks callback_early_stopping(), callback_lr_* lr_scheduler_*, clip_grad_norm
Device automatic ag_device() / ag_dtype()

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.