The hardware and bandwidth for this mirror are donated by dogado GmbH, the webhosting and full-service cloud provider. Check out our WordPress tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
ggmlR provides a high-level Keras-like interface for building and
training neural networks on top of the ggml C tensor library. This
vignette walks through the main APIs using the built-in
iris and mtcars datasets.
The sequential API mirrors keras.Sequential: stack
layers with |>, compile, fit, evaluate, predict.
# --- Data preparation: iris, standardised features, one-hot targets ---------
data(iris)
set.seed(42)
x <- as.matrix(iris[, 1:4])
x <- scale(x) # standardise
# one-hot encode species
y <- model.matrix(~ Species - 1, iris) # [150 x 3]
idx <- sample(nrow(x))
n_tr <- 120L
x_tr <- x[idx[1:n_tr], ]
y_tr <- y[idx[1:n_tr], ]
x_val <- x[idx[(n_tr+1):150], ]
y_val <- y[idx[(n_tr+1):150], ]

# --- Sequential model: 4 -> 32 -> 16 -> 3, with batch norm and dropout ------
model <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_batch_norm() |>
  ggml_layer_dropout(0.3, stochastic = TRUE) |>
  ggml_layer_dense(16L, activation = "relu") |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(
    optimizer = "adam",
    loss = "categorical_crossentropy",
    metrics = c("accuracy")
  )
# Inspect the compiled architecture (layer sizes and parameter counts).
print(model)
## ggmlR Sequential Model
## ============================================================
## Layer Output Shape Params
## ------------------------------------------------------------
## dense (32) 160
## batch_norm (32) 64
## dropout (32) 0
## dense (16) 528
## dense (3) 51
## ============================================================
## Total parameters: 803
## Compiled: yes
# Train on the 120-sample training split.
model <- ggml_fit(
  model,
  x_tr, y_tr,
  epochs = 100L,
  batch_size = 32L,
  validation_split = 0.0, # we supply our own val set below
  verbose = 0L
)
## Note: dropping last 24 sample(s) (120 -> 96) because batch_size=32 must divide evenly. Training metrics are computed on 96 samples only.
# Evaluate on the held-out 30-sample validation split.
score <- ggml_evaluate(model, x_val, y_val, batch_size = 16L)
cat(sprintf("Val loss: %.4f Val accuracy: %.4f\n", score$loss, score$accuracy))
## Val loss: 0.1843 Val accuracy: 0.9333

# Predicted class = argmax over the 3 softmax columns (rows = samples here).
probs <- ggml_predict(model, x_val, batch_size = 16L)
classes <- apply(probs, 1, which.max)
true <- apply(y_val, 1, which.max)
cat("Confusion matrix:\n")
# NOTE(review): the call that printed the table below appears to have been
# lost in extraction; reconstructed from the shown dimnames -- TODO confirm
# against the original vignette.
print(table(true = true, predicted = classes))
## Confusion matrix:
## predicted
## true 1 2 3
## 1 9 0 0
## 2 0 10 1
## 3 0 1 9

# Round-trip the model through an RDS file and verify accuracy is preserved.
path <- tempfile(fileext = ".rds")
ggml_save_model(model, path)
cat(sprintf("Saved: %s (%.1f KB)\n", path, file.size(path) / 1024))
## Saved: /tmp/Rtmp4Cr0gw/file8d47e44a1d6d2.rds (4.3 KB)
model2 <- ggml_load_model(path)
score2 <- ggml_evaluate(model2, x_val, y_val, batch_size = 16L)
cat(sprintf("Reloaded model accuracy: %.4f\n", score2$accuracy))
## Reloaded model accuracy: 0.9333
The functional API lets you build DAGs with multiple
inputs/branches/outputs. Use ggml_input() to declare
inputs, pipe through layers, then call ggml_model().
# Functional API: declare an input, pipe through layers, wrap in ggml_model().
inp <- ggml_input(shape = 4L)
h <- inp |> ggml_layer_dense(32L, activation = "relu") |> ggml_layer_batch_norm()
h <- h |> ggml_layer_dense(16L, activation = "relu")
out <- h |> ggml_layer_dense(3L, activation = "softmax")
model_fn <- ggml_model(inputs = inp, outputs = out) |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy",
               metrics = c("accuracy"))
model_fn <- ggml_fit(model_fn, x_tr, y_tr,
                     epochs = 100L, batch_size = 32L, verbose = 0L)
## Truncating data from 120 to 96 samples (batch_size=32 must divide evenly)
score_fn <- ggml_evaluate(model_fn, x_val, y_val, batch_size = 16L)
cat(sprintf("Functional model accuracy: %.4f\n", score_fn$accuracy))
## Functional model accuracy: 0.9333
Predict mpg from two input groups: engine features and
transmission features.
# --- Multi-input regression: predict mpg from two feature groups ------------
data(mtcars)
set.seed(7)
# Input group 1: engine (disp, hp, wt)
# Input group 2: transmission / gearbox (cyl, gear, carb, am)
engine <- as.matrix(scale(mtcars[, c("disp","hp","wt")]))
trans <- as.matrix(scale(mtcars[, c("cyl","gear","carb","am")]))
y_mpg <- matrix(scale(mtcars$mpg), ncol = 1L) # [32 x 1]
# small dataset -- use all for training, evaluate on same data for demo
x1 <- engine; x2 <- trans

# Two named inputs, one dense branch each, merged by element-wise addition.
inp1 <- ggml_input(shape = 3L, name = "engine")
inp2 <- ggml_input(shape = 4L, name = "transmission")
branch1 <- inp1 |> ggml_layer_dense(16L, activation = "relu")
branch2 <- inp2 |> ggml_layer_dense(16L, activation = "relu")
merged <- ggml_layer_add(list(branch1, branch2)) # element-wise add
out_reg <- merged |>
  ggml_layer_dense(8L, activation = "relu") |>
  ggml_layer_dense(1L)
model_reg <- ggml_model(inputs = list(inp1, inp2), outputs = out_reg) |>
  ggml_compile(optimizer = "adam", loss = "mse")
model_reg <- ggml_fit(model_reg,
                      x = list(x1, x2), y = y_mpg,
                      epochs = 200L, batch_size = 16L, verbose = 0L)
preds <- ggml_predict(model_reg, x = list(x1, x2), batch_size = 16L)
cat(sprintf("Pearson r (scaled mpg): %.4f\n", cor(preds, y_mpg)))
## Pearson r (scaled mpg): 0.9070
Callbacks plug into ggml_fit() via the
callbacks argument.
# Early stopping: halt training when val_loss fails to improve by at least
# min_delta for `patience` consecutive epochs.
cb_stop <- ggml_callback_early_stopping(
  monitor = "val_loss",
  patience = 15L,
  min_delta = 1e-4
)
model_cb <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")
model_cb <- ggml_fit(model_cb, x_tr, y_tr,
                     epochs = 300L,
                     batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_stop),
                     verbose = 0L)
## Note: dropping last 24 sample(s) (120 -> 96) because batch_size=32 must divide evenly. Training metrics are computed on 96 samples only.
# Three learning-rate schedule callbacks (only cb_cosine is used below).
# Cosine annealing
cb_cosine <- ggml_schedule_cosine_decay(T_max = 100L, eta_min = 1e-5)
# Step decay: halve LR every 30 epochs
cb_step <- ggml_schedule_step_decay(step_size = 30L, gamma = 0.5)
# Reduce on plateau
cb_plateau <- ggml_schedule_reduce_on_plateau(
  monitor = "val_loss",
  factor = 0.5,
  patience = 10L,
  min_lr = 1e-6
)
model_lr <- ggml_model_sequential() |>
  ggml_layer_dense(32L, activation = "relu", input_shape = 4L) |>
  ggml_layer_dense(3L, activation = "softmax") |>
  ggml_compile(optimizer = "adam", loss = "categorical_crossentropy")
model_lr <- ggml_fit(model_lr, x_tr, y_tr,
                     epochs = 150L,
                     batch_size = 32L,
                     validation_split = 0.1,
                     callbacks = list(cb_cosine),
                     verbose = 0L)
## Note: dropping last 24 sample(s) (120 -> 96) because batch_size=32 must divide evenly. Training metrics are computed on 96 samples only.
The autograd engine (the `ag_*` functions) provides PyTorch-style dynamic
computation graphs. Tensors are column-major: shape is
[features, batch].
# NOTE(review): the setup chunk appears to have been lost in extraction --
# `ag_mod` (an ag_sequential model), its optimizer `opt`, and the
# column-major splits `x_tr_ag` / `y_tr_ag` must be defined before this
# loop. TODO: confirm against the original vignette.
BS <- 32L                # mini-batch size (reused by the later loops)
n <- ncol(x_tr_ag)       # column-major: samples are the columns
ag_train(ag_mod)         # put the model in training mode
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)      # reshuffle sample order each epoch
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b-1L)*BS + 1L, min(b*BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    # Record the forward pass on the tape, then backprop and step.
    with_grad_tape({
      loss <- ag_softmax_cross_entropy_loss(ag_mod$forward(xb), yb)
    })
    grads <- backward(loss)
    opt$step(grads)
    opt$zero_grad()
  }
  if (ep %% 50L == 0L)
    cat(sprintf("epoch %d loss %.4f\n", ep, loss$data[1]))
}
## epoch 50 loss 0.2152
## epoch 100 loss 0.0587
## epoch 150 loss 0.0081
ag_eval(ag_mod)  # switch to inference mode before predicting
# forward in chunks, apply softmax manually

# Forward `x_cm` (column-major: [features, samples]) through `model` in
# chunks of `chunk` columns and return class probabilities [classes, samples].
ag_predict_cm <- function(model, x_cm, chunk = 64L) {
  n <- ncol(x_cm)
  # Probe with one sample to learn the output dimension.
  out <- matrix(0.0, nrow(model$forward(ag_tensor(x_cm[,1,drop=FALSE]))$data), n)
  for (s in seq(1L, n, by = chunk)) {
    e <- min(s + chunk - 1L, n)
    lg <- model$forward(ag_tensor(x_cm[, s:e, drop = FALSE]))$data
    # Numerically stable softmax per column. sweep() is required: the
    # original `lg - apply(lg, 2, max)` and `ev / colSums(ev)` recycled the
    # length-ncol vector DOWN the columns (column-major), mis-aligning the
    # per-column max/sum whenever nrow != ncol.
    ev <- exp(sweep(lg, 2L, apply(lg, 2L, max)))
    out[, s:e] <- sweep(ev, 2L, colSums(ev), "/")
  }
  out
}
probs_ag <- ag_predict_cm(ag_mod, x_val_ag) # [3, 30]
pred_ag <- apply(probs_ag, 2, which.max)    # predicted class per column
# y_val_ag is column-major (rows = classes, per the comment in the original),
# so the argmax must run over columns (MARGIN = 2). The original
# `apply(y_val_ag, 1, which.max)` produced one value per CLASS row and was
# silently recycled against pred_ag, collapsing accuracy to chance level --
# consistent with the 0.1000 shown below. TODO: confirm y_val_ag layout.
true_ag <- apply(y_val_ag, 2, which.max)
acc_ag <- mean(pred_ag == true_ag)
cat(sprintf("Autograd val accuracy: %.4f\n", acc_ag))
## Autograd val accuracy: 0.1000
# Deeper autograd model: batch norm + dropout, trained with a cosine LR
# schedule and gradient-norm clipping.
ag_mod2 <- ag_sequential(
  ag_linear(4L, 64L, activation = "relu"),
  ag_batch_norm(64L),
  ag_dropout(0.3),
  ag_linear(64L, 32L, activation = "relu"),
  ag_linear(32L, 3L)
)
params2 <- ag_mod2$parameters()
opt2 <- optimizer_adam(params2, lr = 1e-3)
sch2 <- lr_scheduler_cosine(opt2, T_max = 150L, lr_min = 1e-5)
ag_train(ag_mod2)
set.seed(42)
for (ep in seq_len(150L)) {
  perm <- sample(n)
  n_batches <- ceiling(n / BS)
  for (b in seq_len(n_batches)) {
    first <- (b - 1L) * BS + 1L
    idx <- perm[seq(first, min(b * BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    # Tape the forward pass, then backprop, clip, and update.
    with_grad_tape({
      loss2 <- ag_softmax_cross_entropy_loss(ag_mod2$forward(xb), yb)
    })
    grads2 <- backward(loss2)
    clip_grad_norm(params2, grads2, max_norm = 5.0)
    opt2$step(grads2)
    opt2$zero_grad()
  }
  sch2$step()  # advance the cosine schedule once per epoch
}
ag_eval(ag_mod2)  # inference mode before predicting
probs2 <- ag_predict_cm(ag_mod2, x_val_ag)
acc2 <- mean(apply(probs2, 2, which.max) == true_ag)
cat(sprintf("ag + cosine + clip val accuracy: %.4f\n", acc2))
## ag + cosine + clip val accuracy: 0.1000
For full control, build a network directly from raw `ag_param` parameters:
# Build a two-layer MLP from raw parameters, column-major convention:
# weight matrices are [out, in], biases are [out, 1]. Weights use
# He-style scaling (sqrt(2 / fan_in)).
make_net <- function(n_in, n_hidden, n_out) {
  w1 <- ag_param(matrix(rnorm(n_hidden * n_in) * sqrt(2/n_in),
                        n_hidden, n_in))
  bias1 <- ag_param(matrix(0.0, n_hidden, 1L))
  w2 <- ag_param(matrix(rnorm(n_out * n_hidden) * sqrt(2/n_hidden),
                        n_out, n_hidden))
  bias2 <- ag_param(matrix(0.0, n_out, 1L))

  # Forward pass: linear -> ReLU -> linear (logits, no softmax).
  fwd <- function(x) {
    hidden <- ag_relu(ag_add(ag_matmul(w1, x), bias1))
    ag_add(ag_matmul(w2, hidden), bias2)
  }

  list(
    forward = fwd,
    parameters = function() list(W1 = w1, b1 = bias1, W2 = w2, b2 = bias2)
  )
}
# Train the raw-parameter net (reuses BS and n from the earlier loops).
set.seed(1)
net <- make_net(4L, 32L, 3L)
opt_r <- optimizer_adam(net$parameters(), lr = 1e-3)
for (ep in seq_len(200L)) {
  perm <- sample(n)
  for (b in seq_len(ceiling(n / BS))) {
    idx <- perm[seq((b-1L)*BS+1L, min(b*BS, n))]
    xb <- ag_tensor(x_tr_ag[, idx, drop = FALSE])
    yb <- y_tr_ag[, idx, drop = FALSE]
    with_grad_tape({ loss_r <- ag_softmax_cross_entropy_loss(net$forward(xb), yb) })
    gr <- backward(loss_r)
    opt_r$step(gr)
    opt_r$zero_grad()
  }
}
probs_r <- ag_predict_cm(net, x_val_ag)
acc_r <- mean(apply(probs_r, 2, which.max) == true_ag)
cat(sprintf("Raw ag_param val accuracy: %.4f\n", acc_r))
## Raw ag_param val accuracy: 0.1333
# Use Vulkan GPU if available, fall back to CPU.
# ag_device("gpu") is expected to error when no GPU backend is present;
# tryCatch turns that error into the "cpu" fallback.
device <- tryCatch({
  ag_device("gpu")
  "gpu"
}, error = function(e) "cpu")
cat("Running on:", device, "\n")
## Running on: gpu
After calling ag_device() and ag_dtype(),
all subsequent ag_param and ag_tensor calls
use the selected device and dtype. The sequential API
(ggml_fit) picks up the backend automatically.
| Task | Sequential / Functional | Autograd |
|---|---|---|
| Build model | `ggml_model_sequential()` + `ggml_layer_*` | `ag_sequential()` or raw `ag_param` |
| Functional DAG | `ggml_input()` → `ggml_model()` | — |
| Compile | `ggml_compile()` | manual optimizer |
| Train | `ggml_fit()` | `with_grad_tape` + `backward` + `opt$step` |
| Evaluate | `ggml_evaluate()` | manual loop |
| Predict | `ggml_predict()` | manual forward |
| Save / load | `ggml_save_model()` / `ggml_load_model()` | — |
| Callbacks | `ggml_callback_early_stopping()`, `ggml_schedule_*` | `lr_scheduler_*`, `clip_grad_norm` |
| Device | automatic | `ag_device()` / `ag_dtype()` |
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.