The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Compilation and Call Overhead

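This article measures two different costs: the time needed to build a module (compilation latency) and the overhead of calling into the compiled code (call overhead).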

The comparison target is the callme package, which builds ordinary .Call() entry points with R CMD SHLIB. That means it goes through the platform compiler toolchain (gcc/clang on the usual Unix-like targets), so we should expect stronger optimization than TinyCC for steady-state machine code. That does not make the comparison useless, but it does mean the runtime results combine two effects:

The point is not that the two packages expose identical APIs. They do not. Instead, the comparison asks a narrower question:

Three Minimal Cases

We use three small workloads: noop(), fill_rand(out, n), and rand_unif(n).

The fill_rand() case is the fairer array-oriented comparison:

The rand_unif() case intentionally stresses the extra copy path:

#include <R.h>
#include <Rinternals.h>
#include <Rmath.h>
#include <stdlib.h>

/* Empty function: takes no arguments, does nothing, returns nothing. */
void noop(void) {}

/*
 * Writes n values drawn from unif_rand() into out[0] .. out[n - 1].
 *
 * Raises an R error when n is negative. The size of the buffer behind
 * `out` is not checked here, so the caller must supply at least n doubles.
 */
void fill_rand(double* out, int n) {
  if (n < 0) {
    Rf_error("n must be non-negative");
  }

  /* The unif_rand() calls are bracketed by GetRNGstate()/PutRNGstate(). */
  GetRNGstate();
  for (int i = 0; i < n; ++i) {
    out[i] = unif_rand();
  }
  PutRNGstate();
}

/*
 * Returns a malloc()-allocated buffer holding n values drawn from
 * unif_rand(). This function never frees the buffer itself.
 *
 * Raises an R error when n is negative, or when the allocation for n > 0
 * fails.
 */
double* rand_unif(int n) {
  if (n < 0) {
    Rf_error("n must be non-negative");
  }
  if (n == 0) {
    /* Still hands back an allocation (one double); its result is returned
       without a NULL check. */
    return (double*) malloc(sizeof(double));
  }

  double *out = (double*) malloc(sizeof(double) * (size_t) n);
  if (!out) {
    Rf_error("malloc failed");
  }

  /* The unif_rand() calls are bracketed by GetRNGstate()/PutRNGstate(). */
  GetRNGstate();
  for (int i = 0; i < n; ++i) {
    out[i] = unif_rand();
  }
  PutRNGstate();
  return out;
}
Click to show R code
# C source handed to tcc_source(); it defines the plain C functions noop(),
# fill_rand() and rand_unif().
rtinycc_code <- "#include <R.h>\n#include <Rinternals.h>\n#include <Rmath.h>\n#include <stdlib.h>\n\nvoid noop(void) {}\n\nvoid fill_rand(double* out, int n) {\n  if (n < 0) {\n    Rf_error(\"n must be non-negative\");\n  }\n\n  GetRNGstate();\n  for (int i = 0; i < n; ++i) {\n    out[i] = unif_rand();\n  }\n  PutRNGstate();\n}\n\ndouble* rand_unif(int n) {\n  if (n < 0) {\n    Rf_error(\"n must be non-negative\");\n  }\n  if (n == 0) {\n    return (double*) malloc(sizeof(double));\n  }\n\n  double *out = (double*) malloc(sizeof(double) * (size_t) n);\n  if (!out) {\n    Rf_error(\"malloc failed\");\n  }\n\n  GetRNGstate();\n  for (int i = 0; i < n; ++i) {\n    out[i] = unif_rand();\n  }\n  PutRNGstate();\n  return out;\n}"
#include <R.h>
#include <Rinternals.h>
#include <Rmath.h>

/* Takes no arguments and returns R_NilValue. */
SEXP noop(void) {
  return R_NilValue;
}

/*
 * Overwrites the first n elements of the numeric vector out_ with values
 * drawn from unif_rand() and returns out_ itself.
 *
 * n is taken from n_ via asInteger(). Raises an R error when n is negative,
 * when out_ is not a REALSXP, or when out_ has fewer than n elements.
 */
SEXP fill_rand(SEXP out_, SEXP n_) {
  int n = asInteger(n_);
  if (n < 0) {
    Rf_error("n must be non-negative");
  }

  if (TYPEOF(out_) != REALSXP) {
    Rf_error("out must be a numeric vector");
  }

  if (XLENGTH(out_) < n) {
    Rf_error("out is shorter than n");
  }

  /* Write directly into the vector's data; no new vector is allocated. */
  double *out = REAL(out_);
  GetRNGstate();
  for (int i = 0; i < n; ++i) {
    out[i] = unif_rand();
  }
  PutRNGstate();

  return out_;
}

/*
 * Allocates a new numeric vector of length n, fills it with values drawn
 * from unif_rand(), and returns it.
 *
 * n is taken from n_ via asInteger(). Raises an R error when n is negative.
 */
SEXP rand_unif(SEXP n_) {
  int n = asInteger(n_);
  if (n < 0) {
    Rf_error("n must be non-negative");
  }

  /* Keep the new vector protected while it is being filled. */
  SEXP out = PROTECT(allocVector(REALSXP, n));
  double *ptr = REAL(out);

  GetRNGstate();
  for (int i = 0; i < n; ++i) {
    ptr[i] = unif_rand();
  }
  PutRNGstate();

  UNPROTECT(1);
  return out;
}
Click to show R code
# C source handed to callme::compile(); it defines SEXP-based versions of
# noop(), fill_rand() and rand_unif().
callme_code <- "#include <R.h>\n#include <Rinternals.h>\n#include <Rmath.h>\n\nSEXP noop(void) {\n  return R_NilValue;\n}\n\nSEXP fill_rand(SEXP out_, SEXP n_) {\n  int n = asInteger(n_);\n  if (n < 0) {\n    Rf_error(\"n must be non-negative\");\n  }\n\n  if (TYPEOF(out_) != REALSXP) {\n    Rf_error(\"out must be a numeric vector\");\n  }\n\n  if (XLENGTH(out_) < n) {\n    Rf_error(\"out is shorter than n\");\n  }\n\n  double *out = REAL(out_);\n  GetRNGstate();\n  for (int i = 0; i < n; ++i) {\n    out[i] = unif_rand();\n  }\n  PutRNGstate();\n\n  return out_;\n}\n\nSEXP rand_unif(SEXP n_) {\n  int n = asInteger(n_);\n  if (n < 0) {\n    Rf_error(\"n must be non-negative\");\n  }\n\n  SEXP out = PROTECT(allocVector(REALSXP, n));\n  double *ptr = REAL(out);\n\n  GetRNGstate();\n  for (int i = 0; i < n; ++i) {\n    ptr[i] = unif_rand();\n  }\n  PutRNGstate();\n\n  UNPROTECT(1);\n  return out;\n}"
# Build the Rtinycc benchmark module.
#
# Attaches `rtinycc_code` to a fresh tcc_ffi() object, binds the three C
# symbols (noop, fill_rand, rand_unif) with their argument/return specs, and
# returns whatever tcc_compile() produces.
build_rtinycc_module <- function() {
  ffi <- tcc_source(tcc_ffi(), rtinycc_code)
  bound <- tcc_bind(
    ffi,
    noop = list(args = list(), returns = "void"),
    fill_rand = list(args = list("numeric_array", "i32"), returns = "void"),
    rand_unif = list(
      args = list("i32"),
      returns = list(type = "numeric_array", length_arg = 1, free = TRUE)
    )
  )
  tcc_compile(bound)
}

# Compile `callme_code` with callme and remember which DLLs that loaded.
#
# Returns the object produced by callme::compile(), with an extra
# "dll_paths" attribute: the paths of every DLL whose name starts with
# "callme_" and that was not loaded before the compile step.
build_callme_module <- function() {
  loaded_before <- names(getLoadedDLLs())
  module <- callme::compile(callme_code, env = NULL, verbosity = 0)

  # Diff the loaded-DLL table against the snapshot taken before compiling.
  loaded_after <- getLoadedDLLs()
  added <- setdiff(names(loaded_after), loaded_before)
  added <- added[startsWith(added, "callme_")]

  paths <- vapply(
    loaded_after[added],
    function(dll) dll[["path"]],
    character(1)
  )
  attr(module, "dll_paths") <- unname(paths)
  module
}

# Best-effort unload of the DLLs at `dll_paths`.
#
# Paths are de-duplicated and processed in reverse order. An entry is only
# passed to dyn.unload() when it is a non-empty character string naming an
# existing file; failures of dyn.unload() are suppressed. Always returns
# NULL invisibly, including for NULL or empty input.
unload_callme_dlls <- function(dll_paths) {
  is_unloadable <- function(path) {
    is.character(path) && nzchar(path) && file.exists(path)
  }

  # Looping over NULL or a zero-length vector simply does nothing.
  for (candidate in rev(unique(dll_paths))) {
    if (is_unloadable(candidate)) {
      try(dyn.unload(candidate), silent = TRUE)
    }
  }
  invisible(NULL)
}

# Build the callme module once and immediately tear it down again.
#
# Used where only the build itself matters: the module is built, its
# recorded DLL paths are kept, the module object is dropped and collected,
# and the DLLs are then unloaded. Returns NULL invisibly.
build_and_dispose_callme_module <- function() {
  module <- build_callme_module()
  recorded_paths <- attr(module, "dll_paths", exact = TRUE)

  # Drop the only reference and collect before unloading the DLLs.
  rm(module)
  gc()

  unload_callme_dlls(recorded_paths)
  invisible(NULL)
}

# Decide whether the callme comparisons can run, and record a
# human-readable status in `callme_runtime_reason`.
can_run_callme <- FALSE
callme_runtime_reason <- NULL

if (!has_callme) {
  callme_runtime_reason <- "`callme` is not installed."
} else if (.Platform$OS.type == "windows") {
  callme_runtime_reason <- paste(
    "`callme` comparisons are skipped on Windows during vignette builds",
    "because the helper DLL compilation step is not reliable in CI."
  )
} else {
  # Trial build: NULL on success, the error condition on failure.
  callme_probe <- tryCatch(
    {
      build_and_dispose_callme_module()
      NULL
    },
    error = function(err) err
  )

  can_run_callme <- !inherits(callme_probe, "error")
  if (!can_run_callme) {
    callme_runtime_reason <- paste(
      "`callme` comparisons were skipped because runtime compilation failed:",
      conditionMessage(callme_probe)
    )
  }
}

can_run_benchmarks <- can_run_callme && has_bench && has_profmem

# Only fill in a status when the callme checks above did not already set one.
if (is.null(callme_runtime_reason)) {
  callme_runtime_reason <- if (!has_bench) {
    "`bench` is not installed."
  } else if (!has_profmem) {
    paste(
      "`bench` runtime comparisons are skipped because memory profiling",
      "is not available in this R build."
    )
  } else {
    "Executable comparisons are enabled."
  }
}

# Run `fun(rtinycc_module, callme_module)` with freshly built modules.
#
# Both modules are built first. On exit (normal or error) the module objects
# are removed and collected, and the DLLs recorded for the callme module are
# unloaded. Returns the value of `fun`.
with_benchmark_modules <- function(fun) {
  tinycc_module <- build_rtinycc_module()
  callme_module <- build_callme_module()
  callme_dlls <- attr(callme_module, "dll_paths", exact = TRUE)

  on.exit(
    {
      rm(tinycc_module, callme_module)
      gc()
      unload_callme_dlls(callme_dlls)
    },
    add = TRUE
  )

  fun(tinycc_module, callme_module)
}

# Median wall-clock time, in seconds, of evaluating `expr` `times` times.
#
# `expr` is captured unevaluated and re-evaluated in the caller's frame on
# every repetition, so side effects of `expr` happen `times` times there.
# A gc() runs before each timed evaluation and is not part of the timing.
#
# Timings are collected with vapply(), so the result is always a double:
# `times = 0` yields NA_real_ rather than the list that replicate()'s
# sapply-style simplification would produce.
#
# expr   Expression to time (not evaluated at call time).
# times  Number of repetitions; defaults to 3L.
median_elapsed <- function(expr, times = 3L) {
  expr <- substitute(expr)
  env <- parent.frame()

  time_once <- function(i) {
    gc()
    t0 <- proc.time()[["elapsed"]]
    eval(expr, envir = env)
    proc.time()[["elapsed"]] - t0
  }

  stats::median(vapply(seq_len(times), time_once, numeric(1)))
}

# Call the zero-argument function `fun` `n` times, discarding its results.
# Returns NULL invisibly.
run_noop <- function(fun, n) {
  for (call_index in seq_len(n)) {
    fun()
  }
  invisible(NULL)
}

# Call `fun(n)` `reps` times, discarding each result.
# Returns NULL invisibly.
run_rand <- function(fun, n, reps) {
  for (rep_index in seq_len(reps)) {
    invisible(fun(n))
  }
  invisible(NULL)
}

# Call `fun(buffer, n)` `reps` times, where `buffer` is a fresh numeric(n)
# created for each repetition. Results are discarded.
# Returns NULL invisibly.
run_fill <- function(fun, n, reps) {
  for (rep_index in seq_len(reps)) {
    buffer <- numeric(n)
    invisible(fun(buffer, n))
  }
  invisible(NULL)
}

# Uncompiled Rtinycc recipe: the same source and bindings as the benchmark
# module, but stopping before tcc_compile().
rtinycc_recipe <- tcc_bind(
  tcc_source(tcc_ffi(), rtinycc_code),
  noop = list(args = list(), returns = "void"),
  fill_rand = list(args = list("numeric_array", "i32"), returns = "void"),
  rand_unif = list(
    args = list("i32"),
    returns = list(type = "numeric_array", length_arg = 1, free = TRUE)
  )
)

# Produce the wrapper code for the recipe by calling generate_ffi_code(),
# which is reached with `:::` (i.e. not through the package's exported
# interface). Every argument except `is_external` is read straight from the
# matching field of `rtinycc_recipe`.
generated_code <- Rtinycc:::generate_ffi_code(
  symbols = rtinycc_recipe$symbols,
  headers = rtinycc_recipe$headers,
  c_code = rtinycc_recipe$c_code,
  is_external = FALSE,
  structs = rtinycc_recipe$structs,
  unions = rtinycc_recipe$unions,
  enums = rtinycc_recipe$enums,
  globals = rtinycc_recipe$globals,
  container_of = rtinycc_recipe$container_of,
  field_addr = rtinycc_recipe$field_addr,
  struct_raw_access = rtinycc_recipe$struct_raw_access,
  introspect = rtinycc_recipe$introspect
)

Availability

has_callme
#> [1] TRUE

If callme, bench, or R memory profiling is unavailable, or if the current build environment cannot compile the temporary callme helper DLL, the executable comparisons below are skipped.

has_bench
#> [1] TRUE
has_profmem
#> [1] TRUE
can_run_callme
#> [1] TRUE
can_run_benchmarks
#> [1] TRUE

Current comparison status:

callme_runtime_reason
#> [1] "Executable comparisons are enabled."

Compilation Latency

This measures module build time, not call time.

# Median build time of each implementation over three builds. The Rtinycc
# build is timed first, then the callme build (which is also torn down).
compile_times <- data.frame(
  implementation = c("Rtinycc", "callme"),
  seconds = c(
    median_elapsed(build_rtinycc_module(), times = 3L),
    median_elapsed(build_and_dispose_callme_module(), times = 3L)
  )
)

# Same timings in milliseconds, rounded to one decimal place.
compile_times[["milliseconds"]] <- round(1000 * compile_times[["seconds"]], 1)
compile_times
#>   implementation seconds milliseconds
#> 1        Rtinycc   0.012           12
#> 2         callme   0.295          295

The expected pattern is:

Generated Wrapper Code

The generated code makes the extra return-path work explicit. In particular, the rand_unif() wrapper allocates an R vector, memcpy()s the native double* buffer into it, then free()s the original buffer. In contrast, fill_rand() uses the borrowed numeric_array input path.

Rtinycc:::rtinycc_c_block(generated_code)
/* TinyCC workaround: _Complex not supported */
#define _Complex

#include <R.h>
#include <Rinternals.h>
#ifndef STRING_PTR_RO
#define STRING_PTR_RO STRING_PTR
#endif
void RC_free_finalizer(SEXP ext);
void RC_owned_native_finalizer(SEXP ext);
SEXP RC_make_borrowed_view(void *ptr, SEXP tag, SEXP owner);
SEXP RC_make_unowned_ptr(void *ptr, SEXP tag);
SEXP RC_make_owned_ptr(void *ptr, SEXP tag);
SEXP RC_make_owned_composite_ptr(void *ptr, SEXP tag);

#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <limits.h>
#include <math.h>
#include <string.h>

/* User code */
#include <R.h>
#include <Rinternals.h>
#include <Rmath.h>
#include <stdlib.h>

void noop(void) {}

void fill_rand(double* out, int n) {
  if (n < 0) {
    Rf_error("n must be non-negative");
  }

  GetRNGstate();
  for (int i = 0; i < n; ++i) {
    out[i] = unif_rand();
  }
  PutRNGstate();
}

double* rand_unif(int n) {
  if (n < 0) {
    Rf_error("n must be non-negative");
  }
  if (n == 0) {
    return (double*) malloc(sizeof(double));
  }

  double *out = (double*) malloc(sizeof(double) * (size_t) n);
  if (!out) {
    Rf_error("malloc failed");
  }

  GetRNGstate();
  for (int i = 0; i < n; ++i) {
    out[i] = unif_rand();
  }
  PutRNGstate();
  return out;
}

/* R callable wrappers for bound symbols */
SEXP R_wrap_noop(void) {
  // No arguments

  // Call and return
   noop();
     return R_NilValue;
}


SEXP R_wrap_fill_rand(SEXP arg1_, SEXP arg2_) {
  if (TYPEOF(arg1_) != REALSXP) Rf_error("expected numeric vector for argument 'arg1'");
  double* arg1 = REAL(arg1_);
  int _arg2 = asInteger(arg2_);
  if (_arg2 == NA_INTEGER) Rf_error("integer value is NA");
  if (_arg2 < INT32_MIN || _arg2 > INT32_MAX) Rf_error("i32 out of range");
  int32_t arg2 = (int32_t)_arg2;

  // Call and return
   fill_rand(arg1, arg2);
     return R_NilValue;
}


SEXP R_wrap_rand_unif(SEXP arg1_) {
  int _arg1 = asInteger(arg1_);
  if (_arg1 == NA_INTEGER) Rf_error("integer value is NA");
  if (_arg1 < INT32_MIN || _arg1 > INT32_MAX) Rf_error("i32 out of range");
  int32_t arg1 = (int32_t)_arg1;

  // Call and return
   double* __rtinycc_ret = rand_unif(arg1);
   if (!__rtinycc_ret) return R_NilValue;
   SEXP out = PROTECT(allocVector(REALSXP, arg1));
     if (arg1 > 0) memcpy(REAL(out), __rtinycc_ret, sizeof(double) * arg1);
     if (__rtinycc_ret) free(__rtinycc_ret);
     UNPROTECT(1);
     return out;
}

noop() Call Overhead

This is the smallest useful call path. It approximates the lower bound on call overhead above a plain .Call() entry point.

# Benchmark 1000 back-to-back noop() calls per iteration for each module.
noop_bench <- with_benchmark_modules(\(rt_mod, cm_mod) {
  calls_per_iteration <- 1000L

  bench::mark(
    Rtinycc = run_noop(rt_mod$noop, calls_per_iteration),
    callme = run_noop(cm_mod$noop, calls_per_iteration),
    iterations = 20,
    check = TRUE,
    filter_gc = FALSE,
    memory = has_profmem
  )
})

noop_bench
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 Rtinycc       618µs    690µs     1444.      13KB        0
#> 2 callme        240µs    244µs     4039.        0B        0

Interpretation:

fill_rand(out, n) And Zero-Copy Arrays

This is the fairer vector comparison because both implementations fill an existing R numeric vector instead of returning a newly allocated result.

# Benchmark fill_rand() on 4096-element vectors, 100 fills per iteration.
# Result checking is off (`check = FALSE`): the two expressions are not
# compared against each other.
fill_bench_n4096 <- with_benchmark_modules(\(rt_mod, cm_mod) {
  bench::mark(
    Rtinycc = run_fill(rt_mod$fill_rand, 4096L, 100L),
    callme = run_fill(cm_mod$fill_rand, 4096L, 100L),
    iterations = 20,
    check = FALSE,
    filter_gc = FALSE,
    memory = has_profmem
  )
})

fill_bench_n4096
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 Rtinycc      1.66ms   2.48ms      361.    3.15MB     18.1
#> 2 callme       1.32ms   1.38ms      628.    3.13MB     31.4

Interpretation:

rand_unif(n) And Copy Cost

Here the implementation work is still small, but the return path differs:

We time both a tiny and a larger return size.

# Benchmark rand_unif() at two return sizes with the same pair of modules:
# n = 1 (1000 calls per iteration) and n = 4096 (100 calls per iteration).
# Returns both bench::mark() results in a named list.
rand_results <- with_benchmark_modules(\(rt_mod, cm_mod) {
  tiny <- bench::mark(
    Rtinycc = run_rand(rt_mod$rand_unif, 1L, 1000L),
    callme = run_rand(cm_mod$rand_unif, 1L, 1000L),
    iterations = 20,
    check = FALSE,
    filter_gc = FALSE,
    memory = has_profmem
  )

  large <- bench::mark(
    Rtinycc = run_rand(rt_mod$rand_unif, 4096L, 100L),
    callme = run_rand(cm_mod$rand_unif, 4096L, 100L),
    iterations = 20,
    check = FALSE,
    filter_gc = FALSE,
    memory = has_profmem
  )

  list(rand_bench_n1 = tiny, rand_bench_n4096 = large)
})

rand_results$rand_bench_n1
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 Rtinycc       889µs   1.07ms      825.    15.4KB     41.3
#> 2 callme        472µs 507.31µs     1873.        0B      0
rand_results$rand_bench_n4096
#> # A tibble: 2 × 6
#>   expression      min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr> <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 Rtinycc      1.74ms    2.4ms      379.    3.13MB     18.9
#> 2 callme       1.11ms   1.99ms      462.    3.13MB     23.1

The usual pattern is:

What These Numbers Mean

The benchmark gives a reasonable mental model:

So the package is usually strongest when:

It is less ideal when:

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.