From raw data to model with tidyILD

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

tidyILD authors

2026-04-17

This vignette walks through the full tidyILD pipeline: prepare data, inspect structure, apply within-between decomposition and lags, fit a mixed-effects model, and run diagnostics and plots.

For which temporal assumptions (lags in the mean, residual AR, time-varying effects, state-space) fit your question, see vignette("temporal-dynamics-model-choice", package = "tidyILD").

Simulate and prepare

library(tidyILD)
# Simulate simple ILD
d <- ild_simulate(n_id = 10, n_obs_per = 12, irregular = TRUE, seed = 42)
# Prepare: encode time structure and add .ild_* columns
x <- ild_prepare(d, id = "id", time = "time", gap_threshold = 7200)

Inspect

ild_summary(x)
#> $summary
#> # A tibble: 1 × 7
#>    n_id n_obs time_min time_max prop_gap median_dt_sec iqr_dt_sec
#>   <int> <int>    <dbl>    <dbl>    <dbl>         <dbl>      <dbl>
#> 1    10   120        0   40136.        0         3612.       640.
#> 
#> $n_units
#> [1] 10
#> 
#> $n_obs
#> [1] 120
#> 
#> $time_range
#> [1]     0 40136
#> 
#> $spacing
#> $spacing$median_dt
#> [1] 3612.244
#> 
#> $spacing$iqr_dt
#> [1] 640.2958
#> 
#> $spacing$n_intervals
#> [1] 110
#> 
#> $spacing$pct_gap
#> [1] 0
#> 
#> $spacing$by_id
#> # A tibble: 10 × 5
#>       id median_dt iqr_dt n_intervals pct_gap
#>    <int>     <dbl>  <dbl>       <int>   <dbl>
#>  1     1     3627.   549.          11       0
#>  2     2     3702.   854.          11       0
#>  3     3     3617.   740.          11       0
#>  4     4     3321.   702.          11       0
#>  5     5     3616.   588.          11       0
#>  6     6     3438.   481.          11       0
#>  7     7     3767.   816.          11       0
#>  8     8     3591.   499.          11       0
#>  9     9     3609.   194.          11       0
#> 10    10     3700.   521.          11       0
#> 
#> 
#> $n_gaps
#> [1] 0
#> 
#> $pct_gap
#> [1] 0
ild_spacing_class(x)
#> [1] "regular-ish"

Within-person centering and lags

x <- ild_center(x, y)
x <- ild_lag(x, y, mode = "gap_aware", max_gap = 7200)

Fit a model

Without residual autocorrelation (lmer):

fit0 <- ild_lme(y ~ 1 + (1 | id), data = x, ar1 = FALSE, warn_no_ar1 = FALSE)

With AR1 residual correlation (nlme):

fit1 <- ild_lme(y ~ 1, data = x, ar1 = TRUE, correlation_class = "CAR1")

Diagnostics and plots

diag <- ild_diagnostics(fit1, data = x)
names(diag)  # meta, data, stats
#> [1] "meta"  "data"  "stats"
names(plot_ild_diagnostics(diag))  # plot names for requested types
#> [1] "residual_acf"        "residuals_vs_fitted" "residuals_vs_time"  
#> [4] "qq"
# Pooled residual ACF (tibble)
head(diag$stats$acf$pooled)
#> # A tibble: 6 × 2
#>     lag      acf
#>   <dbl>    <dbl>
#> 1     0  1      
#> 2     1  0.121  
#> 3     2 -0.00832
#> 4     3 -0.113  
#> 5     4 -0.0886 
#> 6     5 -0.147
# By-id ACF when by_id = TRUE: one tibble per person
names(diag$stats$acf$by_id)
#>  [1] "1"  "2"  "3"  "4"  "5"  "6"  "7"  "8"  "9"  "10"
head(diag$stats$acf$by_id[[1]])
#> # A tibble: 6 × 2
#>     lag     acf
#>   <dbl>   <dbl>
#> 1     0  1     
#> 2     1  0.361 
#> 3     2 -0.0851
#> 4     3  0.149 
#> 5     4  0.196 
#> 6     5  0.0752

ild_plot(x, type = "trajectory", var = "y", max_ids = 5)

Trajectory plot

ild_plot(fit1, type = "fitted")

Fitted vs observed

MSM-style weights (IPTW and IPCW) — optional

For a marginal structural model–style sensitivity analysis, you can build inverse probability of treatment weights: either pooled (ild_iptw_weights()) or sequential MSM for time-varying A_t (ild_iptw_msm_weights() after building history with ild_lag()), then inverse probability of censoring weights for monotone dropout (ild_ipcw_weights()), multiply into a joint analysis weight with ild_joint_msm_weights(), and refit with ild_ipw_refit(). This path targets treatment assignment and loss-to-follow-up; it is distinct from outcome missingness IPW (ild_missing_model() + ild_ipw_weights()). Assumptions (positivity, correct models) are still yours; for uncertainty with estimated weights, use ild_msm_bootstrap() (see ?ild_msm_inference) instead of trusting default lmer SEs alone—weight_policy = "reestimate_weights" with a weights_fn that rebuilds IPW on each bootstrap resample is often the appropriate choice when you want first-stage uncertainty reflected; fixed_weights resamples clusters but keeps the weights attached to resampled rows (faster, approximate).

# Example skeleton (not run in the vignette build)
x2 <- ild_simulate(n_id = 12, n_obs_per = 10, seed = 1)
x2$stress <- rnorm(nrow(x2))
x2$trt <- rbinom(nrow(x2), 1L, 0.45)
x2 <- ild_prepare(x2, id = "id", time = "time")
x2 <- ild_center(x2, y)
x2 <- ild_iptw_weights(x2, treatment = "trt", predictors = "stress")
# Sequential A_t: x2 <- ild_lag(x2, stress); x2 <- ild_lag(x2, trt); ...
# x2 <- ild_iptw_msm_weights(x2, treatment = "trt", history = ~ stress_lag1 + trt_lag1)
x2 <- ild_ipcw_weights(x2, predictors = "stress")
x2 <- ild_joint_msm_weights(x2)
fit_msm <- ild_lme(y ~ y_bp + y_wp + stress + (1 | id), data = x2,
  ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE)
fit_msm_w <- ild_ipw_refit(fit_msm, data = x2)

Balance and overlap: After weights are attached, use ild_msm_balance() (weighted SMDs), ild_ipw_ess() (Kish effective sample size), and ild_msm_overlap_plot() (propensity densities by treatment—pooled IPTW or sequential MSM via attr(x, "ild_iptw_msm_fits")). Optional: ild_diagnose(..., balance = TRUE, balance_treatment = "trt", balance_covariates = c("stress")) fills causal$balance and may trigger guardrails GR_MSM_BALANCE_SMD_HIGH / GR_MSM_ESS_LOW. ild_autoplot(bundle, section = "causal", type = "overlap", treatment = "trt") draws overlap when ggplot2 is available.

For a full assumptions-oriented walkthrough (exchangeability, positivity/overlap, consistency, and weight-model correctness) and simulation-based recovery checks, see vignette("msm-identification-and-recovery", package = "tidyILD"). When using ild_msm_fit(), check fit_obj$inference$status/reason for degraded paths, or set strict_inference = TRUE to fail fast.

A minimal bootstrap illustration (small n_boot for speed; increase in real analyses):

set.seed(3)
xb <- ild_simulate(n_id = 10, n_obs_per = 5, seed = 3)
xb$stress <- rnorm(nrow(xb))
xb <- ild_prepare(xb, id = "id", time = "time")
xb <- ild_center(xb, y)
xb$.ipw <- runif(nrow(xb), 0.85, 1.15)
fb <- ild_lme(y ~ y_bp + y_wp + stress + (1 | id), data = xb,
  ar1 = FALSE, warn_no_ar1 = FALSE, warn_uncentered = FALSE)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 5.42413 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
fwb <- ild_ipw_refit(fb, data = xb, weights = ".ipw")
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 5.61967 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
bs_fixed <- ild_msm_bootstrap(fwb, n_boot = 20L, weight_policy = "fixed_weights", seed = 3)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.683848 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.19955 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.475695 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 2.88868 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 4.51989 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.402879 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.04425 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.50228 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> unable to evaluate scaled gradient
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge: degenerate Hessian with 1 negative eigenvalues
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.7552 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> boundary (singular) fit: see help('isSingular')
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.0308156 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.59443 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 4.50989 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.175108 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.14053 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 4.03176 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
tidy_ild_msm_bootstrap(bs_fixed)
#> # A tibble: 4 × 18
#>   term        component effect_level  estimate std_error   conf_low conf_high
#>   <chr>       <chr>     <chr>            <dbl>     <dbl>      <dbl>     <dbl>
#> 1 (Intercept) fixed     population   -1.20e-17  1.69e-16 -2.29 e-16  3.53e-16
#> 2 y_bp        fixed     between       1   e+ 0  3.72e-16  1.000e+ 0  1   e+ 0
#> 3 y_wp        fixed     within        1   e+ 0  1.85e-16  1    e+ 0  1   e+ 0
#> 4 stress      fixed     unknown      -5.93e-17  1.60e-16 -3.55 e-16  2.13e-16
#> # ℹ 11 more variables: statistic <dbl>, p_value <dbl>, interval_type <chr>,
#> #   engine <chr>, model_class <chr>, rhat <dbl>, ess_bulk <dbl>,
#> #   ess_tail <dbl>, pd <dbl>, rope_low <dbl>, rope_high <dbl>
# reestimate_weights: weights_fn must return ILD with the weight column, e.g. re-run IPTW pipeline:
bs_re <- ild_msm_bootstrap(fwb, n_boot = 12L, weight_policy = "reestimate_weights",
  seed = 4, weights_fn = function(d) { d$.ipw <- runif(nrow(d), 0.85, 1.15); d })
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 2.13143 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.70039 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.765417 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.87247 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> unable to evaluate scaled gradient
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge: degenerate Hessian with 1 negative eigenvalues
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.43315 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 1.33261 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.804983 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.110021 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 2.08795 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.1423 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
#> Model failed to converge with max|grad| = 0.506945 (tol = 0.002, component 1)
#> Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
#>  - Rescale variables?

Reproducibility

Use a fixed seed when simulating or fitting models so results can be recreated. The pipeline is deterministic for a given seed and data. When saving results (e.g. after [ild_lme()] or [ild_diagnostics()]), you can attach a reproducibility manifest and save a single bundle with [ild_manifest()] and [ild_bundle()]:

# Optional: build a manifest with scenario and seed, then bundle the fit for saving
manifest <- ild_manifest(seed = 42, scenario = ild_summary(x), include_session = FALSE)
bundle <- ild_bundle(fit1, manifest = manifest, label = "model_ar1")
# saveRDS(bundle, "run.rds")  # one file with result + manifest + label
names(bundle)
#> [1] "result"   "manifest" "label"

For simulation-based recovery and power under the bundled ild_simulate() DGP, see vignette("benchmark-simulation-recovery", package = "tidyILD").

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.