## ----setup, include = FALSE-----------------------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.width = 7,
  fig.height = 4.8,
  message = FALSE,
  warning = FALSE
)


## ----simulate-data--------------------------------------------------------------------------------
library(ctsem)
library(ggplot2)

set.seed(49)

n_subjects <- 20
n_obs <- 80

raw_param_names <- c("t0m", "drift", "diffusion", "mmean")
raw_cor <- matrix(c(
   1.00,  0.45, -0.25,  0.35,
   0.45,  1.00, -0.55,  0.25,
  -0.25, -0.55,  1.00, -0.20,
   0.35,  0.25, -0.20,  1.00
), nrow = 4, byrow = TRUE,
dimnames = list(raw_param_names, raw_param_names))
raw_sd <- c(t0m = .7, drift = .55, diffusion = .35, mmean = 2.45)
raw_mean <- c(t0m = 0, drift = -.4, diffusion = log(exp(.45) - 1), mmean = 0)
raw_cov <- diag(raw_sd) %*% raw_cor %*% diag(raw_sd)
raw_truth_mat <- MASS::mvrnorm(
  n = n_subjects,
  mu = raw_mean,
  Sigma = raw_cov)
colnames(raw_truth_mat) <- raw_param_names
raw_truth <- data.frame(id = seq_len(n_subjects), raw_truth_mat,
  check.names = FALSE)

softplus <- function(x) ifelse(x > 30, x, log1p(exp(x)))
truth <- data.frame(
  id = seq_len(n_subjects),
  t0m = raw_truth$t0m,
  drift = -softplus(-raw_truth$drift),
  diffusion = softplus(raw_truth$diffusion),
  mmean = raw_truth$mmean
)

datalist <- vector("list", n_subjects)
for(i in seq_len(n_subjects)){
  gm <- ctModel(
    silent = TRUE,
    Tpoints = n_obs,
    latentNames = "eta",
    manifestNames = "Y",
    LAMBDA = matrix(1),
    T0MEANS = matrix(truth$t0m[i], 1, 1),
    DRIFT = matrix(truth$drift[i], 1, 1),
    DIFFUSION = matrix(truth$diffusion[i], 1, 1),
    CINT = matrix(0),
    T0VAR = matrix(0),
    MANIFESTMEANS = matrix(truth$mmean[i], 1, 1),
    MANIFESTVAR = matrix(.35)
  )

  dat_i <- data.frame(ctGenerate(
    ctmodelobj = gm,
    n.subjects = 1,
    burnin = 0,
    dtmean = .1,
    logdtsd = 0,
    wide = FALSE))
  dat_i$id <- i
  datalist[[i]] <- dat_i
}

dat <- do.call(rbind, datalist)
dat <- dat[, c("id", "time", "Y")]


## ----plot-data------------------------------------------------------------------------------------
ggplot(dat[dat$id <= 4, ], aes(time, Y, group = id, colour = factor(id))) +
  geom_line(linewidth = .35) + theme_bw()+
  labs(colour = "subject")


## ----model----------------------------------------------------------------------------------------
fit_model <- ctModel(
  type = "ct",
  silent = TRUE,
  latentNames = "eta",
  manifestNames = "Y",
  LAMBDA = matrix(1),
  T0MEANS = matrix("t0m||TRUE", 1, 1),
  DRIFT = matrix("drift||TRUE", 1, 1),
  DIFFUSION = matrix("diffusion||TRUE", 1, 1),
  MANIFESTMEANS = matrix("mmean||TRUE", 1, 1),
  MANIFESTVAR = matrix("merror||FALSE", 1, 1)
)


## ----eb-fit---------------------------------------------------------------------------------------
cores=2

eb_fit <- ctEmpiricalBayesFit(
  datalong = dat,
  model = fit_model,
  priors = TRUE,
  optimize = TRUE,
  cores = cores,
  Npasses = 2
)

eb_summary <- summary(eb_fit, use = "rawest", sdscale = "unit")

eb_summary$initialpopmeans
eb_summary$outliers$initial
eb_summary$popmeans
eb_summary$correlations$final


## ----eb-adjusted-model----------------------------------------------------------------------------
eb_fit$adjustedmodel$pars[
  eb_fit$adjustedmodel$pars$param %in% c("t0m", "drift", "diffusion", "mmean"),
  c("matrix", "param", "transform", "indvarying", "sdscale")]


## ----re-fit---------------------------------------------------------------------------------------
re_fit <- ctFit(
  datalong = dat,
  model = fit_model,
  priors = TRUE,
  cores = cores
)


## ----comparison-helpers---------------------------------------------------------------------------
extract_subject_point <- function(fit){
  cp <- ctSummaryMatrices(fit)
  c(
    t0m = cp$T0MEANS["eta", 1],
    drift = cp$DRIFT["eta", "eta"],
    diffusion = cp$DIFFUSION["eta", "eta"],
    mmean = cp$MANIFESTMEANS["Y", 1]
  )
}

initial_subject <- do.call(rbind, lapply(eb_fit$initialfits,
  extract_subject_point))
eb_subject <- do.call(rbind, lapply(eb_fit$fits, extract_subject_point))

re_subject <- ctSubjectPars(re_fit, pointest = TRUE)[1, ,
  c("t0m", "drift", "diffusion", "mmean")]

truth_mat <- as.matrix(truth[, c("t0m", "drift", "diffusion", "mmean")])

recovery_summary <- function(est, truth){
  data.frame(
    param = colnames(truth),
    correlation = diag(stats::cor(est, truth)),
    rmse = sqrt(colMeans((est - truth)^2)),
    estimate_sd = apply(est, 2, sd),
    true_sd = apply(truth, 2, sd),
    row.names = NULL
  )
}


## ----initial-final-eb-recovery--------------------------------------------------------------------
eb_pass_recovery <- rbind(
  cbind(method = "Initial model-prior fits",
    recovery_summary(initial_subject, truth_mat)),
  cbind(method = "Final EB-prior fits",
    recovery_summary(eb_subject, truth_mat))
)

knitr::kable(eb_pass_recovery, digits = 3)


## ----initial-final-eb-plot------------------------------------------------------------------------
eb_pass_plot_data <- rbind(
  data.frame(method = "Initial model-prior fits", id = truth$id,
    param = rep(colnames(truth_mat), each = n_subjects),
    true = as.vector(truth_mat),
    estimate = as.vector(initial_subject)),
  data.frame(method = "Final EB-prior fits", id = truth$id,
    param = rep(colnames(truth_mat), each = n_subjects),
    true = as.vector(truth_mat),
    estimate = as.vector(eb_subject))
)

ggplot(eb_pass_plot_data, aes(true, estimate, colour = method)) +
  geom_abline(slope = 1, intercept = 0, linewidth = .3) +
  geom_point(alpha = .55, size = 1.4) +
  facet_wrap(~ param, scales = "free") +
  labs(x = "Generating value", y = "Estimated subject value", colour = NULL)


## ----correlation-recovery-------------------------------------------------------------------------
lower_cor_table <- function(reference, estimates){
  lower <- lower.tri(reference)
  pair_index <- which(lower, arr.ind = TRUE)
  out <- data.frame(
    pair = paste(rownames(reference)[pair_index[, 1]],
      colnames(reference)[pair_index[, 2]], sep = "__"),
    truth = as.vector(reference[lower]),
    check.names = FALSE)
  for(nm in names(estimates)){
    out[[nm]] <- as.vector(estimates[[nm]][lower])
  }
  out
}

true_raw_cor <- stats::cor(as.matrix(raw_truth[, raw_param_names]))
initial_eb_raw_cor <- stats::cor(eb_fit$initialraw[, raw_param_names])
final_eb_raw_cor <- stats::cor(
  eb_fit$passoriginalraw[[length(eb_fit$passoriginalraw)]][, raw_param_names])

re_raw_names <- ctsem:::getparnames(re_fit, reonly = TRUE)
re_rawpopcorr <- re_fit$stanfit$transformedparsfull$rawpopcorr[1, , ]
dimnames(re_rawpopcorr) <- list(re_raw_names, re_raw_names)
re_rawpopcorr <- re_rawpopcorr[raw_param_names, raw_param_names]

raw_cor_recovery <- lower_cor_table(true_raw_cor, list(
  "Initial EB raw estimates" = initial_eb_raw_cor,
  "Final EB raw estimates" = final_eb_raw_cor,
  "Random-effects population raw" = re_rawpopcorr
))

knitr::kable(raw_cor_recovery, digits = 3)


## ----subject-correlation-recovery-----------------------------------------------------------------
true_subject_cor <- stats::cor(truth_mat)
initial_subject_cor <- stats::cor(initial_subject)
eb_subject_cor <- stats::cor(eb_subject)
re_subject_cor <- stats::cor(re_subject)

subject_cor_recovery <- lower_cor_table(true_subject_cor, list(
  "Initial EB subject values" = initial_subject_cor,
  "Final EB subject values" = eb_subject_cor,
  "Random-effects subject values" = re_subject_cor
))

knitr::kable(subject_cor_recovery, digits = 3)


## ----final-eb-random-effects-recovery-------------------------------------------------------------
recovery <- rbind(
  cbind(method = "EB subject fits", recovery_summary(eb_subject, truth_mat)),
  cbind(method = "Random effects", recovery_summary(re_subject, truth_mat))
)

knitr::kable(recovery, digits = 3)


## ----comparison-plot------------------------------------------------------------------------------
plot_data <- rbind(
  data.frame(method = "EB subject fits", id = truth$id,
    param = rep(colnames(truth_mat), each = n_subjects),
    true = as.vector(truth_mat),
    estimate = as.vector(eb_subject)),
  data.frame(method = "Random effects", id = truth$id,
    param = rep(colnames(truth_mat), each = n_subjects),
    true = as.vector(truth_mat),
    estimate = as.vector(re_subject))
)

ggplot(plot_data, aes(true, estimate, colour = method)) +
  geom_abline(slope = 1, intercept = 0, linewidth = .3) +
  geom_point(alpha = .55, size = 1.4) +
  facet_wrap(~ param, scales = "free") +
  labs(x = "Generating value", y = "Estimated subject value", colour = NULL)

