The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
library(gsDesign)
library(gsDesign2)
library(dplyr)
library(gt)
library(simtrial)
library(tidyr)
library(survival)
In the survival (base R) package, the log-rank and Cox estimation procedures apply (by default) a correction to “fix” roundoff errors. These are implemented with the timefix
option (by default timefix = TRUE
) via the aeqSurv()
function. However, in the simtrial package, (and also Hmisc), such a correction is not implemented; Consequently, there can be discrepancies between simtrial and base R survival (survdiff()
, coxph()
, and survfit()
).
For details on the aeqSurv()
function, see Therneau, 2016 and the ?aeqSurv
function documentation.
In the following, we describe a simulation scenario where a discrepancy is generated and illustrate how discrepancies can be resolved (if desired) by pre-processing survival times with aeqSurv()
and thus replicating survdiff()
and coxph()
default calculations.
In the simulated dataset, two observations are generated:
aeqSurv()
, these times are tied and set to \(Y=0.306132604679\).We define various true data generating model scenarios and convert for use in gsDesign2. Here, we are using a single scenario where discrepancies were found. This is just for illustration to inform the user of simtrial that discrepancies can occur and how to resolve via aeqSurv()
, if desired.
24_months <- 0.35
survival_at_ log(.35) / log(.25)
hr <- 12
control_median <- c(log(2) / control_median, (log(.25) - log(.2)) / 12)
control_rate <-
tribble(
scenarios <-~Scenario, ~Name, ~Period, ~duration, ~Survival,
0, "Control", 0, 0, 1,
0, "Control", 1, 24, .25,
0, "Control", 2, 12, .2,
1, "PH", 0, 0, 1,
1, "PH", 1, 24, .35,
1, "PH", 2, 12, .2^hr,
2, "3-month delay", 0, 0, 1,
2, "3-month delay", 1, 3, exp(-3 * control_rate[1]),
2, "3-month delay", 2, 21, .35,
2, "3-month delay", 3, 12, .2^hr,
3, "6-month delay", 0, 0, 1,
3, "6-month delay", 1, 6, exp(-6 * control_rate[1]),
3, "6-month delay", 2, 18, .35,
3, "6-month delay", 3, 12, .2^hr,
4, "Crossing", 0, 0, 1,
4, "Crossing", 1, 3, exp(-3 * control_rate[1] * 1.3),
4, "Crossing", 2, 21, .35,
4, "Crossing", 3, 12, .2^hr,
5, "Weak null", 0, 0, 1,
5, "Weak null", 1, 24, .25,
5, "Weak null", 2, 12, .2,
6, "Strong null", 0, 0, 1,
6, "Strong null", 1, 3, exp(-3 * control_rate[1] * 1.5),
6, "Strong null", 2, 3, exp(-6 * control_rate[1]),
6, "Strong null", 3, 18, .25,
6, "Strong null", 4, 12, .2,
)# scenarios |> gt()
scenarios |>
fr <- group_by(Scenario) |>
# filter(Scenario == 2) |>
mutate(
Month = cumsum(duration),
x_rate = -(log(Survival) - log(lag(Survival, default = 1))) /
duration,
rate = ifelse(Month > 24, control_rate[2], control_rate[1]),
hr = x_rate / rate
|>
) select(-x_rate) |>
filter(Period > 0, Scenario > 0) |>
ungroup()
# fr |> gt() |> fmt_number(columns = everything(), decimals = 2)
fr |> mutate(fail_rate = rate, dropout_rate = 0.001, stratum = "All")
fr <-
# MWLR
fixed_design_mb(
mwlr <-tau = 12,
enroll_rate = define_enroll_rate(duration = 12, rate = 1),
fail_rate = fr |> filter(Scenario == 2),
alpha = 0.025, power = .85, ratio = 1,
study_duration = 36
|> to_integer()
)
mwlr$enroll_rate er <-
set.seed(3219)
fr[c(14:17), ]
dgm <-
data.frame(
fail_rate <-stratum = rep("All", 2 * nrow(dgm)),
period = rep(dgm$Period, 2),
treatment = c(
rep("control", nrow(dgm)),
rep("experimental", nrow(dgm))
),duration = rep(dgm$duration, 2),
rate = c(dgm$rate, dgm$rate * dgm$hr)
)
$stratum <- "All"
dgm# Constant dropout rate for both treatment arms and all scenarios
data.frame(
dropout_rate <-stratum = rep("All", 2),
period = rep(1, 2),
treatment = c("control", "experimental"),
duration = rep(100, 2),
rate = rep(.001, 2)
)
Simulated dataset with discrepancy between logrank test of simtrial::wlr()
and survdiff()
(also compare to score test of coxph()
[same as survdiff()
with default timefix = TRUE
]).
395
ss <-
set.seed(8316951 + ss * 1000)
# Generate a dataset
sim_pw_surv(
dat <-n = 698,
enroll_rate = er,
fail_rate = fail_rate,
dropout_rate = dropout_rate
)
cut_data_by_date(dat, 36)
analysis_data <-
analysis_data
dfa <-
$treat <- ifelse(dfa$treatment == "experimental", 1, 0)
dfa
dfa |> wlr(weight = fh(rho = 0, gamma = 0))
z1 <-
survdiff(Surv(tte, event) ~ treat, data = dfa)
check <-
# Note, for `coxph()`, use
# cph.score <- summary(coxph(Surv(tte, event) ~ treat, data = dfa, control = coxph.control(timefix = TRUE)))$sctest
cat("Log-rank wlr() vs survdiff()", c(z1$z^2, check$chisq), "\n")
## Log-rank wlr() vs survdiff() 0.1577428 0.1577954
Verify that timefix = FALSE
in coxph()
agrees with wlr()
:
summary(coxph(
cph.score <-Surv(tte, event) ~ treat,
data = dfa,
control = coxph.control(timefix = FALSE)
$sctest
))cat("Log-rank wlr() vs Cox score z^2", c(z1$z^2, cph.score["test"]), "\n")
## Log-rank wlr() vs Cox score z^2 0.1577428 0.1577428
Pre-processing survival times with aeqSurv()
to implement timefix = TRUE
procedure.
Verify wlr()
and survdiff()
now agree.
dfa[, "tte"]
Y <- dfa[, "event"]
Delta <-
aeqSurv(Surv(Y, Delta))
tfixed <- tfixed[, "time"]
Y <- tfixed[, "status"]
Delta <-# Use aeqSurv version
$tte2 <- Y
dfa$event2 <- Delta
dfa
# wlr() after "timefix"
dfa
dfa2 <-$tte <- dfa2$tte2
dfa2$event <- dfa2$event2
dfa2 dfa2 |> wlr(weight = fh(rho = 0, gamma = 0))
z1new <-cat("Log-rank wlr() with timefix vs survdiff() z^2", c(z1new$z^2, check$chisq), "\n")
## Log-rank wlr() with timefix vs survdiff() z^2 0.1577954 0.1577954
Where do they differ (tte2
are times after aeqSurv()
)?
dfa[order(dfa$tte2), ]
dfa <-
seq(1, nrow(dfa))
id <-
exp(dfa$tte) - exp(dfa$tte2)
diff <- which(abs(diff) > 0)
id_diff <-
seq(id_diff - 2, id_diff + 2)
tolook <-
dfa[tolook, c("tte", "tte2", "event", "event2", "treatment")]
dfcheck <-print(dfcheck, digits = 12)
## tte tte2 event event2 treatment
## 13 0.276251560170 0.276251560170 1 1 experimental
## 143 0.298789385712 0.298789385712 1 1 control
## 464 0.306132722582 0.306132604679 1 1 control
## 516 0.306132604679 0.306132604679 1 1 experimental
## 605 0.336489970678 0.336489970678 1 1 experimental
Verify coxph()
(default) and coxph()
with aeqSurv()
pre-processing (using tte2
as outcome and setting timefix = FALSE
) are identical:
Also note that here ties do not have impact because in separate arms.
# Check Cox with ties
summary(coxph(Surv(tte, event) ~ treatment, data = dfa, ties = "breslow"))$conf.int
cox_breslow <- summary(coxph(Surv(tte, event) ~ treatment, data = dfa, ties = "efron"))$conf.int
cox_efron <-cat("Cox Breslow and Efron hr (tte, timefix=TRUE):", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte, timefix=TRUE): 0.9657106 0.9657106
# Here ties do not have impact because in separate arms
summary(coxph(Surv(tte2, event2) ~ treatment, data = dfa, ties = "breslow", control = coxph.control(timefix = FALSE)))$conf.int
cox_breslow <- summary(coxph(Surv(tte2, event2) ~ treatment, data = dfa, ties = "efron", control = coxph.control(timefix = FALSE)))$conf.int
cox_efron <-cat("Cox Breslow and Efron hr (tte2, timefix=FALSE):", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte2, timefix=FALSE): 0.9657106 0.9657106
So here there is a difference between tte
and tte2
times, but there is not an impact of ties for Cox between "breslow"
and "efron"
because the ties (single tie in tte2
) are in separate arms.
Lastly, artificially change treatment so that two observations are tied within the same treatment arm which generates difference between "breslow"
and "efron"
options for ties
:
# Create tie within treatment arm by changing treatment
dfa
dfa3 <-19, "treat"] <- 1.0
dfa3[
summary(coxph(Surv(tte, event) ~ treat, data = dfa3, ties = "breslow", control = coxph.control(timefix = TRUE)))$conf.int
cox_breslow <- summary(coxph(Surv(tte, event) ~ treat, data = dfa3, ties = "efron", control = coxph.control(timefix = TRUE)))$conf.int
cox_efron <-cat("Cox Breslow and Efron hr (tte, timefix=TRUE)=", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte, timefix=TRUE)= 0.9729723 0.9729778
Same as
summary(coxph(Surv(tte2, event2) ~ treat, data = dfa3, ties = "breslow", control = coxph.control(timefix = FALSE)))$conf.int
cox_breslow <- summary(coxph(Surv(tte2, event2) ~ treat, data = dfa3, ties = "efron", control = coxph.control(timefix = FALSE)))$conf.int
cox_efron <-cat("Cox Breslow and Efron hr (tte2, timefix=FALSE)=", c(cox_breslow[1], cox_efron[1]), "\n")
## Cox Breslow and Efron hr (tte2, timefix=FALSE)= 0.9729723 0.9729778
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.