The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
This is the example code for Dunkler D, Sauerbrei W, Heinze G (2016). Global, Parameterwise and Joint Shrinkage Factor Estimation. Journal of Statistical Software. 69(8), 1-19. http://dx.doi.org/10.18637/jss.v069.i08.
################################################################################
### R code for
### "Global, Parameterwise and Joint Shrinkage Factor Estimation"
### written by Daniela Dunkler, March 2016
################################################################################
## works with R 3.2.3 & shrink-package 1.2.1
## load the packages
library("shrink")
library("survival")
library("mfp")
library("aod") # for wald-test
##
## Attache Paket: 'aod'
## Das folgende Objekt ist maskiert 'package:survival':
##
## rats
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=C LC_CTYPE=German_Austria.1252
## [3] LC_MONETARY=German_Austria.1252 LC_NUMERIC=C
## [5] LC_TIME=German_Austria.1252
##
## time zone: Europe/Vienna
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] aod_1.3.2 mfp_1.5.4 survival_3.5-5 shrink_1.2.3 knitr_1.45
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.4 xfun_0.40 bslib_0.5.1
## [4] ggplot2_3.4.4 htmlwidgets_1.6.2 lattice_0.21-8
## [7] numDeriv_2016.8-1.1 vctrs_0.6.3 tools_4.3.1
## [10] generics_0.1.3 sandwich_3.0-2 tibble_3.2.1
## [13] fansi_1.0.4 cluster_2.1.4 pkgconfig_2.0.3
## [16] Matrix_1.6-1 data.table_1.14.8 checkmate_2.2.0
## [19] lifecycle_1.0.3 compiler_4.3.1 stringr_1.5.0
## [22] MatrixModels_0.5-2 munsell_0.5.0 codetools_0.2-19
## [25] SparseM_1.81 quantreg_5.97 htmltools_0.5.6
## [28] sass_0.4.7 yaml_2.3.7 htmlTable_2.4.2
## [31] Formula_1.2-5 pillar_1.9.0 jquerylib_0.1.4
## [34] MASS_7.3-60 cachem_1.0.8 rms_6.7-0
## [37] Hmisc_5.1-1 rpart_4.1.19 multcomp_1.4-25
## [40] nlme_3.1-162 tidyselect_1.2.0 digest_0.6.33
## [43] polspline_1.1.23 mvtnorm_1.2-3 stringi_1.7.12
## [46] dplyr_1.1.2 splines_4.3.1 fastmap_1.1.1
## [49] grid_4.3.1 colorspace_2.1-0 cli_3.6.1
## [52] magrittr_2.0.3 base64enc_0.1-3 utf8_1.2.3
## [55] TH.data_1.1-2 foreign_0.8-84 scales_1.2.1
## [58] backports_1.4.1 rmarkdown_2.25 nnet_7.3-19
## [61] gridExtra_2.3 zoo_1.8-12 evaluate_0.22
## [64] rlang_1.1.1 glue_1.6.2 rstudioapi_0.15.0
## [67] jsonlite_1.8.7 R6_2.5.1
## Section 2.1: Deep Vein Thrombosis Study
## load data
data("deepvein")
# number of observations, events, median observation time, and names of
# variables
nrow(deepvein)
## [1] 929
## Call: survfit(formula = Surv(time, status2) ~ 1, data = deepvein)
##
## n events median 0.95LCL 0.95UCL
## [1,] 929 782 37.8 35.6 41.7
##
## 0 1
## 782 147
##
## 0 1
## 84.18 15.82
## [1] "age" "bmi" "durther" "fiimut" "fvleid" "loc"
## [7] "log2ddim" "pnr" "sex" "status" "status2" "time"
## Section 2.2: Breast Cancer Study
## load data
data("GBSG")
# number of observations, events, median observation time, and names of
# variables
nrow(GBSG)
## [1] 686
##
## 0 1
## 387 299
##
## 0 1
## 56.41 43.59
GBSG$cens2 <- abs(GBSG$cens-1)
# median observation time is given in days here
survfit(Surv(rfst, cens2) ~ 1, data = GBSG)
## Call: survfit(formula = Surv(rfst, cens2) ~ 1, data = GBSG)
##
## n events median 0.95LCL 0.95UCL
## [1,] 686 387 1645 1570 1717
## [1] 53.93443
## [1] "age" "cens" "cens2" "esm" "htreat" "id"
## [7] "menostat" "posnodal" "prm" "rfst" "tumgrad" "tumsize"
## Section 5.1: Deep Vein Thrombosis Study
# set the reference level for all categorical variables
deepvein$sex <- relevel(deepvein$sex, ref = "female")
deepvein$fiimut <- relevel(deepvein$fiimut, ref = "absent")
deepvein$fvleid <- relevel(deepvein$fvleid, ref = "absent")
deepvein$loc <- relevel(deepvein$loc, ref = "PE")
# contrasts(deepvein$loc)
## fit full model, and compute shrinkage factors (jackknife estimates and dfbeta
## approximations)
fitfull <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age +
sex + loc + fiimut + fvleid, data = deepvein, x = TRUE)
summary(fitfull)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + bmi + durther +
## age + sex + loc + fiimut + fvleid, data = deepvein, x = TRUE)
##
## n= 929, number of events= 147
##
## coef exp(coef) se(coef) z Pr(>|z|)
## log2ddim 0.219517 1.245475 0.085739 2.560 0.01046 *
## bmi 0.005865 1.005883 0.019051 0.308 0.75817
## durther 0.021881 1.022122 0.023681 0.924 0.35550
## age -0.003973 0.996035 0.006583 -0.603 0.54622
## sexmale 0.495927 1.642019 0.189718 2.614 0.00895 **
## locdistal -0.905095 0.404504 0.311078 -2.910 0.00362 **
## locproximal -0.179351 0.835813 0.180336 -0.995 0.31996
## fiimutpresent -0.162573 0.849954 0.390499 -0.416 0.67718
## fvleidpresent -0.108886 0.896833 0.194228 -0.561 0.57506
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## log2ddim 1.2455 0.8029 1.0528 1.4734
## bmi 1.0059 0.9942 0.9690 1.0442
## durther 1.0221 0.9784 0.9758 1.0707
## age 0.9960 1.0040 0.9833 1.0090
## sexmale 1.6420 0.6090 1.1321 2.3816
## locdistal 0.4045 2.4722 0.2199 0.7442
## locproximal 0.8358 1.1964 0.5870 1.1902
## fiimutpresent 0.8500 1.1765 0.3954 1.8272
## fvleidpresent 0.8968 1.1150 0.6129 1.3123
##
## Concordance= 0.61 (se = 0.026 )
## Likelihood ratio test= 26.2 on 9 df, p=0.002
## Wald test = 23.07 on 9 df, p=0.006
## Score (logrank) test = 23.9 on 9 df, p=0.004
## wald-test for categorical predictor "loc"
wald.test(Sigma = vcov(fitfull)[c("locdistal", "locproximal"),
c("locdistal", "locproximal")],
b = fitfull$coeff[c("locdistal", "locproximal")], Terms = 1:2)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 8.6, df = 2, P(> X2) = 0.014
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.6069174
##
## Shrunken Regression Coefficients:
## log2ddim bmi durther age sexmale
## 0.133228852 0.003559850 0.013279842 -0.002411042 0.300986429
## locdistal locproximal fiimutpresent fvleidpresent
## -0.549317699 -0.108851134 -0.098668180 -0.066084738
## apply backward elimination to full model*, and compute shrinkage factors to
## selected model
## * dummy variables of loc are jointly tested and selected
fitd <- step(fitfull, direction = "backward")
## Start: AIC=1784.06
## Surv(time, status) ~ log2ddim + bmi + durther + age + sex + loc +
## fiimut + fvleid
##
## Df AIC
## - bmi 1 1782.2
## - fiimut 1 1782.2
## - fvleid 1 1782.4
## - age 1 1782.4
## - durther 1 1782.9
## <none> 1784.1
## - log2ddim 1 1788.6
## - sex 1 1789.2
## - loc 2 1790.5
##
## Step: AIC=1782.15
## Surv(time, status) ~ log2ddim + durther + age + sex + loc + fiimut +
## fvleid
##
## Df AIC
## - fiimut 1 1780.3
## - fvleid 1 1780.5
## - age 1 1780.5
## - durther 1 1781.0
## <none> 1782.2
## - log2ddim 1 1786.6
## - sex 1 1787.2
## - loc 2 1788.6
##
## Step: AIC=1780.34
## Surv(time, status) ~ log2ddim + durther + age + sex + loc + fvleid
##
## Df AIC
## - fvleid 1 1778.7
## - age 1 1778.7
## - durther 1 1779.1
## <none> 1780.3
## - log2ddim 1 1784.9
## - sex 1 1785.3
## - loc 2 1786.7
##
## Step: AIC=1778.66
## Surv(time, status) ~ log2ddim + durther + age + sex + loc
##
## Df AIC
## - age 1 1777.0
## - durther 1 1777.4
## <none> 1778.7
## - log2ddim 1 1783.2
## - sex 1 1783.4
## - loc 2 1785.0
##
## Step: AIC=1777.01
## Surv(time, status) ~ log2ddim + durther + sex + loc
##
## Df AIC
## - durther 1 1775.8
## <none> 1777.0
## - log2ddim 1 1781.5
## - sex 1 1782.6
## - loc 2 1783.3
##
## Step: AIC=1775.77
## Surv(time, status) ~ log2ddim + sex + loc
##
## Df AIC
## <none> 1775.8
## - log2ddim 1 1780.3
## - sex 1 1781.2
## - loc 2 1782.8
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + sex + loc, data = deepvein,
## x = TRUE)
##
## coef exp(coef) se(coef) z p
## log2ddim 0.21879 1.24457 0.08543 2.561 0.01043
## sexmale 0.49091 1.63380 0.18473 2.657 0.00787
## locdistal -0.92237 0.39758 0.31007 -2.975 0.00293
## locproximal -0.20505 0.81461 0.17867 -1.148 0.25112
##
## Likelihood ratio test=24.49 on 4 df, p=6.367e-05
## n= 929, number of events= 147
## wald-test for categorical predictor "loc"
wald.test(Sigma = vcov(fitd)[c("locdistal", "locproximal"),
c("locdistal", "locproximal")],
b = fitd$coeff[c("locdistal", "locproximal")], Terms = 1:2)
## Wald test:
## ----------
##
## Chi-squared test:
## X2 = 9.1, df = 2, P(> X2) = 0.01
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.8076362
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal locproximal
## 0.1767045 0.3964779 -0.7449390 -0.1656066
## Shrinkage Factors (type=parameterwise, method=jackknife):
## log2ddim sexmale locdistal locproximal
## 0.7321036 0.8351074 0.8393993 0.1321006
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal locproximal
## 0.16017851 0.40996379 -0.77423621 -0.02708736
## log2ddim sexmale locdistal locproximal
## 0.3703055 0.3699730 0.3268146 0.8696058
shrink(fitd, type = "parameterwise", method = "jackknife",
join = list(c("locdistal", "locproximal")))
## Shrinkage Factors (type=parameterwise with join option, method=jackknife):
## log2ddim sexmale locdistal locproximal
## 0.7806284 0.8363976 0.8055175 0.8055175
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal locproximal
## 0.1707954 0.4105972 -0.7429847 -0.1651721
## dummy variables of loc are separately tested and selected
## generate dummy variables, fit full model and then apply backward elimination
deepvein <- cbind(deepvein, model.matrix( ~ loc, data = deepvein)[, -1])
fitfull2 <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age +
sex + locdistal + locproximal + fiimut + fvleid,
data = deepvein, x = TRUE) # fitfull2 is identical to fitfull
fitd2 <- step(fitfull2, direction = "backward")
## Start: AIC=1784.06
## Surv(time, status) ~ log2ddim + bmi + durther + age + sex + locdistal +
## locproximal + fiimut + fvleid
##
## Df AIC
## - bmi 1 1782.2
## - fiimut 1 1782.2
## - fvleid 1 1782.4
## - age 1 1782.4
## - durther 1 1782.9
## - locproximal 1 1783.1
## <none> 1784.1
## - log2ddim 1 1788.6
## - sex 1 1789.2
## - locdistal 1 1792.5
##
## Step: AIC=1782.15
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal +
## locproximal + fiimut + fvleid
##
## Df AIC
## - fiimut 1 1780.3
## - fvleid 1 1780.5
## - age 1 1780.5
## - durther 1 1781.0
## - locproximal 1 1781.2
## <none> 1782.2
## - log2ddim 1 1786.6
## - sex 1 1787.2
## - locdistal 1 1790.6
##
## Step: AIC=1780.34
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal +
## locproximal + fvleid
##
## Df AIC
## - fvleid 1 1778.7
## - age 1 1778.7
## - durther 1 1779.1
## - locproximal 1 1779.3
## <none> 1780.3
## - log2ddim 1 1784.9
## - sex 1 1785.3
## - locdistal 1 1788.7
##
## Step: AIC=1778.66
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal +
## locproximal
##
## Df AIC
## - age 1 1777.0
## - durther 1 1777.4
## - locproximal 1 1777.8
## <none> 1778.7
## - log2ddim 1 1783.2
## - sex 1 1783.4
## - locdistal 1 1787.0
##
## Step: AIC=1777.01
## Surv(time, status) ~ log2ddim + durther + sex + locdistal + locproximal
##
## Df AIC
## - durther 1 1775.8
## - locproximal 1 1776.2
## <none> 1777.0
## - log2ddim 1 1781.5
## - sex 1 1782.6
## - locdistal 1 1785.3
##
## Step: AIC=1775.77
## Surv(time, status) ~ log2ddim + sex + locdistal + locproximal
##
## Df AIC
## - locproximal 1 1775.1
## <none> 1775.8
## - log2ddim 1 1780.3
## - sex 1 1781.2
## - locdistal 1 1784.8
##
## Step: AIC=1775.1
## Surv(time, status) ~ log2ddim + sex + locdistal
##
## Df AIC
## <none> 1775.1
## - log2ddim 1 1778.9
## - sex 1 1780.7
## - locdistal 1 1782.8
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + sex + locdistal,
## data = deepvein, x = TRUE)
##
## n= 929, number of events= 147
##
## coef exp(coef) se(coef) z Pr(>|z|)
## log2ddim 0.20392 1.22621 0.08483 2.404 0.0162 *
## sexmale 0.49535 1.64107 0.18496 2.678 0.0074 **
## locdistal -0.84053 0.43148 0.30277 -2.776 0.0055 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## log2ddim 1.2262 0.8155 1.0384 1.4480
## sexmale 1.6411 0.6094 1.1421 2.3581
## locdistal 0.4315 2.3176 0.2384 0.7811
##
## Concordance= 0.602 (se = 0.026 )
## Likelihood ratio test= 23.16 on 3 df, p=4e-05
## Wald test = 20.03 on 3 df, p=2e-04
## Score (logrank) test = 20.85 on 3 df, p=1e-04
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.8439991
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal
## 0.1721124 0.4180761 -0.7094099
## Shrinkage Factors (type=parameterwise, method=jackknife):
## log2ddim sexmale locdistal
## 0.7700839 0.8317218 0.8975722
##
## Shrunken Regression Coefficients:
## log2ddim sexmale locdistal
## 0.1570393 0.4119946 -0.7544400
## Table 2
t2_jack <- c(shrink(fitd, type = "global", method = "jackknife")$ShrinkageFactors,
pwsf$ShrinkageFactors,
shrink(fitd, type = "parameterwise", method = "jackknife",
join = list(c("locdistal", "locproximal")))$ShrinkageFactors,
system.time(shrink(fitd, type = "parameterwise", method = "jackknife"))[1])
t2_dfbeta <- c(shrink(fitd, type = "global", method = "dfbeta")$ShrinkageFactors,
shrink(fitd, type = "parameterwise", method = "dfbeta")$ShrinkageFactors,
shrink(fitd, type = "parameterwise", method = "dfbeta",
join = list(c("locdistal", "locproximal")))$ShrinkageFactors,
system.time(shrink(fitd, type = "parameterwise", method = "dfbeta"))[1])
t2 <- cbind(Jackknife = t2_jack, DFBETA = t2_dfbeta)
Table2 <- cbind(t2, round((t2[, 2] - t2[, 1]) / t2[, 1] * 100, 1))
Table2[, 1:2] <- round(Table2[, 1:2], 4)
dimnames(Table2)[[2]][3] <- "Relative difference"
Table2 <- rbind(Table2[1,], rep(NA, 3), Table2[2:5,], rep(NA, 3), Table2[6:8,],
rep(NA, 3), Table2[10,])
dimnames(Table2)[[1]][c(1:2, 7, 10, 12)] <- c("Global shrinkage",
"Parameterwise shrinkage", "Joint shrinkage", "loc", "Computing time")
Table2
## Jackknife DFBETA Relative difference
## Global shrinkage 0.8076 0.8123 0.6
## Parameterwise shrinkage NA NA NA
## log2ddim 0.7321 0.7385 0.9
## sexmale 0.8351 0.8373 0.3
## locdistal 0.8394 0.8449 0.7
## locproximal 0.1321 0.1470 11.2
## Joint shrinkage NA NA NA
## log2ddim 0.7806 0.7864 0.7
## sexmale 0.8364 0.8386 0.3
## loc 0.8055 0.8111 0.7
## NA NA NA
## Computing time 0.5600 0.0100 -98.2
## [1] 0.0149
## Figure 1: Data simulated from deep vein thrombosis study: Comparison of Jackknife and
## DFBETA-approximated global shrinkage factors for selected and unselected
## models.
deepvein0 <- subset(deepvein, status == 0)
deepvein1 <- subset(deepvein, status == 1)
n0 <- nrow(deepvein0)
n1 <- nrow(deepvein1)
ratio <- 0.2 # based on n1 / n0
## Note: Running this code is time-consuming, since 2 * B * S data sets are analyzed.
## size <- seq(from = 200, to = 2000, length.out = 19) # based on nrow(deepvein)
# B <- 100
## You may want to reduce B and length.out of size: e.g.
## size <- seq(from = 200, to = 2000, length.out = 4)
## B <- 3
#
# S <- length(size)
#
# shrinkGJ <- shrinkGD <- shrinkGJsel <- shrinkGDsel <- matrix(NA, nrow = B, ncol = S)
# set.seed(954681)
#
# for (s in 1:S) {
# cat("\n", s)
# for (b in 1:B) {
# cat(".")
# data <- rbind(deepvein0[sample(x = 1:n0, size = size[s]*(1-ratio), replace = TRUE),],
# deepvein1[sample(x = 1:n1, size = size[s]*ratio, replace = TRUE),])
#
# fitfull <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age + sex + loc +
# fiimut + fvleid, data = data, x = TRUE)
# fitsel <- step(fitfull, direction = "backward", trace = 0)
#
# if (!is.null(fitsel$coefficients)) { # no null model selected
# shrinkGJsel[b, s] <- shrink(fitsel, type = "global", method = "jackknife")$ShrinkageFactors
# shrinkGDsel[b, s] <- shrink(fitsel, type = "global", method = "dfbeta")$ShrinkageFactors
# }
#
#
# fit <- coxph(Surv(time, status) ~ log2ddim + sex + loc, data = data, x = TRUE)
#
# shrinkGJ[b, s] <- shrink(fit, type = "global", method = "jackknife")$ShrinkageFactors
# shrinkGD[b, s] <- shrink(fit, type = "global", method = "dfbeta")$ShrinkageFactors
# }
# }
## In some smaller data sets (and especially when shrinkage factors are estimated with the
## Jackknife method) there might be issues with convergence in individual data sets.
## However, coxph will issue a warning.
#
#
# shrinkGDselm <- apply(shrinkGDsel, 2, median)
# shrinkGJselm <- apply(shrinkGJsel, 2, median)
#
# cbind(n = size, Diff_J_D_sel = shrinkGJselm - shrinkGDselm)
#
# shrinkGDm <- apply(shrinkGD, 2, median)
# shrinkGJm <- apply(shrinkGJ, 2, median)
#
# cbind(n = size, Diff_J_D = shrinkGJm - shrinkGDm)
#
# xrange <- c(0.5, 1)
## xrange <- range(shrinkGDselm, shrinkGJselm, shrinkGDm, shrinkGJm)
## pdf("compJvsD.pdf", width = 6, height = 4)
# par(oma = c(0, 0, 0.5, 0.5), mar = c(3.5, 4, 0, 0))
# plot(range(size), xrange, type = "n", las = 1, xlab = "", ylab = "", xaxt = "n")
# axis(1, at = size)
# mtext(side = 1, text = "Sample size", line = 2.3)
# mtext(side = 2, text = "Global shrinkage factor", line = 2.8)
#
# points(size, shrinkGDselm, pch = 3, col = "darkolivegreen4", cex = 1.5)
# points(size, shrinkGJselm, pch = 1, col = "darkgoldenrod3", cex = 1.3)
# lines(size, shrinkGDselm, lty = 2, col = "darkolivegreen4", lwd = 2)
# lines(size, shrinkGJselm, lty = 3, col = "darkgoldenrod3", lwd = 2)
#
# points(size, shrinkGDm, pch = 3, col = "firebrick3", cex = 1.5)
# points(size, shrinkGJm, pch = 1, col = "dodgerblue3", cex = 1.3)
# lines(size, shrinkGDm, lty = 2, col = "firebrick3", lwd = 2)
# lines(size, shrinkGJm, lty = 3, col = "dodgerblue3", lwd = 2)
#
# legend(x = 1600, y = 0.62, legend = c("Jackknife", "DFBETA"), lwd = 2, title = "unselected",
# col = c("dodgerblue3", "firebrick3"), inset = 0.05, bty = "n", pch = c(1, 3),
# text.col = c("dodgerblue3", "firebrick3"), title.col = "black")
#
# legend(x = 1000, y = 0.62, legend = c("Jackknife", "DFBETA"), lwd = 2, title = "selected",
# col = c("darkgoldenrod3", "chartreuse4"), inset = 0.05, bty = "n", pch = c(1, 3),
# text.col = c("darkgoldenrod3", "chartreuse4"), title.col = "black")
## dev.off()
## Section 5.2: Breast Cancer Study
## define predictors as suggested by Sauerbrei (1999, Applied Statistics)
GBSG$enodes <- exp(-0.12*GBSG$posnodal)
# create ordinal dummy-coded variable tumgrad for grades 1 to 3
contrasts(GBSG$tumgrad) <- matrix(c(0, 1, 1, 0, 0, 1), ncol = 2, byrow = FALSE,
dimnames = list(1:3, c("tumgrad1", "tumgrad2")))
contrasts(GBSG$tumgrad)
## tumgrad1 tumgrad2
## 1 0 0
## 2 1 0
## 3 1 1
# for nicer variable names use default dimnames
contrasts(GBSG$tumgrad) <- matrix(c(0, 1, 1, 0, 0, 1), ncol = 2, byrow = FALSE)
## # model selection (backward elimination) and estimation
fitg <- mfp(Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + fp(tumsize) +
fp(enodes) + tumgrad + menostat + strata(htreat),
family = cox, data = GBSG, alpha = 0.05, select = 0.05)
print(fitg)
## Call:
## mfp(formula = Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) +
## fp(tumsize) + fp(enodes) + tumgrad + menostat + strata(htreat),
## data = GBSG, family = cox, alpha = 0.05, select = 0.05)
##
##
## Deviance table:
## Resid. Dev
## Null model 3198.026
## Linear model 3077.005
## Final model 3055.989
##
## Fractional polynomials:
## df.initial select alpha df.final power1 power2
## enodes 4 0.05 0.05 1 1 .
## prm 4 0.05 0.05 2 0.5 .
## tumgrad1 1 0.05 0.05 1 1 .
## tumgrad2 1 0.05 0.05 0 . .
## tumsize 4 0.05 0.05 0 . .
## menostat2 1 0.05 0.05 0 . .
## age 4 0.05 0.05 4 -2 -1
## esm 4 0.05 0.05 0 . .
##
##
## Transformations of covariates:
## formula
## age I((age/100)^-2)+I((age/100)^-1)
## prm I(((prm+1)/100)^0.5)
## esm <NA>
## tumsize <NA>
## enodes I(enodes^1)
## tumgrad tumgrad
## menostat <NA>
##
## coef exp(coef) se(coef) z p
## enodes.1 -1.9782 0.13832 0.2272 -8.706 0.00e+00
## prm.1 -0.5717 0.56456 0.1109 -5.154 2.55e-07
## tumgrad1.1 0.5137 1.67141 0.2495 2.059 3.95e-02
## age.1 0.6060 1.83303 0.1188 5.101 3.37e-07
## age.2 -2.6539 0.07037 0.5902 -4.496 6.91e-06
##
## Likelihood ratio test=142 on 5 df, p=0 n= 686
## Table 3
## (p-values unconditional on the selected degree and power for prm, age, and
## enodes = fitg$pvalues)
varorder <- c("age.1", "age.2", "prm.1", "enodes.1", "tumgrad1.1")
t3_beta_j <- c(NA, round(fitg$coefficients, 3)[varorder])
t3_df <- c(fitg$df.initial[7, ], NA, NA, fitg$df.initial[c(2, 1, 3), ])
t3_p <- c(round(fitg$pvalues[7, "p.null"], 3), NA, NA,
round(fitg$pvalues[c(2, 1, 3), "p.null"], 3))
t3_pwsf0 <- shrink(fitg, type = "parameterwise", method = "jackknife")
t3_pwsf <- round(c(NA, t3_pwsf0$ShrinkageFactors[varorder]), 3)
t3_pwsfse <- round(c(NA, sqrt(diag(t3_pwsf0$ShrinkageFactorsVCOV))[varorder]), 3)
t3_jointsf0 <- shrink(fitg, type = "parameterwise", method = "jackknife",
join=list(c("age.1", "age.2")))
t3_jointsf <- round(c(t3_jointsf0$ShrinkageFactors[varorder[1]], NA, NA,
t3_jointsf0$ShrinkageFactors[varorder[-c(1:2)]]), 3)
t3_jointsfse <- round(c(sqrt(diag(t3_jointsf0$ShrinkageFactorsVCOV))["join.age.1"],
NA, NA,
sqrt(diag(t3_jointsf0$ShrinkageFactorsVCOV))[varorder[-c(1:2)]]), 3)
Table3 <- cbind("beta_j" = t3_beta_j, "df" = t3_df, "p value" = t3_p,
"PWSF" = t3_pwsf, "PWSF SE" = t3_pwsfse,
"Joint shrinkage" = t3_jointsf,
"Joint shrinkage SE" = t3_jointsfse)
dimnames(Table3)[[1]][1] <- "age"
Table3
## beta_j df p value PWSF PWSF SE Joint shrinkage Joint shrinkage SE
## age NA 4 0.001 NA NA 0.876 0.188
## age.1 0.606 NA NA 0.811 0.236 NA NA
## age.2 -2.654 NA NA 0.782 0.277 NA NA
## prm.1 -0.572 4 0.000 0.978 0.189 0.982 0.189
## enodes.1 -1.978 4 0.000 0.987 0.116 0.986 0.116
## tumgrad1.1 0.514 1 0.028 0.811 0.453 0.809 0.452
## compute shrinkage factors
globalsf <- shrink(fitg, type = "global", method = "jackknife")
globalsf
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.9526639
##
## Shrunken Regression Coefficients:
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## -1.8845360 -0.5446421 0.4893540 0.5772861 -2.5282947
## global
## global 0.08062613
## Shrinkage Factors (type=parameterwise, method=jackknife):
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## 0.9874769 0.9777288 0.8108045 0.8108243 0.7822610
##
## Shrunken Regression Coefficients:
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## -1.9534020 -0.5589719 0.4164852 0.4913355 -2.0760587
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## 0.116 0.189 0.453 0.236 0.277
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## enodes.1 1.000 -0.055 -0.078 0.030 0.021
## prm.1 -0.055 1.000 -0.200 0.026 0.032
## tumgrad1.1 -0.078 -0.200 1.000 -0.040 -0.035
## age.1 0.030 0.026 -0.040 1.000 0.984
## age.2 0.021 0.032 -0.035 0.984 1.000
jointsf <- shrink(fitg, type = "parameterwise", method = "jackknife",
join=list(c("age.1", "age.2")))
jointsf
## Shrinkage Factors (type=parameterwise with join option, method=jackknife):
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## 0.9862779 0.9816686 0.8094979 0.8763299 0.8763299
##
## Shrunken Regression Coefficients:
## enodes.1 prm.1 tumgrad1.1 age.1 age.2
## -1.9510302 -0.5612242 0.4158141 0.5310300 -2.3257103
## join.age.1 enodes.1 prm.1 tumgrad1.1
## 0.188 0.116 0.189 0.452
## Figure 2: selected model: Log hazard relative to 50 years
# refit model fitg explicitly including the two dummy variables of
# tumgrad
GBSG <- cbind(GBSG, model.matrix(~tumgrad, data = GBSG)[, -1])
fitg <- mfp(Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + fp(tumsize) +
fp(enodes) + tumgrad1 + tumgrad2 + menostat + strata(htreat),
family = cox, data = GBSG, alpha = 0.05, select = 0.05)
globalsf <- shrink(fitg, type = "global", method = "jackknife")
pwsf <- shrink(fitg, type = "parameterwise", method = "jackknife")
jointsf <- shrink(fitg, type = "parameterwise", method = "jackknife",
join=list(c("age.1", "age.2")))
# define new data for which prediction is requested
# newdatref is the new data set for the reference observation
age <- 30:75
newdat <- data.frame(age = age, enodes = 1, prm = 1, tumgrad1 = 0,
htreat = factor(1))
newdatref <- data.frame(age = 50, enodes = 1, prm = 1, tumgrad1 = 0,
htreat = factor(1))
xaxis <- seq(from = min(age), to = max(age), by = 5)
# pdf("plotgbsg.pdf", width = 6, height = 6)
par(fig = c(0, 1, 0, 0.3), new = FALSE, oma = c(0, 0, 0.5, 0.5),
mar = c(3.5, 4, 0, 0))
hist(GBSG$age[(GBSG$age >= min(age)) & (GBSG$age <= max(age))],
breaks = seq(from = min(age), to = max(age), by = 2.5), main = "",
xlim = range(age), xaxs = "r", yaxs = "r", xlab = "", ylab = "",
axes = FALSE, col = "gray88")
axis(side = 1, at = xaxis)
axis(side = 2, at = seq(from = 0, to = 60, by = 20), las = 2)
mtext(side = 1, text = "Age", line = 2.3)
mtext(side = 2, text = "Frequency", line = 2.8)
box()
par(fig = c(0,1,0.3,1), new = TRUE, oma = c(0, 0, 0.5, 0.5),
mar = c(1, 4, 0, 0))
plot(range(age), c(-0.05, 0.8), xlab = "", ylab = "", type = "n", yaxs = "i",
xaxt = "n", yaxt = "n")
axis(side = 1, at = xaxis, labels = rep("", times = length(xaxis)))
axis(side = 2, at = seq(from = 0, to = 0.7, by = 0.1), las = 2,
labels = seq(from = 0, to = 0.7, by = 0.1))
mtext(side = 2, text = "Log hazard relative to 50 years", line = 2.8)
lines(x = age, y = predict(fitg, newdata = newdat, type = "lp") -
predict(fitg, newdata = newdatref, type = "lp"), lty = 1, col = "gray25",
lwd = 2)
lines(x = age, y = predict(globalsf, newdata = newdat, type = "lp") -
predict(globalsf, newdata = newdatref, type = "lp"), lty = 5,
col = "forestgreen", lwd = 2)
lines(x = age, y = predict(pwsf, newdata = newdat, type="lp") -
predict(pwsf, newdata = newdatref, type = "lp"), lty = 2,
col = "dodgerblue3", lwd = 2)
lines(x = age, y = predict(jointsf, newdata = newdat, type = "lp") -
predict(jointsf, newdata = newdatref, type = "lp"), lty = 4,
col = "firebrick3", lwd = 2)
legend("topright", inset = 0.02, bty = "n", lty = c(1, 5, 4, 2),
title = "SHRINKAGE",
legend = c("No", "Global", "Joint", "Parameterwise"),
col = c("gray25", "forestgreen", "firebrick3", "dodgerblue3"), lwd = 2,
text.col = c("gray25", "forestgreen", "firebrick3", "dodgerblue3"))
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.