The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

JSS Example Code

Daniela Dunkler

2016-03-09

This is the example code for Dunkler D, Sauerbrei W, Heinze G (2016). Global, Parameterwise and Joint Shrinkage Factor Estimation. Journal of Statistical Software. 69(8), 1-19. http://dx.doi.org/10.18637/jss.v069.i08.

Example R Code

################################################################################
### R code for
### "Global, Parameterwise and Joint Shrinkage Factor Estimation"
### written by Daniela Dunkler, March 2016
################################################################################

## works with R 3.2.3 & shrink-package 1.2.1

## load the packages
library("shrink")
library("survival")
library("mfp")
library("aod")                                                  # for wald-test
## 
## Attache Paket: 'aod'
## Das folgende Objekt ist maskiert 'package:survival':
## 
##     rats
sessionInfo()
## R version 4.3.1 (2023-06-16 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 17763)
## 
## Matrix products: default
## 
## 
## locale:
## [1] LC_COLLATE=C                    LC_CTYPE=German_Austria.1252   
## [3] LC_MONETARY=German_Austria.1252 LC_NUMERIC=C                   
## [5] LC_TIME=German_Austria.1252    
## 
## time zone: Europe/Vienna
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] aod_1.3.2      mfp_1.5.4      survival_3.5-5 shrink_1.2.3   knitr_1.45    
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.4        xfun_0.40           bslib_0.5.1        
##  [4] ggplot2_3.4.4       htmlwidgets_1.6.2   lattice_0.21-8     
##  [7] numDeriv_2016.8-1.1 vctrs_0.6.3         tools_4.3.1        
## [10] generics_0.1.3      sandwich_3.0-2      tibble_3.2.1       
## [13] fansi_1.0.4         cluster_2.1.4       pkgconfig_2.0.3    
## [16] Matrix_1.6-1        data.table_1.14.8   checkmate_2.2.0    
## [19] lifecycle_1.0.3     compiler_4.3.1      stringr_1.5.0      
## [22] MatrixModels_0.5-2  munsell_0.5.0       codetools_0.2-19   
## [25] SparseM_1.81        quantreg_5.97       htmltools_0.5.6    
## [28] sass_0.4.7          yaml_2.3.7          htmlTable_2.4.2    
## [31] Formula_1.2-5       pillar_1.9.0        jquerylib_0.1.4    
## [34] MASS_7.3-60         cachem_1.0.8        rms_6.7-0          
## [37] Hmisc_5.1-1         rpart_4.1.19        multcomp_1.4-25    
## [40] nlme_3.1-162        tidyselect_1.2.0    digest_0.6.33      
## [43] polspline_1.1.23    mvtnorm_1.2-3       stringi_1.7.12     
## [46] dplyr_1.1.2         splines_4.3.1       fastmap_1.1.1      
## [49] grid_4.3.1          colorspace_2.1-0    cli_3.6.1          
## [52] magrittr_2.0.3      base64enc_0.1-3     utf8_1.2.3         
## [55] TH.data_1.1-2       foreign_0.8-84      scales_1.2.1       
## [58] backports_1.4.1     rmarkdown_2.25      nnet_7.3-19        
## [61] gridExtra_2.3       zoo_1.8-12          evaluate_0.22      
## [64] rlang_1.1.1         glue_1.6.2          rstudioapi_0.15.0  
## [67] jsonlite_1.8.7      R6_2.5.1

Section 2.1: Deep Vein Thrombosis Study

## Section 2.1: Deep Vein Thrombosis Study
## load data
data("deepvein")

# number of observations, events, median observation time, and names of
# variables
nrow(deepvein)
## [1] 929
deepvein$status2 <- abs(deepvein$status-1)
survfit(Surv(time, status2) ~ 1, data = deepvein)
## Call: survfit(formula = Surv(time, status2) ~ 1, data = deepvein)
## 
##        n events median 0.95LCL 0.95UCL
## [1,] 929    782   37.8    35.6    41.7
table(deepvein$status)
## 
##   0   1 
## 782 147
round(100*table(deepvein$status)/nrow(deepvein), 2)
## 
##     0     1 
## 84.18 15.82
sort(names(deepvein))
##  [1] "age"      "bmi"      "durther"  "fiimut"   "fvleid"   "loc"     
##  [7] "log2ddim" "pnr"      "sex"      "status"   "status2"  "time"

Section 2.2: Breast Cancer Study

## Section 2.2: Breast Cancer Study
## load data
data("GBSG")


# number of observations, events, median observation time, and names of
# variables
nrow(GBSG)
## [1] 686
table(GBSG$cens)
## 
##   0   1 
## 387 299
round(100*table(GBSG$cens)/nrow(GBSG), 2)
## 
##     0     1 
## 56.41 43.59
GBSG$cens2 <- abs(GBSG$cens-1)
# median observation time is given in days here
survfit(Surv(rfst, cens2) ~ 1, data = GBSG)
## Call: survfit(formula = Surv(rfst, cens2) ~ 1, data = GBSG)
## 
##        n events median 0.95LCL 0.95UCL
## [1,] 686    387   1645    1570    1717
# median observation time in months
1645 / 30.5
## [1] 53.93443
sort(names(GBSG))
##  [1] "age"      "cens"     "cens2"    "esm"      "htreat"   "id"      
##  [7] "menostat" "posnodal" "prm"      "rfst"     "tumgrad"  "tumsize"

Section 5.1: Deep Vein Thrombosis Study

## Section 5.1: Deep Vein Thrombosis Study
# set the reference level for all categorical variables
deepvein$sex <- relevel(deepvein$sex, ref = "female")
deepvein$fiimut <- relevel(deepvein$fiimut, ref = "absent")
deepvein$fvleid <- relevel(deepvein$fvleid, ref = "absent")
deepvein$loc <- relevel(deepvein$loc, ref = "PE")
# contrasts(deepvein$loc)


## fit full model, and compute shrinkage factors (jackknife estimates and dfbeta
## approximations)
fitfull <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age +
                 sex + loc + fiimut + fvleid, data = deepvein, x = TRUE)
summary(fitfull)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + bmi + durther + 
##     age + sex + loc + fiimut + fvleid, data = deepvein, x = TRUE)
## 
##   n= 929, number of events= 147 
## 
##                    coef exp(coef)  se(coef)      z Pr(>|z|)   
## log2ddim       0.219517  1.245475  0.085739  2.560  0.01046 * 
## bmi            0.005865  1.005883  0.019051  0.308  0.75817   
## durther        0.021881  1.022122  0.023681  0.924  0.35550   
## age           -0.003973  0.996035  0.006583 -0.603  0.54622   
## sexmale        0.495927  1.642019  0.189718  2.614  0.00895 **
## locdistal     -0.905095  0.404504  0.311078 -2.910  0.00362 **
## locproximal   -0.179351  0.835813  0.180336 -0.995  0.31996   
## fiimutpresent -0.162573  0.849954  0.390499 -0.416  0.67718   
## fvleidpresent -0.108886  0.896833  0.194228 -0.561  0.57506   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##               exp(coef) exp(-coef) lower .95 upper .95
## log2ddim         1.2455     0.8029    1.0528    1.4734
## bmi              1.0059     0.9942    0.9690    1.0442
## durther          1.0221     0.9784    0.9758    1.0707
## age              0.9960     1.0040    0.9833    1.0090
## sexmale          1.6420     0.6090    1.1321    2.3816
## locdistal        0.4045     2.4722    0.2199    0.7442
## locproximal      0.8358     1.1964    0.5870    1.1902
## fiimutpresent    0.8500     1.1765    0.3954    1.8272
## fvleidpresent    0.8968     1.1150    0.6129    1.3123
## 
## Concordance= 0.61  (se = 0.026 )
## Likelihood ratio test= 26.2  on 9 df,   p=0.002
## Wald test            = 23.07  on 9 df,   p=0.006
## Score (logrank) test = 23.9  on 9 df,   p=0.004
## wald-test for categorical predictor "loc"
wald.test(Sigma = vcov(fitfull)[c("locdistal", "locproximal"),
          c("locdistal", "locproximal")],
          b = fitfull$coeff[c("locdistal", "locproximal")], Terms = 1:2)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 8.6, df = 2, P(> X2) = 0.014
shrink(fitfull, type = "global", method = "jackknife")
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.6069174
## 
## Shrunken Regression Coefficients:
##      log2ddim           bmi       durther           age       sexmale 
##   0.133228852   0.003559850   0.013279842  -0.002411042   0.300986429 
##     locdistal   locproximal fiimutpresent fvleidpresent 
##  -0.549317699  -0.108851134  -0.098668180  -0.066084738
## apply backward elimination to full model*, and compute shrinkage factors to
## selected model
## * dummy variables of loc are jointly tested and selected
fitd <- step(fitfull, direction = "backward")
## Start:  AIC=1784.06
## Surv(time, status) ~ log2ddim + bmi + durther + age + sex + loc + 
##     fiimut + fvleid
## 
##            Df    AIC
## - bmi       1 1782.2
## - fiimut    1 1782.2
## - fvleid    1 1782.4
## - age       1 1782.4
## - durther   1 1782.9
## <none>        1784.1
## - log2ddim  1 1788.6
## - sex       1 1789.2
## - loc       2 1790.5
## 
## Step:  AIC=1782.15
## Surv(time, status) ~ log2ddim + durther + age + sex + loc + fiimut + 
##     fvleid
## 
##            Df    AIC
## - fiimut    1 1780.3
## - fvleid    1 1780.5
## - age       1 1780.5
## - durther   1 1781.0
## <none>        1782.2
## - log2ddim  1 1786.6
## - sex       1 1787.2
## - loc       2 1788.6
## 
## Step:  AIC=1780.34
## Surv(time, status) ~ log2ddim + durther + age + sex + loc + fvleid
## 
##            Df    AIC
## - fvleid    1 1778.7
## - age       1 1778.7
## - durther   1 1779.1
## <none>        1780.3
## - log2ddim  1 1784.9
## - sex       1 1785.3
## - loc       2 1786.7
## 
## Step:  AIC=1778.66
## Surv(time, status) ~ log2ddim + durther + age + sex + loc
## 
##            Df    AIC
## - age       1 1777.0
## - durther   1 1777.4
## <none>        1778.7
## - log2ddim  1 1783.2
## - sex       1 1783.4
## - loc       2 1785.0
## 
## Step:  AIC=1777.01
## Surv(time, status) ~ log2ddim + durther + sex + loc
## 
##            Df    AIC
## - durther   1 1775.8
## <none>        1777.0
## - log2ddim  1 1781.5
## - sex       1 1782.6
## - loc       2 1783.3
## 
## Step:  AIC=1775.77
## Surv(time, status) ~ log2ddim + sex + loc
## 
##            Df    AIC
## <none>        1775.8
## - log2ddim  1 1780.3
## - sex       1 1781.2
## - loc       2 1782.8
print(fitd)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + sex + loc, data = deepvein, 
##     x = TRUE)
## 
##                 coef exp(coef) se(coef)      z       p
## log2ddim     0.21879   1.24457  0.08543  2.561 0.01043
## sexmale      0.49091   1.63380  0.18473  2.657 0.00787
## locdistal   -0.92237   0.39758  0.31007 -2.975 0.00293
## locproximal -0.20505   0.81461  0.17867 -1.148 0.25112
## 
## Likelihood ratio test=24.49  on 4 df, p=6.367e-05
## n= 929, number of events= 147
## wald-test for categorical predictor "loc"
wald.test(Sigma = vcov(fitd)[c("locdistal", "locproximal"),
          c("locdistal", "locproximal")],
          b = fitd$coeff[c("locdistal", "locproximal")], Terms = 1:2)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 9.1, df = 2, P(> X2) = 0.01
shrink(fitd, type = "global", method = "jackknife")
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.8076362
## 
## Shrunken Regression Coefficients:
##    log2ddim     sexmale   locdistal locproximal 
##   0.1767045   0.3964779  -0.7449390  -0.1656066
pwsf <- shrink(fitd, type = "parameterwise", method = "jackknife")
pwsf
## Shrinkage Factors (type=parameterwise, method=jackknife):
##    log2ddim     sexmale   locdistal locproximal 
##   0.7321036   0.8351074   0.8393993   0.1321006 
## 
## Shrunken Regression Coefficients:
##    log2ddim     sexmale   locdistal locproximal 
##  0.16017851  0.40996379 -0.77423621 -0.02708736
sqrt(diag(pwsf$ShrinkageFactorsVCOV))
##    log2ddim     sexmale   locdistal locproximal 
##   0.3703055   0.3699730   0.3268146   0.8696058
shrink(fitd, type = "parameterwise", method = "jackknife",
       join = list(c("locdistal", "locproximal")))
## Shrinkage Factors (type=parameterwise with join option, method=jackknife):
##    log2ddim     sexmale   locdistal locproximal 
##   0.7806284   0.8363976   0.8055175   0.8055175 
## 
## Shrunken Regression Coefficients:
##    log2ddim     sexmale   locdistal locproximal 
##   0.1707954   0.4105972  -0.7429847  -0.1651721
## dummy variables of loc are separately tested and selected
## generate dummy variables, fit full model and then apply backward elimination
deepvein <- cbind(deepvein, model.matrix( ~ loc, data = deepvein)[, -1])
fitfull2 <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age +
                  sex + locdistal + locproximal + fiimut + fvleid,
                  data = deepvein, x = TRUE)  # fitfull2 is identical to fitfull

fitd2 <- step(fitfull2, direction = "backward")
## Start:  AIC=1784.06
## Surv(time, status) ~ log2ddim + bmi + durther + age + sex + locdistal + 
##     locproximal + fiimut + fvleid
## 
##               Df    AIC
## - bmi          1 1782.2
## - fiimut       1 1782.2
## - fvleid       1 1782.4
## - age          1 1782.4
## - durther      1 1782.9
## - locproximal  1 1783.1
## <none>           1784.1
## - log2ddim     1 1788.6
## - sex          1 1789.2
## - locdistal    1 1792.5
## 
## Step:  AIC=1782.15
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal + 
##     locproximal + fiimut + fvleid
## 
##               Df    AIC
## - fiimut       1 1780.3
## - fvleid       1 1780.5
## - age          1 1780.5
## - durther      1 1781.0
## - locproximal  1 1781.2
## <none>           1782.2
## - log2ddim     1 1786.6
## - sex          1 1787.2
## - locdistal    1 1790.6
## 
## Step:  AIC=1780.34
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal + 
##     locproximal + fvleid
## 
##               Df    AIC
## - fvleid       1 1778.7
## - age          1 1778.7
## - durther      1 1779.1
## - locproximal  1 1779.3
## <none>           1780.3
## - log2ddim     1 1784.9
## - sex          1 1785.3
## - locdistal    1 1788.7
## 
## Step:  AIC=1778.66
## Surv(time, status) ~ log2ddim + durther + age + sex + locdistal + 
##     locproximal
## 
##               Df    AIC
## - age          1 1777.0
## - durther      1 1777.4
## - locproximal  1 1777.8
## <none>           1778.7
## - log2ddim     1 1783.2
## - sex          1 1783.4
## - locdistal    1 1787.0
## 
## Step:  AIC=1777.01
## Surv(time, status) ~ log2ddim + durther + sex + locdistal + locproximal
## 
##               Df    AIC
## - durther      1 1775.8
## - locproximal  1 1776.2
## <none>           1777.0
## - log2ddim     1 1781.5
## - sex          1 1782.6
## - locdistal    1 1785.3
## 
## Step:  AIC=1775.77
## Surv(time, status) ~ log2ddim + sex + locdistal + locproximal
## 
##               Df    AIC
## - locproximal  1 1775.1
## <none>           1775.8
## - log2ddim     1 1780.3
## - sex          1 1781.2
## - locdistal    1 1784.8
## 
## Step:  AIC=1775.1
## Surv(time, status) ~ log2ddim + sex + locdistal
## 
##             Df    AIC
## <none>         1775.1
## - log2ddim   1 1778.9
## - sex        1 1780.7
## - locdistal  1 1782.8
summary(fitd2)
## Call:
## coxph(formula = Surv(time, status) ~ log2ddim + sex + locdistal, 
##     data = deepvein, x = TRUE)
## 
##   n= 929, number of events= 147 
## 
##               coef exp(coef) se(coef)      z Pr(>|z|)   
## log2ddim   0.20392   1.22621  0.08483  2.404   0.0162 * 
## sexmale    0.49535   1.64107  0.18496  2.678   0.0074 **
## locdistal -0.84053   0.43148  0.30277 -2.776   0.0055 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##           exp(coef) exp(-coef) lower .95 upper .95
## log2ddim     1.2262     0.8155    1.0384    1.4480
## sexmale      1.6411     0.6094    1.1421    2.3581
## locdistal    0.4315     2.3176    0.2384    0.7811
## 
## Concordance= 0.602  (se = 0.026 )
## Likelihood ratio test= 23.16  on 3 df,   p=4e-05
## Wald test            = 20.03  on 3 df,   p=2e-04
## Score (logrank) test = 20.85  on 3 df,   p=1e-04
shrink(fitd2, type = "global", method = "jackknife")
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.8439991
## 
## Shrunken Regression Coefficients:
##   log2ddim    sexmale  locdistal 
##  0.1721124  0.4180761 -0.7094099
shrink(fitd2, type = "parameterwise", method = "jackknife")
## Shrinkage Factors (type=parameterwise, method=jackknife):
##  log2ddim   sexmale locdistal 
## 0.7700839 0.8317218 0.8975722 
## 
## Shrunken Regression Coefficients:
##   log2ddim    sexmale  locdistal 
##  0.1570393  0.4119946 -0.7544400
## Table 2
t2_jack <- c(shrink(fitd, type = "global", method = "jackknife")$ShrinkageFactors,
             pwsf$ShrinkageFactors,
             shrink(fitd, type = "parameterwise", method = "jackknife",
                    join = list(c("locdistal", "locproximal")))$ShrinkageFactors,
             system.time(shrink(fitd, type = "parameterwise", method = "jackknife"))[1])

t2_dfbeta <- c(shrink(fitd, type = "global", method = "dfbeta")$ShrinkageFactors,
               shrink(fitd, type = "parameterwise", method = "dfbeta")$ShrinkageFactors,
               shrink(fitd, type = "parameterwise", method = "dfbeta",
                      join = list(c("locdistal", "locproximal")))$ShrinkageFactors,
               system.time(shrink(fitd, type = "parameterwise", method = "dfbeta"))[1])

t2 <- cbind(Jackknife = t2_jack, DFBETA = t2_dfbeta)

Table2 <- cbind(t2, round((t2[, 2] - t2[, 1]) / t2[, 1] * 100, 1))
Table2[, 1:2] <- round(Table2[, 1:2], 4)
dimnames(Table2)[[2]][3] <- "Relative difference"
Table2 <- rbind(Table2[1,], rep(NA, 3), Table2[2:5,], rep(NA, 3), Table2[6:8,],
                rep(NA, 3), Table2[10,])
dimnames(Table2)[[1]][c(1:2, 7, 10, 12)] <- c("Global shrinkage",
          "Parameterwise shrinkage", "Joint shrinkage", "loc", "Computing time")
Table2
##                         Jackknife DFBETA Relative difference
## Global shrinkage           0.8076 0.8123                 0.6
## Parameterwise shrinkage        NA     NA                  NA
## log2ddim                   0.7321 0.7385                 0.9
## sexmale                    0.8351 0.8373                 0.3
## locdistal                  0.8394 0.8449                 0.7
## locproximal                0.1321 0.1470                11.2
## Joint shrinkage                NA     NA                  NA
## log2ddim                   0.7806 0.7864                 0.7
## sexmale                    0.8364 0.8386                 0.3
## loc                        0.8055 0.8111                 0.7
##                                NA     NA                  NA
## Computing time             0.5600 0.0100               -98.2
# max. difference
max(abs(Table2[1:10, 1] - Table2[1:10, 2]), na.rm=TRUE)
## [1] 0.0149
## Figure 1: Data simulated from deep vein thrombosis study: Comparison of Jackknife and
##           DFBETA-approximated global shrinkage factors for selected and unselected
##           models.

deepvein0 <- subset(deepvein, status == 0)
deepvein1 <- subset(deepvein, status == 1)

n0 <- nrow(deepvein0)
n1 <- nrow(deepvein1)

ratio <- 0.2                                     # based on n1 / n0
## Note: Running this code is time-consuming, since 2 * B * S data sets are analyzed.
## size <- seq(from = 200, to = 2000, length.out = 19)      # based on nrow(deepvein)
# B <- 100
## You may want to reduce B and length.out of size: e.g.
## size <- seq(from = 200, to = 2000, length.out = 4)
## B <- 3
#
# S <- length(size)
# 
# shrinkGJ <- shrinkGD <- shrinkGJsel <- shrinkGDsel <- matrix(NA, nrow = B, ncol = S)
# set.seed(954681)
# 
# for (s in 1:S) {
#   cat("\n", s)
#   for (b in 1:B) {
#     cat(".")
#     data <- rbind(deepvein0[sample(x = 1:n0, size = size[s]*(1-ratio), replace = TRUE),],
#                   deepvein1[sample(x = 1:n1, size = size[s]*ratio, replace = TRUE),])
#     
#     fitfull <- coxph(Surv(time, status) ~ log2ddim + bmi + durther + age + sex + loc +
#                        fiimut + fvleid, data = data, x = TRUE)
#     fitsel <- step(fitfull, direction = "backward", trace = 0)
#     
#     if (!is.null(fitsel$coefficients)) {                     # no null model selected
#       shrinkGJsel[b, s] <- shrink(fitsel, type = "global", method = "jackknife")$ShrinkageFactors
#       shrinkGDsel[b, s] <- shrink(fitsel, type = "global", method = "dfbeta")$ShrinkageFactors
#     }
#     
#     
#     fit <- coxph(Surv(time, status) ~ log2ddim + sex + loc, data = data, x = TRUE)
#     
#     shrinkGJ[b, s] <- shrink(fit, type = "global", method = "jackknife")$ShrinkageFactors
#     shrinkGD[b, s] <- shrink(fit, type = "global", method = "dfbeta")$ShrinkageFactors
#   }
# }
## In some smaller data sets (and especially when shrinkage factors are estimated with the
## Jackknife method) there might be issues with convergence in individual data sets.
## However, coxph will issue a warning.
# 
# 
# shrinkGDselm <- apply(shrinkGDsel, 2, median)
# shrinkGJselm <- apply(shrinkGJsel, 2, median)
# 
# cbind(n = size, Diff_J_D_sel = shrinkGJselm - shrinkGDselm)
# 
# shrinkGDm <- apply(shrinkGD, 2, median)
# shrinkGJm <- apply(shrinkGJ, 2, median)
# 
# cbind(n = size, Diff_J_D = shrinkGJm - shrinkGDm)
# 
# xrange <- c(0.5, 1)
## xrange <- range(shrinkGDselm, shrinkGJselm, shrinkGDm, shrinkGJm)

## pdf("compJvsD.pdf", width = 6, height = 4)
# par(oma = c(0, 0, 0.5, 0.5), mar = c(3.5, 4, 0, 0))
# plot(range(size), xrange, type = "n", las = 1, xlab = "", ylab = "", xaxt = "n")
# axis(1, at = size)
# mtext(side = 1, text = "Sample size", line = 2.3)
# mtext(side = 2, text = "Global shrinkage factor", line = 2.8)
# 
# points(size, shrinkGDselm, pch = 3, col = "darkolivegreen4", cex = 1.5)
# points(size, shrinkGJselm, pch = 1, col = "darkgoldenrod3", cex = 1.3)
# lines(size, shrinkGDselm, lty = 2, col = "darkolivegreen4", lwd = 2)
# lines(size, shrinkGJselm, lty = 3, col = "darkgoldenrod3", lwd = 2)
# 
# points(size, shrinkGDm, pch = 3, col = "firebrick3", cex = 1.5)
# points(size, shrinkGJm, pch = 1, col = "dodgerblue3", cex = 1.3)
# lines(size, shrinkGDm, lty = 2, col = "firebrick3", lwd = 2)
# lines(size, shrinkGJm, lty = 3, col = "dodgerblue3", lwd = 2)
# 
# legend(x = 1600, y = 0.62, legend = c("Jackknife", "DFBETA"), lwd = 2, title = "unselected",
#        col = c("dodgerblue3", "firebrick3"), inset = 0.05, bty = "n", pch = c(1, 3),
#        text.col = c("dodgerblue3", "firebrick3"), title.col = "black")
# 
# legend(x = 1000, y = 0.62, legend = c("Jackknife", "DFBETA"), lwd = 2, title = "selected",
#        col = c("darkgoldenrod3", "chartreuse4"), inset = 0.05, bty = "n", pch = c(1, 3),
#        text.col = c("darkgoldenrod3", "chartreuse4"), title.col = "black")
## dev.off()

Section 5.2: Breast Cancer Study

## Section 5.2: Breast Cancer Study
## define predictors as suggested by Sauerbrei (1999, Applied Statistics)
GBSG$enodes <- exp(-0.12*GBSG$posnodal)


# create ordinal dummy-coded variable tumgrad for grades 1 to 3
contrasts(GBSG$tumgrad) <- matrix(c(0, 1, 1, 0, 0, 1), ncol = 2, byrow = FALSE,
                                  dimnames = list(1:3, c("tumgrad1", "tumgrad2")))
contrasts(GBSG$tumgrad)
##   tumgrad1 tumgrad2
## 1        0        0
## 2        1        0
## 3        1        1
# for nicer variable names use default dimnames
contrasts(GBSG$tumgrad) <- matrix(c(0, 1, 1, 0, 0, 1), ncol = 2, byrow = FALSE)


## # model selection (backward elimination) and estimation
fitg <- mfp(Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + fp(tumsize) +
              fp(enodes) + tumgrad + menostat + strata(htreat),
            family = cox, data = GBSG, alpha = 0.05, select = 0.05)
print(fitg)
## Call:
## mfp(formula = Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + 
##     fp(tumsize) + fp(enodes) + tumgrad + menostat + strata(htreat), 
##     data = GBSG, family = cox, alpha = 0.05, select = 0.05)
## 
## 
## Deviance table:
##           Resid. Dev
## Null model    3198.026
## Linear model  3077.005
## Final model   3055.989
## 
## Fractional polynomials:
##           df.initial select alpha df.final power1 power2
## enodes             4   0.05  0.05        1      1      .
## prm                4   0.05  0.05        2    0.5      .
## tumgrad1           1   0.05  0.05        1      1      .
## tumgrad2           1   0.05  0.05        0      .      .
## tumsize            4   0.05  0.05        0      .      .
## menostat2          1   0.05  0.05        0      .      .
## age                4   0.05  0.05        4     -2     -1
## esm                4   0.05  0.05        0      .      .
## 
## 
## Transformations of covariates:
##                                  formula
## age      I((age/100)^-2)+I((age/100)^-1)
## prm                 I(((prm+1)/100)^0.5)
## esm                                 <NA>
## tumsize                             <NA>
## enodes                       I(enodes^1)
## tumgrad                          tumgrad
## menostat                            <NA>
## 
##               coef exp(coef) se(coef)      z        p
## enodes.1   -1.9782   0.13832   0.2272 -8.706 0.00e+00
## prm.1      -0.5717   0.56456   0.1109 -5.154 2.55e-07
## tumgrad1.1  0.5137   1.67141   0.2495  2.059 3.95e-02
## age.1       0.6060   1.83303   0.1188  5.101 3.37e-07
## age.2      -2.6539   0.07037   0.5902 -4.496 6.91e-06
## 
## Likelihood ratio test=142  on 5 df, p=0 n= 686
## Table 3
## (p-values unconditional on the selected degree and power for prm, age, and
## enodes = fitg$pvalues)
varorder <- c("age.1", "age.2", "prm.1", "enodes.1", "tumgrad1.1")

t3_beta_j <- c(NA, round(fitg$coefficients, 3)[varorder])
t3_df <- c(fitg$df.initial[7, ], NA, NA, fitg$df.initial[c(2, 1, 3), ])
t3_p <- c(round(fitg$pvalues[7, "p.null"], 3), NA, NA,
          round(fitg$pvalues[c(2, 1, 3), "p.null"], 3))
t3_pwsf0 <- shrink(fitg, type = "parameterwise", method = "jackknife")
t3_pwsf <- round(c(NA, t3_pwsf0$ShrinkageFactors[varorder]), 3)
t3_pwsfse <- round(c(NA, sqrt(diag(t3_pwsf0$ShrinkageFactorsVCOV))[varorder]), 3)
t3_jointsf0 <- shrink(fitg, type = "parameterwise", method = "jackknife",
                      join=list(c("age.1", "age.2")))
t3_jointsf <- round(c(t3_jointsf0$ShrinkageFactors[varorder[1]], NA, NA,
                      t3_jointsf0$ShrinkageFactors[varorder[-c(1:2)]]), 3)
t3_jointsfse <- round(c(sqrt(diag(t3_jointsf0$ShrinkageFactorsVCOV))["join.age.1"],
                        NA, NA,
                        sqrt(diag(t3_jointsf0$ShrinkageFactorsVCOV))[varorder[-c(1:2)]]), 3)

Table3 <- cbind("beta_j" = t3_beta_j, "df" = t3_df, "p value" = t3_p,
                "PWSF" = t3_pwsf, "PWSF SE" = t3_pwsfse,
                "Joint shrinkage" = t3_jointsf,
                "Joint shrinkage SE" = t3_jointsfse)
dimnames(Table3)[[1]][1] <- "age"
Table3
##            beta_j df p value  PWSF PWSF SE Joint shrinkage Joint shrinkage SE
## age            NA  4   0.001    NA      NA           0.876              0.188
## age.1       0.606 NA      NA 0.811   0.236              NA                 NA
## age.2      -2.654 NA      NA 0.782   0.277              NA                 NA
## prm.1      -0.572  4   0.000 0.978   0.189           0.982              0.189
## enodes.1   -1.978  4   0.000 0.987   0.116           0.986              0.116
## tumgrad1.1  0.514  1   0.028 0.811   0.453           0.809              0.452
## compute shrinkage factors
globalsf <- shrink(fitg, type = "global", method = "jackknife")
globalsf
## Shrinkage Factors (type=global, method=jackknife):
## [1] 0.9526639
## 
## Shrunken Regression Coefficients:
##   enodes.1      prm.1 tumgrad1.1      age.1      age.2 
## -1.8845360 -0.5446421  0.4893540  0.5772861 -2.5282947
sqrt(globalsf$ShrinkageFactorsVCOV)
##            global
## global 0.08062613
pwsf <- shrink(fitg, type = "parameterwise", method = "jackknife")
pwsf
## Shrinkage Factors (type=parameterwise, method=jackknife):
##   enodes.1      prm.1 tumgrad1.1      age.1      age.2 
##  0.9874769  0.9777288  0.8108045  0.8108243  0.7822610 
## 
## Shrunken Regression Coefficients:
##   enodes.1      prm.1 tumgrad1.1      age.1      age.2 
## -1.9534020 -0.5589719  0.4164852  0.4913355 -2.0760587
round(sqrt(diag(pwsf$ShrinkageFactorsVCOV)), 3)
##   enodes.1      prm.1 tumgrad1.1      age.1      age.2 
##      0.116      0.189      0.453      0.236      0.277
round(cov2cor(pwsf$ShrinkageFactorsVCOV), 3)
##            enodes.1  prm.1 tumgrad1.1  age.1  age.2
## enodes.1      1.000 -0.055     -0.078  0.030  0.021
## prm.1        -0.055  1.000     -0.200  0.026  0.032
## tumgrad1.1   -0.078 -0.200      1.000 -0.040 -0.035
## age.1         0.030  0.026     -0.040  1.000  0.984
## age.2         0.021  0.032     -0.035  0.984  1.000
jointsf <- shrink(fitg, type = "parameterwise", method = "jackknife",
                  join=list(c("age.1", "age.2")))
jointsf
## Shrinkage Factors (type=parameterwise with join option, method=jackknife):
##   enodes.1      prm.1 tumgrad1.1      age.1      age.2 
##  0.9862779  0.9816686  0.8094979  0.8763299  0.8763299 
## 
## Shrunken Regression Coefficients:
##   enodes.1      prm.1 tumgrad1.1      age.1      age.2 
## -1.9510302 -0.5612242  0.4158141  0.5310300 -2.3257103
round(sqrt(diag(jointsf$ShrinkageFactorsVCOV)), 3)
## join.age.1   enodes.1      prm.1 tumgrad1.1 
##      0.188      0.116      0.189      0.452
## Figure 2: selected model: Log hazard relative to 50 years
# refit model fitg explicitly including the two dummy variables of
# tumgrad
GBSG <- cbind(GBSG, model.matrix(~tumgrad, data = GBSG)[, -1])

fitg <- mfp(Surv(rfst, cens) ~ fp(age) + fp(prm) + fp(esm) + fp(tumsize) +
              fp(enodes) + tumgrad1 + tumgrad2 + menostat + strata(htreat),
            family = cox, data = GBSG, alpha = 0.05, select = 0.05)

globalsf <- shrink(fitg, type = "global", method = "jackknife")
pwsf <- shrink(fitg, type = "parameterwise", method = "jackknife")
jointsf <- shrink(fitg, type = "parameterwise", method = "jackknife",
                  join=list(c("age.1", "age.2")))

# define new data for which prediction is requested
# newdatref is the new data set for the reference observation
age <- 30:75
newdat <- data.frame(age = age, enodes = 1, prm = 1, tumgrad1 = 0,
                     htreat = factor(1))
newdatref <- data.frame(age = 50, enodes = 1, prm = 1, tumgrad1 = 0,
                        htreat = factor(1))


xaxis <- seq(from = min(age), to = max(age), by = 5)


# pdf("plotgbsg.pdf", width = 6, height = 6)
par(fig = c(0, 1, 0, 0.3), new = FALSE, oma = c(0, 0, 0.5, 0.5),
    mar = c(3.5, 4, 0, 0))
hist(GBSG$age[(GBSG$age >= min(age)) & (GBSG$age <= max(age))],
     breaks = seq(from = min(age), to = max(age), by = 2.5), main = "",
     xlim = range(age), xaxs = "r", yaxs = "r", xlab = "", ylab = "",
     axes = FALSE, col = "gray88")
axis(side = 1, at = xaxis)
axis(side = 2, at = seq(from = 0, to = 60, by = 20), las = 2)
mtext(side = 1, text = "Age", line = 2.3)
mtext(side = 2, text = "Frequency", line = 2.8)
box()

par(fig = c(0,1,0.3,1), new = TRUE, oma = c(0, 0, 0.5, 0.5),
    mar = c(1, 4, 0, 0))
plot(range(age), c(-0.05, 0.8), xlab = "", ylab = "", type = "n", yaxs = "i",
     xaxt = "n", yaxt = "n")
axis(side = 1, at = xaxis, labels = rep("", times = length(xaxis)))
axis(side = 2, at = seq(from = 0, to = 0.7, by = 0.1), las = 2,
     labels = seq(from = 0, to = 0.7, by = 0.1))
mtext(side = 2, text = "Log hazard relative to 50 years", line = 2.8)
lines(x = age, y = predict(fitg, newdata = newdat, type = "lp") -
        predict(fitg, newdata = newdatref, type = "lp"), lty = 1, col = "gray25",
      lwd = 2)
lines(x = age, y = predict(globalsf, newdata = newdat, type = "lp") -
        predict(globalsf, newdata = newdatref, type = "lp"), lty = 5,
      col = "forestgreen", lwd = 2)
lines(x = age, y = predict(pwsf, newdata = newdat, type="lp") -
        predict(pwsf, newdata = newdatref, type = "lp"), lty = 2,
      col = "dodgerblue3", lwd = 2)
lines(x = age, y = predict(jointsf, newdata = newdat, type = "lp") -
        predict(jointsf, newdata = newdatref, type = "lp"), lty = 4,
      col = "firebrick3", lwd = 2)

legend("topright", inset = 0.02, bty = "n", lty = c(1, 5, 4, 2),
       title = "SHRINKAGE",
       legend = c("No", "Global", "Joint", "Parameterwise"),
       col = c("gray25", "forestgreen", "firebrick3", "dodgerblue3"), lwd = 2,
       text.col = c("gray25", "forestgreen", "firebrick3", "dodgerblue3"))

# dev.off()

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.