The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

reproducibility - application

This script requires that the working directory includes the folders “data”, “results”, and “manuscript”, with the files “PPMI_Baseline_Data_02Jul2018.csv”, “PPMI_Year_1-3_Data_02Jul2018.csv” and “PPMI_Data_Dictionary_for_Baseline_Dataset_Jul2018.csv” in the folder “data”. We obtained our results using R 4.3.0 (2023-04-21) with cornet 0.0.8 (2023-06-01) on a local machine (aarch64-apple-darwin20, macOS Ventura 13.4).

Processing

# features
X <- read.csv("data/PPMI_Baseline_Data_02Jul2018.csv",row.names="PATNO",na.strings=c(".",""))
X <- X[X$APPRDX==1,] # Parkinson's disease
X[c("SITE","APPRDX","EVENT_ID","symptom5_comment")] <- NULL
100*mean(is.na(X)) # proportion missingness
#x <- mice::complete(data=mice::mice(X,m=10,maxit=5,method="pmm",seed=1),action="all") # low-dimensional
x <- lapply(seq_len(10),function(x) missRanger::missRanger(data=X,pmm.k=3,
        num.trees=100,verbose=0,seed=1)) # high-dimensional
x <- lapply(x,function(x) model.matrix(~.-1,data=x))

# outcome
Y <- read.csv("data/PPMI_Year_1-3_Data_02Jul2018.csv",na.strings=".")
Y <- Y[Y$APPRDX==1 & Y$EVENT_ID %in% c("V04","V06","V08"),]
Y <- Y[,c("EVENT_ID","PATNO","moca")]
Y <- reshape(Y,idvar="PATNO",timevar="EVENT_ID",direction="wide")
rownames(Y) <- Y$PATNO; Y$PATNO <- NULL

# overlap
names <- intersect(rownames(X),rownames(Y))
Y <- Y[names,]; x <- lapply(x,function(x) x[names,])
save(Y,x,file="data/processed_data.RData")
writeLines(text=capture.output(utils::sessionInfo(),cat("\n"),
        sessioninfo::session_info()),con="data/info_data.txt")

# dictionary
rm(list=ls())
info <- read.csv("data/PPMI_Data_Dictionary_for_Baseline_Dataset_Jul2018.csv",nrows=238)
info <- info[!info$Variable %in% c("","PATNO","SITE","APPRDX","EVENT_ID","symptom5_comment"),]
info <- paste(paste0("\\item \\textit{",info$Variable,"}: ",info$Description),collapse=" ")
info <- gsub(pattern=" \\(OFF\\)",replacement="",x=info)
info <- gsub(pattern="_",replacement="\\_",x=info,fixed=TRUE)
info <- gsub(pattern="&",replacement="\\\\&",x=info)
cat(info)

Analysis

load("data/processed_data.RData",verbose=TRUE)

colSums(!is.na(Y)) # sample size
round(100*colMeans(Y<25.5,na.rm=TRUE),1) # proportion impairment

# --- Cross-validating models. ---
names <- paste0(rep(c("lasso","ridge"),each=3),seq_len(3)) # change to 3!
loss <- fit <- p.log <- p.lin <- list()
for(i in seq_along(x)){
  loss[[i]] <- fit[[i]] <- p.log[[i]] <- p.lin[[i]] <- list()
    cat("i =",i,"\n")
    for(j in seq_along(names)){
      cat("j =",j," ")
      alpha <- 1*(substr(names[j],start=1,stop=5)=="lasso")
      index <- as.numeric(substr(names[j],start=6,stop=6))
      y <- Y[,index]
      cond <- !is.na(y)
      set.seed(i)
      loss[[i]][[names[j]]] <- cornet::cv.cornet(y=y[cond],cutoff=25.5,
                                  X=x[[i]][cond,],alpha=alpha,rf=(alpha==1),xgboost=(alpha==1))
      set.seed(i)
      fit[[i]][[names[j]]] <- cornet::cornet(y=y[cond],cutoff=25.5,
                                  X=x[[i]][cond,],alpha=alpha)
      set.seed(i)
      temp <- replicate(n=50,expr=cornet:::.test(y=y[cond],cutoff=25.5,X=x[[i]][cond,],alpha=alpha))
      p.log[[i]][[names[j]]] <- median(unlist(temp["log",]))
      p.lin[[i]][[names[j]]] <- median(unlist(temp["lin",]))
    }
    cat("\n")
}

save(loss,fit,p.log,p.lin,file="results/application.RData")
writeLines(text=capture.output(utils::sessionInfo(),cat("\n"),
        sessioninfo::session_info()),con="results/info_app.txt")

Results

load("results/application.RData",verbose=TRUE)

k <- "binomial" # compare: k <- "gaussian

names <- c(paste0("lasso",1:3),paste0("ridge",1:3))
frame <- data.frame(row.names=names)
for(i in names){

  # deviance
  dev <- as.data.frame(t(sapply(loss,function(x) x[[i]]$deviance)))
  dec <- (dev$combined-dev[[k]])/dev[[k]]
  # change in percent
  frame[i,"dev.min"] <- round(100*min(dec),digits=1)
  frame[i,"dev.max"] <- round(100*max(dec),digits=1)
  
  # proportion with improvement
  frame[i,"dev.num"] <- sum(dev$combined < dev[[k]])

  # significance based on multi-split
  if(k=="binomial"){
    pvalue <- sapply(p.log,function(x) x[[i]])
  }
  if(k=="gaussian"){
    pvalue <- sapply(p.lin,function(x) x[[i]])
  }
  frame[i,"pval.min"] <- round(min(pvalue),digits=3)
  frame[i,"pval.max"] <- round(max(pvalue),digits=3)

  # quantiles weight parameter
  q <- round(quantile(sapply(fit,function(x) x[[i]]$pi.min),probs=c(0,0.5,1)),digits=2)
  frame[i,"pi.min"] <- q[1]
  frame[i,"pi.med"] <- q[2]
  frame[i,"pi.max"] <- q[3]
  
  # quantiles scale parameter
  q <- round(quantile(sapply(fit,function(x) x[[i]]$sigma.min),probs=c(0,0.5,1)),digits=2)
  frame[i,"sd.min"] <- q[1]
  frame[i,"sd.med"] <- q[2]
  frame[i,"sd.max"] <- q[3]
}

# presentation
frame <- format(frame)
rownames(frame) <- paste0(substr(x=rownames(frame),start=1,stop=5)," ",
                          substr(x=rownames(frame),start=6,stop=6))
colnames(frame) <- gsub(pattern="dev.",replacement="\\\\delta_{\\\\text{",x=colnames(frame))
colnames(frame) <- gsub(pattern="pval.",replacement="p_{\\\\text{",x=colnames(frame))
colnames(frame) <- gsub(pattern="pi.",replacement="\\\\pi_{\\\\text{",x=colnames(frame))
colnames(frame) <- gsub(pattern="sd.",replacement="\\\\sigma_{\\\\text{",x=colnames(frame))
colnames(frame) <- paste0("$",colnames(frame),"}}$")

xtable <- xtable::xtable(frame,align="r|rrc|cc|ccc|ccc")
xtable::print.xtable(xtable,include.rownames=TRUE,sanitize.text.function=function(x) x,comment=FALSE)
load("results/application.RData",verbose=TRUE)

names <- c(paste0("lasso",1:3),paste0("ridge",1:3))
type <- c("deviance","class","mse","mae","auc","prauc")

k <- "binomial"

frame <- matrix(NA,nrow=length(names),ncol=length(type),dimnames=list(names,type))

for(i in names){
  for(j in type){
    value <- as.data.frame(t(sapply(loss,function(x) x[[i]][[j]])))
    change <- 100*(value$combined-value[[k]])/value[[k]]
    frame[i,j] <- median(change)
  }
}

frame <- format(round(frame,digits=1))
frame <- gsub(pattern=" ",replacement="+",x=frame)
colnames(frame) <- sapply(colnames(frame),function(x) switch(x,class="\\textsc{mcr}",mse="\\textsc{mse}",mae="\\textsc{mae}",auc="\\textsc{roc-auc}",prauc="\\textsc{pr-auc}",x))
colnames(frame) <- paste0("$\\Delta$",colnames(frame))
rownames(frame) <- paste0(substr(x=rownames(frame),start=1,stop=5)," ",
                          substr(x=rownames(frame),start=6,stop=6))
xtable <- xtable::xtable(frame,align="r|cccccc")
xtable::print.xtable(xtable,include.rownames=TRUE,sanitize.text.function=function(x) x,comment=FALSE)
load("results/application.RData",verbose=TRUE)

table <- numeric()
for(i in 1:3){
  lasso <- apply(sapply(loss,function(x) x[[paste0("lasso",i)]]$deviance),1,median)
  names(lasso)[names(lasso)=="binomial"] <- "logistic lasso regression"
  names(lasso)[names(lasso)=="combined"] <- "combined lasso regression"
  ridge <- apply(sapply(loss,function(x) x[[paste0("ridge",i)]]$deviance),1,median)
  names(ridge)[names(ridge)=="binomial"] <- "logistic ridge regression"
  names(ridge)[names(ridge)=="combined"] <- "combined ridge regression"
  table <- cbind(table,c(lasso[c("logistic lasso regression","combined lasso regression")],ridge[c("logistic ridge regression","combined ridge regression")],lasso["rf"],lasso["xgboost"])) 
  rownames(table)[rownames(table)=="rf"] <- "\\texttt{randomForest}"
  rownames(table)[rownames(table)=="xgboost"] <- "\\texttt{xgboost}"
}
colnames(table) <- paste("year",1:3)
rownames(table) <- paste("\\footnotesize",rownames(table))
xtable <- xtable::xtable(table)
xtable::print.xtable(xtable,comment=FALSE,hline.after=c(0,2,4),sanitize.text.function=identity)

Figures

load("results/application.RData",verbose=TRUE)
sum <- fit[[1]]$lasso1
sum$cvm <- Reduce("+",lapply(fit,function(x) x$lasso1$cvm))
sum$sigma.min <- sapply(fit,function(x) x$lasso1$sigma.min)
sum$pi.min <- sapply(fit,function(x) x$lasso1$pi.min)

grDevices::pdf("manuscript/figure_MAP.pdf",width=4,height=3)

graphics::par(mar=c(4,4,0.5,0.5))
cornet:::plot.cornet(sum)

grDevices::dev.off()
rm(list=ls())

sigma <- c(1,2,3); cutoff <- 25.5
x <- seq(from=20,to=30,length.out=100)

grDevices::pdf("manuscript/figure_TFN.pdf",width=4,height=3)

graphics::par(mar=c(4,4,0.5,0.5))
graphics::plot.new()
graphics::plot.window(xlim=range(x),ylim=c(0,1))
graphics::box()
graphics::axis(side=1)
graphics::axis(side=2)
graphics::title(xlab=expression(hat(y)),ylab=expression(Phi(hat(y),mu,sigma^2)),line=2.5)
graphics::abline(h=0.5,lty=2,col="grey")
graphics::abline(v=cutoff,lty=2,col="grey")

lty <- c(2,1,3); lwd <- c(1,1,2)
lty <- c("dashed","solid","dotted")
for(i in seq_along(sigma)){
  p <- stats::pnorm(q=x,mean=cutoff,sd=sigma[i])
  graphics::lines(x=x,y=p,lty=lty[i],lwd=lwd[i])
}

graphics::text(x=cutoff,y=0.40,labels=bquote(mu==.(cutoff)),pos=4)
legend <- sapply(sigma,function(x) as.expression(bquote(sigma == .(x))))
graphics::legend(x="topleft",legend=legend,lty=lty,bty="n",lwd=lwd)

grDevices::dev.off()

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.