The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
A small example using the smoking data set, containing normalized transcript measurements for 51 subjects (23 “never-smoked” and 34 “smokers”) and 22283 transcripts of lung tissue. See Spira et al. (2004)
I have removed that data set temporarily until I solve a problem with upload the package to CRAN
library(ranger)
rf1 <-ranger(y=y.numeric ,x=smoking_data,importance="impurity",seed=123, num.trees = 10000,
classification=TRUE)
t2 <-count_variables(rf1)
imp<-log(rf1$variable.importance)
#png("./supp_figures/smoking_log_importances.png")
plot(density(imp),xlab="log importances",main="")
#dev.off()
# starting from a randomForest model
library(RFlocalfdr)
library(randomForest)
sert.seed(123)
rf2 <-randomForest(y=factor(y.numeric) ,x=smoking_data,importance=TRUE, ntree = 10000)
imp.rf2 <- log(rf2$importance[,"MeanDecreaseGini"])
t2.rf2 <-varUsed(rf2)
plot(density(imp.rf2),xlab="log importances",main="")
cutoffs <- c(2,3,4,5)
res.con<- determine_cutoff(imp.rf2,t2.rf2 ,cutoff=cutoffs,plot=c(2,3,4,5))
plot(cutoffs,res.con[,3],pch=15,col="red",cex=1.5,ylab="max(abs(y - t1))")
cutoffs[which.min(res.con[,3])]
#2
A small simulated data set. Each band contains blocks of size {1, 2, 4, 8, 16, 32, 64}, and each block consists of correlated (identical variables).
See @ref(fig:log_importances) for the log importances. They are clearly multimodal and we try to determine a cutoff so that we are left with a unimodal distribution.
cutoffs <- c(2,3,4,5)
#png("./supp_figures/smoking_data_determine_cutoff.png")
res.con<- determine_cutoff(imp,t2,cutoff=cutoffs,plot=c(2,3,4,5))
#dev.off()
#png("./supp_figures/smoking_data_determine_cutoffs_2.png")
plot(cutoffs,res.con[,3],pch=15,col="red",cex=1.5,ylab="max(abs(y - t1))")
#dev.off()
cutoffs[which.min(res.con[,3])]
We select a cutoff of 3 and fit the RFlocalfdr model
temp<-imp[t2 > 3]
temp <- temp - min(temp) + .Machine$double.eps
qq <- plotQ(temp,debug.flag = 1)
ppp<-run.it.importances(qq,temp,debug.flag = 0)
#png("./supp_figures/smoking_significant_genes.png")
#aa<-significant.genes(ppp,temp,cutoff=0.05,do.plot=1)
aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=TRUE)
#dev.off()
length(aa$probabilities) # 17
aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=FALSE)
length(aa$probabilities) # 19
The option do.plot=1 returns a plot containing the
The option do.plot=2 returns the same plot with the addition of
devtools::install_github("parsifal9/RFlocalfdr", build_vignettes = TRUE, force = TRUE)
library(RFlocalfdr)
data(smoking)
?smoking
y<-smoking$y
smoking_data<-smoking$rma
y.numeric <-ifelse((y=="never-smoked"),0,1)
library(ranger)
rf1 <-ranger(y=y.numeric ,x=smoking_data,importance="impurity",seed=123, num.trees = 10000,
classification=TRUE)
t2 <-count_variables(rf1)
imp<-log(rf1$variable.importance)
#png("./supp_figures/smoking_log_importances.png")
plot(density(imp),xlab="log importances",main="")
#dev.off()
cutoffs <- c(2,3,4,5)
#png("./supp_figures/smoking_data_determine_cutoff.png")
res.con<- determine_cutoff(imp,t2,cutoff=cutoffs,plot=c(2,3,4,5))
#dev.off()
#png("./supp_figures/smoking_data_determine_cutoffs_2.png")
plot(cutoffs,res.con[,3],pch=15,col="red",cex=1.5,ylab="max(abs(y - t1))")
#dev.off()
cutoffs[which.min(res.con[,3])]
temp<-imp[t2 > 3]
temp <- temp - min(temp) + .Machine$double.eps
qq <- plotQ(temp,debug.flag = 1)
ppp<-run.it.importances(qq,temp,debug.flag = 0)
#png("./supp_figures/smoking_significant_genes.png")
#aa<-significant.genes(ppp,temp,cutoff=0.05,do.plot=1)
aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=TRUE)
#dev.off()
length(aa$probabilities) # 17 -- Roc gets 30
aa<-significant.genes(ppp,temp,cutoff=0.05,debug.flag=0,do.plot=TRUE,use_95_q=FALSE)
length(aa$probabilities) # 19-- Roc gets 30
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.