The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
GWASinlps performs Bayesian non-local prior based iterative variable selection for data from genome-wide association studies (GWAS), or other high-dimensional data with continuous, binary or survival outcomes (see References below).
# Install the package with the default repositories
install.packages("GWASinlps")
# Alternatively, install from the GitHub repository (requires devtools)
# install.packages("devtools")
devtools::install_github("nilotpalsanyal/GWASinlps")
GWASinlps()
is the main function which accepts
continuous or binary data (such as phenotype data) and a matrix with the
independent variable values (SNP genotypes). The function also needs, as
input, values for the scaling parameter of the selected non-local prior and
the tuning parameters. These should be fixed based on exploratory study
and/or subject-specific heuristics. For example, in GWAS analysis, as
the GWAS effect sizes are generally very small (typical effect size of a
SNP is around 0.05% of the total phenotypic variance for quantitative
traits), the scaling parameter can be chosen such that the non-local
prior allows at least 1% chance of a standardized effect size being 0.05
or less in absolute value. Such estimates of the scaling parameter for
the MOM and iMOM priors are 0.022 and 0.008, respectively.
Here is a simple illustration of the use of the GWASinlps()
function for both continuous and binary phenotypes.
# Example: variable selection for a simulated continuous phenotype,
# comparing GWASinlps with cross-validated LASSO (glmnet).
library(GWASinlps)
#> Loading required package: mombf
#> Loading required package: mvtnorm
#> Loading required package: ncvreg
#> Loading required package: mgcv
#> Loading required package: nlme
#> This is mgcv 1.8-40. For overview type 'help("mgcv-package")'.
#>
#> Welcome to GWASinlps! Select well.
#>
#> Website: https://nilotpalsanyal.github.io/GWASinlps/
#> Bug report: https://github.com/nilotpalsanyal/GWASinlps/issues

# Generate design matrix (genotype matrix)
n = 200 #number of subjects
p = 10000 #number of variables/SNPs
m = 10 #number of true variables/causal SNPs
set.seed(1)
f = runif( p, .1, .2 ) #simulate minor allele frequency
x = matrix(nrow = n, ncol = p)
for(j in 1:p) {
  x[,j] = rbinom(n, 2, f[j]) #simulate genotypes
}
colnames(x) = 1:p

# Generate true effect sizes
causal_snps = sample(1:p, m)
beta = rep(0, p) #all effects are zero except at the causal SNPs
beta[causal_snps] = rnorm(m, mean = 0, sd = 2 )

# Generate continuous (phenotype) data
y = x %*% beta + rnorm(n, 0, 1)

# GWASinlps analysis
inlps <- GWASinlps(y=y, x=x, family="normal", prior="mom", tau=0.2,
                   k0=1, m=50, rxx=0.2)
#> =================================
#> Number of selected variables: 9
#> Time taken: 0.04 min
#> =================================

# LASSO analysis
library(glmnet)
#> Loading required package: Matrix
#> Loaded glmnet 4.1-4
fit.cvlasso = cv.glmnet( x, y, alpha = 1 )
l.min = fit.cvlasso $lambda.min # lambda that gives minimum cvm
l.1se = fit.cvlasso $lambda.1se # largest lambda such that error is
                                # within 1 se of the minimum

# Indices of non-zero coefficients; [-1] drops the first entry
# (the intercept in glmnet's coefficient vector)
lasso_min = which( as.vector( coef( fit.cvlasso, s = l.min ) )[-1] != 0 )
lasso_1se = which( as.vector( coef( fit.cvlasso, s = l.1se ) )[-1] != 0 )

# Compare results
# Each row: number selected, number selected that are causal,
# number of causal SNPs that were missed
library(kableExtra)
res = matrix(nrow=3,ncol=3)
res[1,] = c(length(inlps$selected), length(intersect(inlps$selected, causal_snps)), length(setdiff(causal_snps, inlps$selected)) )
res[2,] = c(length(lasso_min), length(intersect(lasso_min, causal_snps)), length(setdiff(causal_snps, lasso_min)))
res[3,] = c(length(lasso_1se), length(intersect(lasso_1se, causal_snps)), length(setdiff(causal_snps, lasso_1se)))
colnames(res) = c("#Selected SNPs","#True positive","#False negative")
rownames(res) = c("GWASinlps", "LASSO min", "LASSO 1se")
kableExtra::kable(res, format="html",
                  table.attr= "style='width:60%;'",
                  caption=paste("<center>Variable selection from", p, " SNPs with", m, " causal SNPs for continuous phenotypes from", n, " subjects</center>"),
                  escape=FALSE) %>%
  kableExtra::kable_styling()
| | #Selected SNPs | #True positive | #False negative |
|---|---|---|---|
| GWASinlps | 9 | 8 | 2 |
| LASSO min | 190 | 8 | 2 |
| LASSO 1se | 44 | 8 | 2 |
# Example: variable selection for a simulated binary phenotype,
# comparing the "rigorous" and "quick" GWASinlps methods with
# cross-validated LASSO (glmnet).
library(GWASinlps)
library(fastglm)
#> Loading required package: bigmemory

# Generate design matrix (genotype matrix)
n = 500 #number of subjects
p = 2000 #number of variables/SNPs
m = 10 #number of true variables/causal SNPs
set.seed(1)
f = runif( p, .1, .2 ) #simulate minor allele frequency
x = matrix(nrow = n, ncol = p)
for(j in 1:p) {
  x[,j] = rbinom(n, 2, f[j]) #simulate genotypes
}
colnames(x) = 1:p

# Generate true effect sizes
causal_snps = sample(1:p, m)
beta = rep(0, p) #all effects are zero except at the causal SNPs
beta[causal_snps] = rnorm(m, mean = 0, sd = 2 )

# Generate binary (phenotype) data
prob = exp(x %*% beta)/(1 + exp(x %*% beta)) #inverse-logit of the linear predictor
y = sapply(1:n, function(i)rbinom(1,1,prob[i]) )

# GWASinlps analysis
mode(x) = "double" #needed for fastglm() function below
# One logistic fit per SNP (intercept + that SNP); keep the SNP coefficient
mmle_xy = apply( x, 2, function(z) coef( fastglm(y=y,
          x=cbind(1,matrix(z)), family = binomial(link = "logit")) )[2] )
          #pre-compute MMLEs of betas as it takes time
inlps_rigorous <- GWASinlps(y=y, x=x, family="binomial", method="rigorous",
                  mmle_xy=mmle_xy, prior="mom", tau=0.2, k0=1, m=50, rxx=0.2)
#> =================================
#> Number of selected variables: 4
#> Time taken: 0.33 min
#> =================================
inlps_quick <- GWASinlps(y=y, x=x, family="binomial", method="quick",
               mmle_xy=mmle_xy, prior="mom", tau=0.2, k0=1, m=50, rxx=0.2)
#> =================================
#> Number of selected variables: 8
#> Time taken: 0 min
#> =================================

# Lasso analysis
library(glmnet)
fit.cvlasso = cv.glmnet( x, y, family = "binomial", alpha = 1 )
l.min = fit.cvlasso $lambda.min # lambda that gives minimum cvm
l.1se = fit.cvlasso $lambda.1se # largest lambda such that error is
                                # within 1 se of the minimum

# Indices of non-zero coefficients; [-1] drops the first entry
# (the intercept in glmnet's coefficient vector)
lasso_min = which( as.vector( coef( fit.cvlasso, s = l.min ) )[-1] != 0 )
lasso_1se = which( as.vector( coef( fit.cvlasso, s = l.1se ) )[-1] != 0 )

# Compare results
# Each row: number selected, number selected that are causal,
# number of causal SNPs that were missed
library(kableExtra)
res = matrix(nrow=4,ncol=3)
res[1,] = c(length(inlps_rigorous$selected), length(intersect(inlps_rigorous$selected, causal_snps)), length(setdiff(causal_snps, inlps_rigorous$selected)) )
res[2,] = c(length(inlps_quick$selected), length(intersect(inlps_quick$selected, causal_snps)), length(setdiff(causal_snps, inlps_quick$selected)) )
res[3,] = c(length(lasso_min), length(intersect(lasso_min, causal_snps)), length(setdiff(causal_snps, lasso_min)))
res[4,] = c(length(lasso_1se), length(intersect(lasso_1se, causal_snps)), length(setdiff(causal_snps, lasso_1se)))
colnames(res) = c("#Selected SNPs","#True positive","#False negative")
rownames(res) = c("GWASinlps rigorous", "GWASinlps quick", "LASSO min", "LASSO 1se")
kableExtra::kable(res, format="html",
                  table.attr= "style='width:60%;'",
                  caption=paste("<center>Variable selection from", p, " SNPs with", m, " causal SNPs for binary phenotypes from", n, " subjects</center>"),
                  escape=FALSE) %>%
  kableExtra::kable_styling()
| | #Selected SNPs | #True positive | #False negative |
|---|---|---|---|
| GWASinlps rigorous | 4 | 4 | 6 |
| GWASinlps quick | 8 | 4 | 6 |
| LASSO min | 20 | 5 | 5 |
| LASSO 1se | 6 | 4 | 6 |
Nilotpal Sanyal, Min-Tzu Lo, Karolina Kauppi, Srdjan Djurovic, Ole A. Andreassen, Valen E. Johnson, and Chi-Hua Chen. “GWASinlps: non-local prior based iterative SNP selection tool for genome-wide association studies.” Bioinformatics 35, no. 1 (2019): 1-11. https://doi.org/10.1093/bioinformatics/bty472
Nilotpal Sanyal. “Iterative variable selection for high-dimensional data with binary outcomes.” arXiv preprint arXiv:2211.03190 (2022). https://arxiv.org/abs/2304.11902
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.