The goal of SIHR is to provide inference procedures in the high-dimensional setting for (1)linear functionals (LF) and quadratic functionals (QF) in linear regression, (2)linear functional in logistic regression, (3) individual treatment effects (ITE) in linear and logistic regression.
These are basic examples which show how to solve the common high-dimensional inference problems:
library(SIHR)
Inference for linear functional in high-dimensional linear regression model
library(MASS)
100
n = 400
p = function(rho,p){
A1gen <-matrix(0,p,p)
A1=for(i in 1:p){
for(j in 1:p){
^(abs(i-j))
A1[i,j]<-rho
}
}
A1
} rep(0,p)
mu <-1:5] <- c(1:5)/5
mu[ 0.5
rho = (A1gen(rho,p))/2
Cov <- rep(0,p)
beta <-1:10] <- c(1:10)/5
beta[ MASS::mvrnorm(n,mu,Cov)
X <- X%*%beta + rnorm(n)
y = MASS::mvrnorm(1,rep(0,p),Cov)
loading <- SIHR::LF(X = X, y = y, loading = loading, intercept = TRUE)
Est =#> [1] "step is 3"
### Point esitmator
$prop.est
Est#> [,1]
#> [1,] -1.767103
### Standard error
$se
Est#> [1] 2.421901
### Confidence interval
$CI
Est#> [1] -6.513941 2.979735
### test whether the linear functional is below zero or not (1 indicates that it is above zero)
$decision
Est#> [1] 0
Individualised Treatment Effect in high-dimensional logistic regression model
100
n1 = 400
p = 100
n2 = function(rho,p){
A1gen <-matrix(0,p,p)
A1=for(i in 1:p){
for(j in 1:p){
^(abs(i-j))
A1[i,j]<-rho
}
}
A1
} rep(0,p)
mu <- 0.5
rho = (A1gen(rho,p))/2
Cov <- rep(0,p)
beta1 <-1:10] <- c(1:10)/5
beta1[ rep(0,p)
beta2 <-1:5] <- c(1:5)/10
beta2[ MASS::mvrnorm(n1,mu,Cov)
X1 <- MASS::mvrnorm(n2,mu,Cov)
X2 <- X1%*%beta1 + rnorm(n1)
y1 = X2%*%beta2 + rnorm(n2)
y2 = MASS::mvrnorm(1,rep(0,p),Cov)
loading <- SIHR::ITE(X1 = X1, y1 = y1, X2 = X2, y2 = y2,loading = loading, intercept = TRUE)
Est <-#> [1] "step is 3"
#> [1] "step is 3"
### Point esitmator
$prop.est
Est#> [,1]
#> [1,] 5.481873
### Standard error
$se
Est#> [1] 2.519672
### Confidence interval
$CI
Est#> [1] 0.5434056 10.4203400
### test whether the linear ITE is below zero or not (1 indicates that it is above zero)
$decision
Est#> [1] 1
Inference for linear functional in high-dimensional logistic regression model
library(MASS)
function(rho,p){
A1gen <-matrix(0,p,p)
A1=for(i in 1:p){
for(j in 1:p){
^(abs(i-j))
A1[i,j]<-rho
}
}
A1
} 100
n = 400
p = rep(0,p)
mu <- 0.5
rho = (A1gen(rho,p))/2
Cov <- rep(0,p)
beta <-1:10] <-0.5*c(1:10)/10
beta[ MASS::mvrnorm(n,mu,Cov)
X <- X%*%beta
exp_val <- exp(exp_val)/(1+exp(exp_val))
prob <- rbinom(n,1,prob)
y <- MASS::mvrnorm(1,mu,Cov)
loading <- SIHR::LF_logistic(X = X, y = y, loading = loading, weight = rep(1,n), trans = TRUE)
Est =#> [1] "step is 3"
### trans = TRUE implies target quantity is the case probability
### Point esitmator
$prop.est
Est#> [1] 0.7980198
### Standard error
$se
Est#> [1] 0.4963564
### Confidence interval
$CI
Est#> [1] 0.009362659 0.999394921
### test whether the case probability is below 0.5 or not (1 indicates that it is above 0.5)
$decision
Est#> [1] 0
Individualised Treatment Effect in high-dimensional logistic model
function(rho,p){
A1gen <-matrix(0,p,p)
A1=for(i in 1:p){
for(j in 1:p){
^(abs(i-j))
A1[i,j]<-rho
}
}
A1
} 100
n1 = 100
n2 = 400
p = rep(0,p)
mu <- 0.5
rho = (A1gen(rho,p))/2
Cov <- rep(0,p)
beta1 <-1:10] <- c(1:10)/5
beta1[ rep(0,p)
beta2 <-1:5] <- c(1:5)/10
beta2[ MASS::mvrnorm(n1,mu,Cov)
X1 <- MASS::mvrnorm(n2,mu,Cov)
X2 <- X1%*%beta1
exp_val1 <- X2%*%beta2
exp_val2 <- exp(exp_val1)/(1+exp(exp_val1))
prob1 <- exp(exp_val2)/(1+exp(exp_val2))
prob2 <- rbinom(n1,1,prob1)
y1 <- rbinom(n2,1,prob2)
y2 <- MASS::mvrnorm(1,mu,Cov)
loading <- SIHR::ITE_Logistic(X1 = X1, y1 = y1, X2 = X2, y2 = y2,loading = loading, weight = NULL, trans = FALSE)
Est <-#> [1] "step is 3"
#> [1] "step is 3"
### trans = FALSE implies target quantity is the difference between two linear combinations of the regression coefficients
### Point esitmator
$prop.est
Est#> [1] -1.936224
### Standard error
$se
Est#> [1] 7.925353
### Confidence interval
$CI
Est#> [1] -17.46963 13.59718
### test whether the first case probability is smaller than the second case probability or not (1 indicates that the first case probability is larger than the second case probability)
$decision
Est#> [1] 0
Inference for quadratic functional in high-dimensional linear model
library(MASS)
function(rho,p){
A1gen <-matrix(0,p,p)
A1=for(i in 1:p){
for(j in 1:p){
^(abs(i-j))
A1[i,j]<-rho
}
}
A1
} 0.6
rho = (A1gen(rho,400))
Cov <- rep(0,400)
mu <-1:5] <- c(1:5)/5
mu[ rep(0,400)
beta <-25:50] <- 0.08
beta[ MASS::mvrnorm(100,mu,Cov)
X <- X%*%beta + rnorm(100)
y <- c(30:100)
test.set <-
## Inference for Quadratic Functional with Population Covariance Matrix in middle
SIHR::QF(X = X, y = y, G=test.set)
Est =#> [1] "step is 5"
### Point esitmator
$prop.est
Est#> [,1]
#> [1,] 0.624398
### Standard error
$se
Est#> [1] 0.1384537
### Confidence interval
$CI
Est#> [,1] [,2]
#> [1,] 0.3530337 0.8957623
### test whether the quadratic form is equal to zero or not (1 indicates that it is above zero)
$decision
Est#> [1] 1
## Inference for Quadratic Functional with known matrix A in middle
SIHR::QF(X = X, y = y, G=test.set, Cov.weight = FALSE, A = diag(1:400,400))
Est =#> [1] "step is 3"
### Point esitmator
$prop.est
Est#> [,1]
#> [1,] 19.75794
### Standard error
$se
Est#> [1] 2.622109
### Confidence interval
$CI
Est#> [,1] [,2]
#> [1,] 14.6187 24.89717
### test whether the quadratic form is equal to zero or not (1 indicates that it is above zero)
$decision
Est#> [1] 1
## Inference for square norm of regression vector
SIHR::QF(X = X, y = y, G=test.set, Cov.weight = FALSE, A = diag(ncol(X)))
Est =#> [1] "step is 3"
### Point esitmator
$prop.est
Est#> [,1]
#> [1,] 0.360126
### Standard error
$se
Est#> [1] 0.112145
### Confidence interval
$CI
Est#> [,1] [,2]
#> [1,] 0.1403258 0.5799262
### test whether the quadratic form is equal to zero or not (1 indicates that it is above zero)
$decision
Est#> [1] 1
Finding projection direction in high dimensional linear regression
100
n = 400
p = matrix(sample(-2:2,n*p,replace = TRUE),nrow = n,ncol = p)
X = 1.5
resol = 3
step =
## Finding Projection Direction using fixed tuning parameter
SIHR::Direction_fixedtuning(X,loading=c(1,rep(0,(p-1))),mu=sqrt(2.01*log(p)/n)*resol^{-(step-1)})
Direction.est <-
### First 20 entries of the projection vector
$proj[1:20]
Direction.est#> [1] 7.493488e-01 2.758068e-21 -2.398941e-21 1.027806e-21 4.330324e-21
#> [6] 3.035517e-21 -1.590560e-21 2.899092e-21 -6.195319e-23 -1.803635e-22
#> [11] 1.748848e-21 2.673834e-21 2.035823e-21 -4.738669e-03 3.414678e-21
#> [16] 1.103289e-01 1.375374e-21 6.532494e-21 -1.950295e-23 -5.915719e-22
## Finding Projection Direction using best step size
SIHR::Direction_searchtuning(X,loading=c(1,rep(0,(p-1))))
Direction.est <-
### First 20 entries of the projection vector
$proj[1:20]
Direction.est#> [1] 7.493488e-01 2.738066e-21 -2.402517e-21 1.026739e-21 4.308531e-21
#> [6] 2.972226e-21 -1.576492e-21 2.851612e-21 -9.986260e-23 -2.031438e-22
#> [11] 1.708582e-21 2.653890e-21 2.007360e-21 -4.738669e-03 3.339736e-21
#> [16] 1.103289e-01 1.349339e-21 6.434696e-21 -5.231257e-23 -5.845769e-22
Finding projection direction in high dimensional logistic regression
50
n = 400
p = matrix(sample(-2:2,n*p,replace = TRUE),nrow=n,ncol=p)
X = rbinom(n,1,0.5)
y = 1/sqrt((1/n)*diag(t(X)%*%X)+0.0001);
col.norm <- X %*% diag(col.norm);
Xnor <- glmnet::cv.glmnet(Xnor, y, alpha=1,family = "binomial")
fit = as.vector(coef(fit, s = "lambda.min"))
htheta <-abs(htheta)>0.001)
support<-( cbind(rep(1,n),Xnor);
Xb <- cbind(rep(1,n),X);
Xc <- c(1,col.norm);
col.norm <- (p+1);
pp <- c(1,rep(0,(p-1)))
xnew =rep(0,pp)
loading=1]=1
loading[-1]=xnew
loading[ htheta*col.norm;
htheta <- as.vector(htheta)
htheta <- exp(Xc%*%htheta)/(1+exp(Xc%*%htheta))^2
f_prime <-
## Finding Projection Direction using fixed tuning parameter
SIHR::Direction_fixedtuning(X,loading=c(1,rep(0,(p-1))),mu=sqrt(2.01*log(p)/n)*resol^{-(step-1)},model = "logistic",weight = 1/f_prime, deriv.vec = f_prime)
Direction.est <-
### First 20 entries of the projection vector
$proj[1:20]
Direction.est#> [1] 6.743570e-01 -4.409032e-22 -4.296854e-22 -2.577536e-02 4.578960e-23
#> [6] 4.170592e-03 1.148916e-21 1.592058e-23 1.002686e-22 -9.457976e-22
#> [11] 2.069458e-22 -2.019592e-22 -5.011356e-23 3.910995e-22 7.806522e-23
#> [16] -2.753958e-22 -5.655215e-22 -1.000020e-21 3.232662e-22 -6.223914e-22
## Finding Projection Direction using best step size
SIHR::Direction_searchtuning(Xc,loading,model = "logistic",weight = 1/f_prime, deriv.vec = f_prime)
Direction.est <-
### First 20 entries of the projection vector
$proj[1:20]
Direction.est#> [1] 9.090972e-01 6.073521e-01 -1.350587e-20 -6.486011e-21 -8.931221e-21
#> [6] -4.727004e-21 4.972967e-02 -2.836570e-20 -2.354470e-21 -2.761754e-21
#> [11] -7.338999e-21 3.331681e-21 5.396664e-21 -1.549780e-20 5.381605e-21
#> [16] 9.408748e-21 1.730183e-01 1.052713e-20 9.447765e-21 -5.530575e-02