The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
You can install the released version of ConformalSmallest from CRAN with:
install.packages("ConformalSmallest")
Or directly from github
::install_github("Elsa-Yang98/ConformalSmallest") devtools
This is a basic example which shows you how to solve a common problem:
library(ConformalSmallest)
## basic example code
library(glmnet)
library(MASS)
library(mvtnorm)
source("ginverse.fun.R")
source("functions.R")
=paste("linear_fm_t3",sep="")
name
<- 3 #degrees of freedom
df <- 60 #number of dimensions
l <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- round(seq(5,300,l=l))
dim <- 0.1
alpha <- 200 #number of training samples
n <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- len.efcp <- matrix(0,nrep,l)
cov.efcp <- len.vfcp <- matrix(0,nrep,l)
cov.vfcp <- len.naive <- matrix(0,nrep,l)
cov.naive <- len.param <- matrix(0,nrep,l)
cov.param <- len.star <- matrix(0,nrep,l)
cov.star <- len.cv10 <- matrix(0,nrep,l)
cov.cv10 <- len.cv5 <- matrix(0,nrep,l)
cov.cv5 <- len.cvloo <- matrix(0,nrep,l)
cov.cvloo
<- out.efcp.lo <- matrix(0,n0,l)
out.efcp.up <- out.vfcp.lo <- matrix(0,n0,l)
out.vfcp.up <- out.naive.lo <- matrix(0,n0,l)
out.naive.up <- out.param.lo <- matrix(0,n0,l)
out.param.up <- out.star.lo <- matrix(0,n0,l)
out.star.up <- out.cv10.lo <- matrix(0,n0,l)
out.cv10.up <- out.cv5.lo <- matrix(0,n0,l)
out.cv5.up <- out.cvloo.lo <- matrix(0,n0,l)
out.cvloo.up
for(i in 1:nrep){
cat(i,"\n")
for (r in 1:l){
<- dim[r]
d set.seed(i)
<- matrix(rho,d,d)
Sigma diag(Sigma) <- rep(1,d)
<- rmvt(n,Sigma,df) #multivariate t distribution
X <- rep(1:5,d/5)
beta <- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2))
eps <- X%*%beta+eps
Y
<- rmvt(n0,Sigma,df)
X0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2))
eps0 <- X0%*%beta+eps0
Y0
<- ginverse.fun(X,Y,X0,alpha=alpha)
out.param <- out.param$lo
out.param.lo[,r] <- out.param$up
out.param.up[,r] <- mean(out.param.lo[,r] <= Y0 & Y0 <= out.param.up[,r])
cov.param[i,r] <- mean(out.param.up[,r]-out.param.lo[,r])
len.param[i,r]
<- efcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha)
out.efcp <- out.efcp$up
out.efcp.up[,r] <- out.efcp$lo
out.efcp.lo[,r] <- mean(out.efcp.lo[,r] <= Y0 & Y0 <= out.efcp.up[,r])
cov.efcp[i,r] <- mean(out.efcp.up[,r]-out.efcp.lo[,r])
len.efcp[i,r]
<- vfcp_ridge(X,Y,X0,lambda=lambda_seq,alpha=alpha)
out.vfcp <- out.vfcp$up
out.vfcp.up[,r] <- out.vfcp$lo
out.vfcp.lo[,r] <- mean(out.vfcp.lo[,r] <= Y0 & Y0 <= out.vfcp.up[,r])
cov.vfcp[i,r] <- mean(out.vfcp.up[,r]-out.vfcp.lo[,r])
len.vfcp[i,r]
<- naive.fun(X,Y,X0,alpha=alpha)
out.naive <- out.naive$up
out.naive.up[,r] <- out.naive$lo
out.naive.lo[,r] <- mean(out.naive.lo[,r] <= Y0 & Y0 <= out.naive.up[,r])
cov.naive[i,r] <- mean(out.naive.up[,r]-out.naive.lo[,r])
len.naive[i,r]
<- star.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha)
out.star <- out.star$up
out.star.up[,r] <- out.star$lo
out.star.lo[,r] <- mean(out.star.lo[,r] <= Y0 & Y0 <= out.star.up[,r])
cov.star[i,r] <- mean(out.star.up[,r] - out.star.lo[,r])
len.star[i,r]
<- cv.fun(X,Y,X0,lambda=lambda_seq,alpha=alpha,nfolds=5)
out.cv5 <- out.cv5$up
out.cv5.up[,r] <- out.cv5$lo
out.cv5.lo[,r] <- mean(out.cv5.lo[,r] <= Y0 & Y0 <= out.cv5.up[,r])
cov.cv5[i,r] <- mean(out.cv5.up[,r] - out.cv5.lo[,r])
len.cv5[i,r]
}
}
<- data.frame(dim,apply(cov.param,2,mean),apply(cov.naive,2,mean),apply(cov.vfcp,2,mean),apply(cov.star,2,mean),apply(cov.cv5,2,mean), apply(cov.efcp,2,mean))
df.cov
<- data.frame(dim,apply(len.param,2,mean),apply(len.naive,2,mean),apply(len.vfcp,2,mean),apply(len.star,2,mean),apply(len.cv5,2,mean), apply(len.efcp,2,mean))
df.len
save(dim,cov.param, cov.naive, cov.vfcp, cov.star, cov.cv5, cov.efcp, file = "cov100_t3.RData" )
save(dim,len.param, len.naive, len.vfcp, len.star, len.cv5, len.efcp, file = "len100_t3.RData" )
This output the right panal of Figure 1 in our paper.
<- 3
df <- 3
d <- 100
l.lambda <- seq(0,200,l=l.lambda)
lambda_seq <- c(50,100,500,1000,5000)
nset <- 0.1 #miscoverage level
alpha <- 100 #number of prediction points
n0 <- 100 #number of independent trials
nrep <- 0.5
rho
<- expand.grid(1:nrep, nset, c("efficient", "valid"))
evaluations <- nrow(evaluations)
no_eval <- cov_mat <- data.frame(number = rep(0, no_eval),
width_mat rep = evaluations[,1],
nset = evaluations[,2],
method = evaluations[,3])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "method")
<- matrix(rho,d,d)
Sigma diag(Sigma) <- rep(1,d) #covariance matrix for X
for(idx in 1:nrow(evaluations)){
set.seed(evaluations[idx, 1])
if(idx%%1 == 0){
print(idx)
}<- evaluations[idx, 2]
n
<- rmvt(n,Sigma,df) #multivariate t distribution
X
<- rt(n,df)*(1+sqrt(X[,1]^2+X[,2]^2))
eps1 <- rt(n,df)*(1+sqrt(X[,1]^4+X[,2]^4))
eps2 <- rpois(n,sin(X[,1])^2 + cos(X[,2])^4+0.01 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01)*eps2
Y
<- rmvt(n0,Sigma,df)
X0 <- rt(n0,df)*(1+sqrt(X0[,1]^2+X0[,2]^2))
eps01 <- rt(n0,df)*(1+sqrt(X0[,1]^4+X0[,2]^4))
eps02 <- rpois(n0,sin(X0[,1])^2 + cos(X0[,2])^4+0.01 )+0.03*X0[,1]*eps01+25*(runif(n0,0,1)<0.01)*eps02
Y0
3] <- cov_mat[idx, 3] <- n
width_mat[idx,<- evaluations[idx, 3]
method 4] <- cov_mat[idx, 4] <- method
width_mat[idx,2] <- cov_mat[idx, 2] <- evaluations[idx, 1]
width_mat[idx,
if(method == "valid"){
<- c(1/2, 1/2)
split else {
} <- 1/2
split
}
<- seq(1e-03, 4, length = 20)*alpha
beta_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
mtry_grid <- seq(50, 400, by = 50)
ntree_grid
<- try(conf_CQR_reg(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
tmp
while (class(tmp)=="try-error"){
<- try(conf_CQR_reg(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
tmp
}1] <- tmp$width
width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0))
cov_mat[idx,
}
par(mfrow <- c(1,2))
<- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
width_efcp #sd_efcp <- sd_vfcp <- NULL
for(n in nset){
<- width_mat[evaluations[,3] == "efficient", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime
<- width_mat[evaluations[,3] == "valid", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime_vfcp =TMP_prime_vfcp[ TMP_prime_vfcp[,1]<=10^5,1]
TMP_prime_vfcp_clean
<- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_efcp #sd_efcp = c(sd_efcp , sd(TMP_prime[,1])/sqrt(nrep) )
<- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_vfcp #sd_vfcp = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
}
#plot(dim, width_efcp, type = 'l', ylim = range(c(width_efcp+sd_efcp)), lwd = 2)
plot(nset, width_efcp, type = 'l', ylim =c(-10,25), lwd = 2)
lines(nset, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(nset, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(nset, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
lines(nset, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(nset, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
abline(h = 1)
<- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
cov_efcp for(n in nset){
<- cov_mat[evaluations[,3] == "efficient", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime <- c( cov_efcp, mean(TMP_prime[,1] ) )
cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_efcp
<- cov_mat[evaluations[,3] == "valid", ]
TMP <- TMP[TMP[,3] == n,]
TMP_prime <- c(cov_vfcp, mean(TMP_prime[,1]))
cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_vfcp
}plot(nset, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2)
lines(nset, cov_vfcp, type = 'l', col = "red", lwd = 2)
abline(h = 1-alpha)
save(nset,nrep,width_mat, cov_mat, evaluations, width_efcp, sd_cov_efcp, sd_width_efcp,width_vfcp, sd_cov_vfcp,sd_width_vfcp, cov_efcp, cov_vfcp, alpha, file = "pois-100-repetitions.RData" )
This output the right panal of Figure 1 in our paper.
= 3
df = 1 #x is of one dimension
d = c(400) #number of training sample
nset = seq(0,5,by=0.2) # a grid of test points for x
x_test = 0.1 #miscoverage level
alpha = 100 #number of independent trials
nrep = 100 #number of test samples y for each test prediction sample x
nrep2
<- expand.grid(1:nrep, nset, x_test, c("efficient", "valid","CQR"))
evaluations <- nrow(evaluations)
no_eval <- cov_mat <- data.frame(number = rep(0, no_eval),
width_mat rep = evaluations[,1],
nset = evaluations[,2],
X_test = evaluations[,3],
method = evaluations[,4])
colnames(width_mat) <- colnames(cov_mat) <- c("number", "rep", "sample size", "test_value","method")
for(idx in 1:nrow(evaluations)){
set.seed(evaluations[idx, 1])
if(idx%%1 == 0){
print(idx)
}<- evaluations[idx, 2]
n = evaluations[idx, 3]
x0
= as.matrix(runif(n,0,5))
X = rnorm(n)
eps1 = rnorm(n)
eps2 = rpois(n,sin(X[,1])^2 +0.1 )+0.03*X[,1]*eps1+25*(runif(n,0,1)<0.01*eps2)
Y
= as.matrix( rep(x0,nrep2) )
X0 = rnorm(nrep2)
eps01 = rnorm(nrep2)
eps02 = rpois(nrep2,sin(X0)^2 +0.1 )+0.03*X0*eps01+25*(runif(nrep2,0,1)<0.01*eps02)
Y0
3] <- cov_mat[idx, 3] <- n
width_mat[idx,<- evaluations[idx, 4]
method #width_mat[idx,5] <- cov_mat[idx, 5] <- method
#width_mat[idx, 2] <- cov_mat[idx, 2] <- evaluations[idx, 1]
if (method =="CQR"){
= 0.05
beta_fixed = 1
mtry_fixed = 100
ntree_fixed
= try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha))
tmp
while (class(tmp)=="try-error"){
= try(conf_CQR_conditional(X, Y, beta_fixed, mtry_fixed, ntree_fixed, alpha = alpha),silent=TRUE)
tmp
}1] <- mean(tmp$pred_set(X0, Y0)[[2]])
width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
cov_mat[idx, else{ if(method == "valid"){
}<- c(1/2, 1/2)
split else {
} <- 1/2
split
}
<- seq(1e-03, 4, length = 20)*alpha
beta_grid <- unique(ceiling(seq(1/10, 1, length = 20)*d))
mtry_grid <- seq(50, 400, by = 50)
ntree_grid
= try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha))
tmp
while (class(tmp)=="try-error"){
= try(conf_CQR_reg_conditional(X, Y, split = split, beta_grid, mtry_grid, ntree_grid, method = method, alpha = alpha),silent=TRUE)
tmp
}1] <- mean(tmp$pred_set(X0, Y0)[[2]])
width_mat[idx, 1] <- mean(tmp$pred_set(X0, Y0)[[1]])
cov_mat[idx,
}
}
par(mfrow = c(1,2))
<- sd_width_cqr <- width_efcp <- width_vfcp <- sd_width_efcp <- sd_width_vfcp <- NULL
width_cqr #sd_efcp <- sd_vfcp <- NULL
for(x in x_test){
<- width_mat[evaluations[,4] == "efficient", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime
<- width_mat[evaluations[,4] == "valid", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime_vfcp =TMP_prime_vfcp[ TMP_prime_vfcp[,1]<=10^5,1]
TMP_prime_vfcp_clean
<- width_mat[evaluations[,4] == "CQR", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime_cqr
<- c(width_efcp, mean(TMP_prime[,1] / TMP_prime_vfcp[,1]))
width_efcp <- c(sd_width_efcp, sd(TMP_prime[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_efcp #sd_efcp = c(sd_efcp , sd(TMP_prime[,1])/sqrt(nrep) )
<- c(width_vfcp, mean(TMP_prime_vfcp[,1] / TMP_prime_vfcp[,1]))
width_vfcp <- c(sd_width_vfcp, sd(TMP_prime_vfcp[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_vfcp #sd_vfcp = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
<- c(width_cqr, mean(TMP_prime_cqr[,1] / TMP_prime_vfcp[,1]))
width_cqr <- c(sd_width_cqr, sd(TMP_prime_cqr[,1]/ TMP_prime_vfcp[,1])/sqrt(nrep))
sd_width_cqr #sd_vfcp = c(sd_vfcp , sd(TMP_prime_vfcp[,1])/sqrt(nrep) )
}
#plot(dim, width_efcp, type = 'l', ylim = range(c(width_efcp+sd_efcp)), lwd = 2)
plot(x_test, width_efcp, type = 'l', ylim =c(-5,20), lwd = 2)
lines(x_test, width_efcp - sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_efcp + sd_width_efcp, type = 'l', lty = 2, lwd = 2)
lines(x_test, width_vfcp, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "red")
lines(x_test, width_vfcp - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_vfcp + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "red")
lines(x_test, width_cqr, type = 'l', ylim = range(c(width_efcp, width_vfcp)), lwd = 2, col = "blue")
lines(x_test, width_cqr - sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "blue")
lines(x_test, width_cqr + sd_width_vfcp, type = 'l', lty = 2, lwd = 2, col = "blue")
abline(h = 1)
<-sd_cov_cqr <-cov_efcp <- cov_vfcp <-sd_cov_efcp <- sd_cov_vfcp <- NULL
cov_cqr for(x in x_test){
<- cov_mat[evaluations[,4] == "efficient", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime <- c( cov_efcp, mean(TMP_prime[,1] ) )
cov_efcp <- c(sd_cov_efcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_efcp
<- cov_mat[evaluations[,4] == "valid", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime <- c(cov_vfcp, mean(TMP_prime[,1]))
cov_vfcp <- c(sd_cov_vfcp, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_vfcp
<- cov_mat[evaluations[,4] == "CQR", ]
TMP <- TMP[TMP[,4] == x,]
TMP_prime <- c(cov_cqr, mean(TMP_prime[,1]))
cov_cqr <- c(sd_cov_cqr, sd(TMP_prime[,1])/sqrt(nrep))
sd_cov_cqr
}plot(x_test, cov_efcp, type = 'l', ylim = c(0, 1), lwd = 2)
lines(x_test, cov_vfcp, type = 'l', col = "red", lwd = 2)
lines(x_test, cov_cqr, type = 'l', col = "blue", lwd = 2)
legend(0,0.2, legend=c("EFCP", "VFCP","CQR"),
col=c("black","red", "blue"), lty=1, cex=0.8)
abline(h = 1-alpha)
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.