The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
In this tutorial, the alternate maximization (AM) algorithm is used in the first step of the two-step estimation method, and the information criterion (IC) method is adopted to choose the number of factors.
The package can be loaded with the command: `library("GFM")`
First, we generate the data with homogeneous normal variables.
Then, we set the algorithm parameters and fit the model.
# Extract the observed data from the simulated object `dat`.
# NOTE(review): `dat` is expected to come from the data-generation step
# described in the text above; its code is not shown here -- confirm.
X <- dat[["X"]]          # the data in the form of a single matrix
XList <- dat[["XList"]]  # the data in the form of a list of matrices
str(XList)
# Variable type: "gaussian" marks the variables as continuous.
types <- "gaussian"
Third, we fit the GFM model with user-specified number of factors.
# Fit the GFM model by alternate maximization with a user-specified
# number of factors (q = 2).
gfm1 <- gfm(XList, types, q = 2, algorithm = "AM", verbose = FALSE)
# Assess the GFM estimators (hH, hB) against the true values (H0, B0)
# stored in `dat`, using canonical correlations.
measurefun(gfm1$hH, dat$H0, type = "ccor")
measurefun(gfm1$hB, dat$B0, type = "ccor")
The number of factors can also be determined in a data-driven manner.
First, we generate the data with heterogeneous normal variables and set the parameters of the algorithm.
# Simulate data of type "heternorm" (heterogeneous normal variables)
# with a fixed seed.
dat <- gendata(type = "heternorm", n = 100, p = 100, q = 2, rho = 1, seed = 1)
# Extract the observed data.
X <- dat[["X"]]          # the data in the form of a single matrix
XList <- dat[["XList"]]  # the data in the form of a list of matrices
str(XList)
# Variable type: "gaussian" marks the variables as continuous.
types <- "gaussian"
Third, we fit the GFM model with user-specified number of factors and compare the results with that of linear factor models.
library(ggplot2)

# Fit the GFM model with a user-specified number of factors (q = 2).
gfm1 <- gfm(XList, types, q = 2, algorithm = "AM", verbose = FALSE)
# Canonical correlations between the GFM estimates and the true values.
corH_gfm <- measurefun(gfm1$hH, dat$H0, type = "ccor")
corB_gfm <- measurefun(gfm1$hB, dat$B0, type = "ccor")

# Linear factor model with the same number of factors, scored the same way.
lfm1 <- Factorm(X, q = 2)
corH_lfm <- measurefun(lfm1$hH, dat$H0, type = "ccor")
corB_lfm <- measurefun(lfm1$hB, dat$B0, type = "ccor")

# One row per (method, quantity) pair for a side-by-side bar chart.
df1 <- data.frame(
  CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
  Method = factor(rep(c("GFM", "LFM"), times = 2)),
  Quantity = factor(rep(c("factors", "loadings"), each = 2))
)
ggplot(df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.5)
The number of factors can also be determined in a data-driven manner.
First, we generate the data with count (Poisson) variables and set the parameters of the algorithm.
# Settings passed to the data generator below.
p <- 200
q <- 3
# Simulate data of type "pois" (count variables) with a fixed seed.
dat <- gendata(type = "pois", n = 200, p = p, q = q, rho = 4, seed = 1)
# Extract the observed data.
X <- dat[["X"]]          # the data in the form of a single matrix
XList <- dat[["XList"]]  # the data in the form of a list of matrices
str(XList)
# Variable type: "poisson", matching the count data generated above.
types <- "poisson"
Second, we select the number of factors by the information criterion (IC) method, searching over the candidate values 1 to 6.
# Time the IC-based selection of the number of factors over the
# candidate set 1:6, with the parallel option switched on.
system.time({
  hq <- chooseFacNumber(
    XList, types,
    select_method = "IC",
    q_set = 1:6,
    parallelList = list(parallel = TRUE)
  )
})
Third, we compare the results with that of linear factor models.
# Fit the GFM model to the current (Poisson) data with the true number of
# factors, q = 3. Without this refit, `gfm1` would still hold the earlier
# two-factor fit to the normal data, which does not correspond to the
# current `dat` and cannot be compared with dat$H0 / dat$B0.
gfm1 <- gfm(XList, types, algorithm = "AM", q = 3, verbose = FALSE)
# Measure the performance of the GFM estimators in terms of canonical
# correlations with the true values stored in `dat`.
corH_gfm <- measurefun(gfm1$hH, dat$H0, type = "ccor")
corB_gfm <- measurefun(gfm1$hB, dat$B0, type = "ccor")
# Linear factor model with the same number of factors, scored the same way.
lfm1 <- Factorm(X, q = 3)
corH_lfm <- measurefun(lfm1$hH, dat$H0, type = "ccor")
corB_lfm <- measurefun(lfm1$hB, dat$B0, type = "ccor")
library(ggplot2)
# One row per (method, quantity) pair for a side-by-side bar chart.
df1 <- data.frame(
  CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
  Method = factor(rep(c("GFM", "LFM"), times = 2)),
  Quantity = factor(c(rep("factors", 2), rep("loadings", 2)))
)
ggplot(data = df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.5)
First, we generate the data with a mixture of count (Poisson) and binomial variables and set the parameters of the algorithm. Then we fit the GFM model with a user-specified number of factors.
# Simulate data of type "pois_bino" with a fixed seed.
dat <- gendata(type = "pois_bino", n = 200, p = 200, q = 2, rho = 2, seed = 1)
# Extract the observed data.
X <- dat[["X"]]          # the data in the form of a single matrix
XList <- dat[["XList"]]  # the data in the form of a list of matrices
str(XList)
# Variable types are taken from the simulated object itself.
types <- dat$types
# Tabulate the values found in columns 1 and 200 of the data matrix.
table(X[, 1])
table(X[, 200])
# Fit the GFM model with a user-specified number of factors (q = 2) and
# score the estimates against the true values by canonical correlations.
gfm2 <- gfm(XList, types, q = 2, algorithm = "AM", verbose = FALSE)
measurefun(gfm2$hH, dat$H0, type = "ccor")
measurefun(gfm2$hB, dat$B0, type = "ccor")
Third, we compare the results with that of linear factor models.
# Choose the number of factors automatically with the IC method over the
# candidate set 1:4, with the parallel option switched on.
hq <- chooseFacNumber(
  XList, types,
  q_set = 1:4,
  select_method = "IC",
  verbose = FALSE,
  parallelList = list(parallel = TRUE)
)
# Canonical correlations between the GFM estimates (from `gfm2`) and the
# true values stored in `dat`.
corH_gfm <- measurefun(gfm2$hH, dat$H0, type = "ccor")
corB_gfm <- measurefun(gfm2$hB, dat$B0, type = "ccor")
Compare with linear factor models
# Fit the linear factor model with q = 2 factors, the same number used to
# generate the data and to fit the GFM model, so that the two methods are
# compared on an equal footing.
lfm1 <- Factorm(dat$X, q = 2)
# Canonical correlations between the LFM estimates and the true values.
corH_lfm <- measurefun(lfm1$hH, dat$H0, type = "ccor")
corB_lfm <- measurefun(lfm1$hB, dat$B0, type = "ccor")
library(ggplot2)
# One row per (method, quantity) pair for a side-by-side bar chart.
df1 <- data.frame(
  CCor = c(corH_gfm, corH_lfm, corB_gfm, corB_lfm),
  Method = factor(rep(c("GFM", "LFM"), times = 2)),
  Quantity = factor(c(rep("factors", 2), rep("loadings", 2)))
)
ggplot(data = df1, aes(x = Quantity, y = CCor, fill = Method)) +
  geom_bar(position = "dodge", stat = "identity", width = 0.5)
# Print the R version, platform, locale and loaded packages of the session
# that produced this document (captured output follows as `#>` lines).
sessionInfo()
#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-w64-mingw32/x64 (64-bit)
#> Running under: Windows 10 x64 (build 22621)
#>
#> Matrix products: default
#>
#> locale:
#> [1] LC_COLLATE=C
#> [2] LC_CTYPE=Chinese (Simplified)_China.936
#> [3] LC_MONETARY=Chinese (Simplified)_China.936
#> [4] LC_NUMERIC=C
#> [5] LC_TIME=Chinese (Simplified)_China.936
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.29 R6_2.5.1 jsonlite_1.8.0 magrittr_2.0.3
#> [5] evaluate_0.15 stringi_1.7.6 rlang_1.1.0 cli_3.2.0
#> [9] rstudioapi_0.13 jquerylib_0.1.4 bslib_0.3.1 rmarkdown_2.11
#> [13] tools_4.1.2 stringr_1.4.0 xfun_0.29 yaml_2.3.6
#> [17] fastmap_1.1.0 compiler_4.1.2 htmltools_0.5.2 knitr_1.37
#> [21] sass_0.4.1
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.