The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Measurement System: Two-Factor CFA

Greg Veramendi

Overview

This vignette shows how to specify and estimate a two-factor measurement system with factorana. The latent factors are identified from a set of noisy indicators (measurements) whose loadings the package estimates jointly with the factor variances.

The example below uses two latent factors (“cognitive” and “non-cognitive” skills) each measured by three continuous indicators, and is small enough (n = 300) to run quickly.

Simulate data

library(factorana)

set.seed(1)
n <- 300

# Latent factors (true values, unobserved in practice)
f_cog    <- rnorm(n, mean = 0, sd = 1.0)
f_noncog <- rnorm(n, mean = 0, sd = 0.8)

# Cognitive indicators: loadings (1.0, 0.9, 0.7); error sd = 0.5
cog1 <- 0.0 + 1.0 * f_cog + rnorm(n, 0, 0.5)
cog2 <- 0.2 + 0.9 * f_cog + rnorm(n, 0, 0.5)
cog3 <- 0.1 + 0.7 * f_cog + rnorm(n, 0, 0.5)

# Non-cognitive indicators: loadings (1.0, 1.1, 0.8); error sd = 0.5
nc1 <- 0.0 + 1.0 * f_noncog + rnorm(n, 0, 0.5)
nc2 <- 0.1 + 1.1 * f_noncog + rnorm(n, 0, 0.5)
nc3 <- 0.0 + 0.8 * f_noncog + rnorm(n, 0, 0.5)

dat <- data.frame(
  intercept = 1,
  cog1 = cog1, cog2 = cog2, cog3 = cog3,
  nc1  = nc1,  nc2  = nc2,  nc3  = nc3
)
head(dat)
#>   intercept       cog1       cog2        cog3        nc1        nc2        nc3
#> 1         1 -0.7969873 -1.1345097 -1.11703554  1.1399607  1.2434756 -0.0985743
#> 2         1  0.9348556  0.4624395  1.19013215 -1.3004950 -0.5309301 -0.6931244
#> 3         1 -0.5714748 -0.4198545 -1.41335484  2.0238605  1.7614149  2.3556515
#> 4         1  1.8663765  1.0763851  0.16363734 -0.7774106  0.5158946  0.4655233
#> 5         1  0.2611711  0.8220335  0.67947970  1.5927923  1.4158815  1.1502765
#> 6         1 -1.3888353 -1.0548717 -0.02060567  1.1187830  2.4446165  0.6413497

Define the factor model

Two independent latent factors. Loading normalizations are set on the component side: each factor has one indicator with loading fixed at 1 (to pin the scale) and two free loadings.

fm <- define_factor_model(n_factors = 2, factor_structure = "independent")

Define model components

For each indicator we declare a linear equation, which factor(s) it loads on, and any fixed loadings. loading_normalization takes a vector of length n_factors:

# Cognitive indicators: load on factor 1 only
mc_cog1 <- define_model_component(
  name = "cog1", data = dat, outcome = "cog1", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(1, 0)        # factor 1 loading = 1, factor 2 loading = 0
)
mc_cog2 <- define_model_component(
  name = "cog2", data = dat, outcome = "cog2", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(NA_real_, 0) # factor 1 loading free, factor 2 loading = 0
)
mc_cog3 <- define_model_component(
  name = "cog3", data = dat, outcome = "cog3", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(NA_real_, 0)
)

# Non-cognitive indicators: load on factor 2 only
mc_nc1 <- define_model_component(
  name = "nc1", data = dat, outcome = "nc1", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(0, 1)
)
mc_nc2 <- define_model_component(
  name = "nc2", data = dat, outcome = "nc2", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(0, NA_real_)
)
mc_nc3 <- define_model_component(
  name = "nc3", data = dat, outcome = "nc3", factor = fm,
  covariates = "intercept", model_type = "linear",
  loading_normalization = c(0, NA_real_)
)

Assemble the components into a system:

ms <- define_model_system(
  components = list(mc_cog1, mc_cog2, mc_cog3, mc_nc1, mc_nc2, mc_nc3),
  factor = fm
)

Estimate

The estimator uses Gauss-Hermite quadrature to integrate over the latent factors; we keep n_quad_points modest here for speed.

ctrl <- define_estimation_control(n_quad_points = 6, num_cores = 1)

fit <- estimate_model_rcpp(
  model_system = ms,
  data         = dat,
  control      = ctrl,
  optimizer    = "nlminb",
  parallel     = FALSE,
  verbose      = FALSE
)

fit$convergence  # 0 indicates successful convergence
#> [1] 0

Inspect estimates

# Tidy table of parameter estimates with standard errors
components_table(fit, digits = 3)
#> 
#> Factor Model Results by Component
#> ======================================================================================================================== 
#> 
#> Parameter             factor         cog1         cog2         cog3          nc1          nc2          nc3
#> ---------------------------------------------------------------------------------------------------------- 
#> Intercept                         0.170**     0.353***     0.264***       -0.030        0.089        0.025
#>                                   (0.065)      (0.057)      (0.047)      (0.059)      (0.061)      (0.049)
#> Sigma                            0.608***     0.535***     0.483***     0.545***     0.580***     0.531***
#>                                   (0.037)      (0.032)      (0.027)      (0.037)      (0.043)      (0.028)
#> factor_var_1        0.794***                                                                              
#>                      (0.089)                                                                              
#> factor_var_2        0.659***                                                                              
#>                      (0.062)                                                                              
#> cog2_loading_1                                0.888***                                                    
#>                                                (0.053)                                                    
#> cog3_loading_1                                             0.692***                                       
#>                                                             (0.044)                                       
#> nc2_loading_2                                                                        1.036***             
#>                                                                                       (0.067)             
#> nc3_loading_2                                                                                     0.769***
#>                                                                                                    (0.048)
#> 
#> ---------------------------------------------------------------------------------------------------------- 
#> N                          0          300          300          300          300          300          300
#> Log-likelihood: -2053.38
#> 
#> Standard errors in parentheses
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1

Factor variances are near the true values (1.0 for the cognitive factor and 0.64 = 0.8^2 for the non-cognitive factor); loadings recover the simulated values (cog2 ≈ 0.9, cog3 ≈ 0.7, nc2 ≈ 1.1, nc3 ≈ 0.8).

Factor scores

Posterior mean factor scores for each observation can be recovered from the estimated model:

fscores <- estimate_factorscores_rcpp(
  fit, dat, control = ctrl, parallel = FALSE, verbose = FALSE
)
head(fscores[, c("obs_id", "factor_1", "factor_2",
                 "se_factor_1", "se_factor_2", "converged")])
#> Factor Score Estimates
#> ======================
#> 
#> Observations: 6
#> Factors: 2
#> Converged: 6 (100.0%)
#> 
#> Summary Statistics:
#>   Factor 1: mean=-0.295, sd=0.944, mean_se=0.365
#>   Factor 2: mean=0.702, sd=0.957, mean_se=0.340
#> 
#> First 6 rows:
#>   obs_id   factor_1      factor_2 se_factor_1 se_factor_2 converged
#> 1      1 -1.2915708  0.7037167694    0.364723   0.3399574      TRUE
#> 2      2  0.5875902 -0.8011073548    0.364723   0.3399574      TRUE
#> 3      3 -1.0696673  1.8108452339    0.364723   0.3399574      TRUE
#> 4      4  0.7438436 -0.0003981072    0.364723   0.3399574      TRUE
#> 5      5  0.3346083  1.2404898768    0.364723   0.3399574      TRUE
#> 6      6 -1.0742353  1.2587562101    0.364723   0.3399574      TRUE

# Correlation of estimated factor scores with the true (unobserved) factors
cor(fscores$factor_1, f_cog)
#> [1] 0.936484
cor(fscores$factor_2, f_noncog)
#> [1] 0.9422921

Where to go next

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.