This short tutorial gives an example of how one can statistically assess whether a market is in an equilibrium state. The tutorial assumes some familiarity with the concepts and the functionality of the package. The basic_usage vignette can be helpful in acquiring this familiarity.
Load the required libraries.
Prepare the data. Here, we simply simulate data using a data generating process for a market in equilibrium.
nobs <- 1000
tobs <- 5
alpha_d <- -3.9
beta_d0 <- 18.9
beta_d <- c(2.1, -0.7)
eta_d <- c(3.5, 6.25)
alpha_s <- 2.8
beta_s0 <- 3.2
beta_s <- c(2.65)
eta_s <- c(1.15, 4.2)
sigma_d <- 0.8
sigma_s <- 1.1
rho_ds <- 0.0
seed <- 42
eq_data <- simulate_model_data(
"equilibrium_model", nobs, tobs,
alpha_d, beta_d0, beta_d, eta_d,
alpha_s, beta_s0, beta_s, eta_s,
NA, NA, c(NA),
sigma_d = sigma_d, sigma_s = sigma_s, rho_ds = rho_ds,
seed = seed
)
Prepare the basic parameters for model initialization.
key_columns <- c("id", "date")
time_column <- c("date")
quantity_column <- "Q"
price_column <- "P"
demand_specification <- paste0(price_column, " + Xd1 + Xd2 + X1 + X2")
supply_specification <- "Xs1 + X1 + X2"
price_specification <- "Xp1"
verbose <- 2
use_correlated_shocks <- TRUE
Using the above parameterization, construct the model objects. Here we construct two equilibrium models and four disequilibrium models. All the models are constructed using the simulated data from a model of market in equilibrium.
eqmdl <- new(
"equilibrium_model",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Equilibrium FIML with correlated shocks' model
#> Warning: Removing unobserved '1' level(s).
bsmdl <- new(
"diseq_basic",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Basic with correlated shocks' model
#> Warning: Removing unobserved '1' level(s).
damdl <- new(
"diseq_deterministic_adjustment",
key_columns, time_column,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data,
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Deterministic Adjustment with correlated shocks' model
#> Info: Dropping 1000 rows by generating 'LAGGED_P'.
#> Info: Sample separated with 1971 rows in excess supply and 2029 in excess demand regime.
Set the estimation parameters.
Estimate the models.
eqmdl_reg <- estimate(eqmdl, method = "2SLS")
eqmdl_est <- estimate(eqmdl,
control = optimization_controls,
method = optimization_method
)
bsmdl_est <- estimate(bsmdl,
control = optimization_controls,
method = optimization_method
)
damdl_est <- estimate(damdl,
control = optimization_controls,
method = optimization_method
)
All the models provide estimates for the simulated data. Even with simulated data, it is difficult to assess which model performs better by examining only the summaries in separation or collectively.
summary(eqmdl_reg$first_stage_model)
#>
#> Call:
#> lm(formula = first_stage_formula, data = object@model_tibble)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.88388 -0.14215 0.00305 0.14088 0.81967
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2.356218 0.036603 64.37 <2e-16 ***
#> Xd1 0.314532 0.006576 47.83 <2e-16 ***
#> Xd2 -0.102046 0.006549 -15.58 <2e-16 ***
#> X1 0.347118 0.006523 53.21 <2e-16 ***
#> X2 0.306744 0.006445 47.60 <2e-16 ***
#> Xs1 -0.401540 0.006568 -61.14 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.2087 on 3994 degrees of freedom
#> Multiple R-squared: 0.7409, Adjusted R-squared: 0.7406
#> F-statistic: 2284 on 5 and 3994 DF, p-value: < 2.2e-16
summary(eqmdl_reg$system_model)
#>
#> systemfit results
#> method: 2SLS
#>
#> N DF SSR detRCov OLS-R2 McElroy-R2
#> system 8000 7989 7765.49 0.838051 0.895447 0.90887
#>
#> N DF SSR MSE RMSE R2 Adj R2
#> demand 4000 3994 2579.28 0.645789 0.80361 0.930546 0.930459
#> supply 4000 3995 5186.20 1.298174 1.13937 0.860348 0.860208
#>
#> The covariance matrix of the residuals
#> demand supply
#> demand 0.6457891 -0.0171846
#> supply -0.0171846 1.2981739
#>
#> The correlations of the residuals
#> demand supply
#> demand 1.0000000 -0.0187684
#> supply -0.0187684 1.0000000
#>
#>
#> 2SLS estimates for 'demand' (equation 1)
#> Model Formula: Q ~ P + Xd1 + Xd2 + X1 + X2
#> <environment: 0x55afc83a3c18>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55afc83a3c18>
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 18.7291651 0.1529666 122.4395 < 2.22e-16 ***
#> P -3.8534666 0.0629681 -61.1971 < 2.22e-16 ***
#> Xd1 2.1102794 0.0323478 65.2372 < 2.22e-16 ***
#> Xd2 -0.7156455 0.0261205 -27.3978 < 2.22e-16 ***
#> X1 3.5151555 0.0331078 106.1731 < 2.22e-16 ***
#> X2 6.2431905 0.0314974 198.2126 < 2.22e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.80361 on 3994 degrees of freedom
#> Number of observations: 4000 Degrees of Freedom: 3994
#> SSR: 2579.281574 MSE: 0.645789 Root MSE: 0.80361
#> Multiple R-Squared: 0.930546 Adjusted R-Squared: 0.930459
#>
#>
#> 2SLS estimates for 'supply' (equation 2)
#> Model Formula: Q ~ P + Xs1 + X1 + X2
#> <environment: 0x55afc83a3c18>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x55afc83a3c18>
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 2.7596055 0.3499559 7.88558 3.9968e-15 ***
#> P 2.8849781 0.1084928 26.59143 < 2.22e-16 ***
#> Xs1 2.7054083 0.0569035 47.54382 < 2.22e-16 ***
#> X1 1.1758638 0.0515231 22.82207 < 2.22e-16 ***
#> X2 4.1766642 0.0489108 85.39346 < 2.22e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.139374 on 3995 degrees of freedom
#> Number of observations: 4000 Degrees of Freedom: 3995
#> SSR: 5186.204766 MSE: 1.298174 Root MSE: 1.139374
#> Multiple R-Squared: 0.860348 Adjusted R-Squared: 0.860208
bbmle::summary(eqmdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(method = "BFGS", control = list(maxit = 10000,
#> reltol = 1e-08), skip.hessian = FALSE, start = c(D_P = -2.68706645853737,
#> D_CONST = 17.1339113219814, D_Xd1 = 1.73722956126006, D_Xd2 = -0.588859897639326,
#> D_X1 = 3.11549614992508, D_X2 = 5.88376213269379, S_P = 0.142585048267158,
#> S_CONST = 10.692054625689, S_Xs1 = 1.5882047811332, S_X1 = 2.11726946967838,
#> S_X2 = 5.03594363343709, D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0
#> ), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -3.853483 0.062929 -61.2353 < 2.2e-16 ***
#> D_CONST 18.730529 0.153363 122.1322 < 2.2e-16 ***
#> D_Xd1 2.110141 0.032270 65.3909 < 2.2e-16 ***
#> D_Xd2 -0.716020 0.026169 -27.3612 < 2.2e-16 ***
#> D_X1 3.515155 0.033086 106.2424 < 2.2e-16 ***
#> D_X2 6.243180 0.031473 198.3687 < 2.2e-16 ***
#> S_P 2.886243 0.108461 26.6109 < 2.2e-16 ***
#> S_CONST 2.755863 0.349846 7.8774 3.344e-15 ***
#> S_Xs1 2.705925 0.056883 47.5696 < 2.2e-16 ***
#> S_X1 1.175444 0.051504 22.8224 < 2.2e-16 ***
#> S_X2 4.176285 0.048892 85.4181 < 2.2e-16 ***
#> D_VARIANCE 0.644823 0.018994 33.9485 < 2.2e-16 ***
#> S_VARIANCE 1.297027 0.051289 25.2886 < 2.2e-16 ***
#> RHO -0.018907 0.023533 -0.8034 0.4217
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 6722.671
bbmle::summary(bsmdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", skip.hessian = FALSE, start = c(D_P = -2.68706645853737,
#> D_CONST = 17.1339113219814, D_Xd1 = 1.73722956126006, D_Xd2 = -0.588859897639326,
#> D_X1 = 3.11549614992508, D_X2 = 5.88376213269379, S_P = 0.142585048267158,
#> S_CONST = 10.692054625689, S_Xs1 = 1.5882047811332, S_X1 = 2.11726946967838,
#> S_X2 = 5.03594363343709, D_VARIANCE = 1, S_VARIANCE = 1,
#> RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -2.911913 0.072376 -40.2334 < 2.2e-16 ***
#> D_CONST 17.467852 0.217222 80.4149 < 2.2e-16 ***
#> D_Xd1 2.026359 0.048168 42.0682 < 2.2e-16 ***
#> D_Xd2 -0.711620 0.032789 -21.7029 < 2.2e-16 ***
#> D_X1 3.168023 0.047606 66.5467 < 2.2e-16 ***
#> D_X2 5.984037 0.046977 127.3816 < 2.2e-16 ***
#> S_P 0.924162 0.159424 5.7969 6.755e-09 ***
#> S_CONST 8.353210 0.563139 14.8333 < 2.2e-16 ***
#> S_Xs1 2.467525 0.115833 21.3025 < 2.2e-16 ***
#> S_X1 1.878510 0.096513 19.4638 < 2.2e-16 ***
#> S_X2 4.712034 0.100106 47.0706 < 2.2e-16 ***
#> D_VARIANCE 0.595982 0.023909 24.9274 < 2.2e-16 ***
#> S_VARIANCE 0.970348 0.073543 13.1943 < 2.2e-16 ***
#> RHO -0.335457 0.052592 -6.3785 1.788e-10 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 8182.64
bbmle::summary(damdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", skip.hessian = FALSE, start = c(D_P = -2.68706645853737,
#> D_CONST = 17.1339113219814, D_Xd1 = 1.73722956126006, D_Xd2 = -0.588859897639326,
#> D_X1 = 3.11549614992508, D_X2 = 5.88376213269379, S_P = 0.142585048267158,
#> S_CONST = 10.692054625689, S_Xs1 = 1.5882047811332, S_X1 = 2.11726946967838,
#> S_X2 = 5.03594363343709, P_DIFF = 1, D_VARIANCE = 1, S_VARIANCE = 1,
#> RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -3.848967 0.066359 -58.0023 < 2.2e-16 ***
#> D_CONST 18.717272 0.165305 113.2287 < 2.2e-16 ***
#> D_Xd1 2.110049 0.032270 65.3876 < 2.2e-16 ***
#> D_Xd2 -0.716016 0.026168 -27.3624 < 2.2e-16 ***
#> D_X1 3.515018 0.033094 106.2122 < 2.2e-16 ***
#> D_X2 6.243126 0.031472 198.3693 < 2.2e-16 ***
#> S_P 2.882241 0.110065 26.1867 < 2.2e-16 ***
#> S_CONST 2.772435 0.358361 7.7364 1.022e-14 ***
#> S_Xs1 2.705982 0.056885 47.5691 < 2.2e-16 ***
#> S_X1 1.175343 0.051508 22.8185 < 2.2e-16 ***
#> S_X2 4.176110 0.048899 85.4028 < 2.2e-16 ***
#> P_DIFF 0.008358 0.039024 0.2142 0.8304
#> D_VARIANCE 0.644759 0.018995 33.9428 < 2.2e-16 ***
#> S_VARIANCE 1.297088 0.051294 25.2875 < 2.2e-16 ***
#> RHO -0.018885 0.023533 -0.8025 0.4223
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 6722.625
The deterministic adjustment model has price dynamics that are analogous to excess demand and estimates one extra parameter. The directional model estimates one parameter less as the model does not have enough equations to identify prices in both demand and supply equations. The estimated parameters are summarized as follows.
sim_coef <- c(
alpha_d, beta_d0, beta_d, eta_d,
alpha_s, beta_s0, beta_s, eta_s,
NA,
sigma_d, sigma_s,
rho_ds
)
names(sim_coef) <- names(damdl_est@coef)
dm_inc <- eqmdl_reg$system_model$coefficients[
grep(
"demand",
names(eqmdl_reg$system_model$coefficients)
)
]
sp_inc <- eqmdl_reg$system_model$coefficients[
grep(
"supply",
names(eqmdl_reg$system_model$coefficients)
)
]
lm_coef <- c(
dm_inc[2], dm_inc[-2], sp_inc[2], sp_inc[-2],
NA,
NA, NA,
NA
)
eqmdl_coef <- append(
eqmdl_est@coef, c(NA),
after = which(names(eqmdl_est@coef) ==
get_prefixed_variance_variable(eqmdl@system@demand)) - 1
)
bsmdl_coef <- append(
bsmdl_est@coef, c(NA),
after = which(names(bsmdl_est@coef) ==
get_prefixed_variance_variable(bsmdl@system@demand)) - 1
)
damdl_coef <- damdl_est@coef
comp <- tibble::tibble(
parameter = names(sim_coef),
sim = sim_coef, lm = lm_coef, fi = eqmdl_coef,
bm = bsmdl_coef, da = damdl_coef,
lmerr = abs(lm_coef - sim_coef), fierr = abs(eqmdl_coef - sim_coef),
bmerr = abs(bsmdl_coef - sim_coef), daerr = abs(damdl_coef - sim_coef)
)
comp
#> # A tibble: 15 x 10
#> parameter sim lm fi bm da lmerr fierr bmerr
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 D_P -3.9 -3.85 -3.85 -2.91 -3.85 0.0465 0.0465 0.988
#> 2 D_CONST 18.9 18.7 18.7 17.5 18.7 0.171 0.169 1.43
#> 3 D_Xd1 2.1 2.11 2.11 2.03 2.11 0.0103 0.0101 0.0736
#> 4 D_Xd2 -0.7 -0.716 -0.716 -0.712 -0.716 0.0156 0.0160 0.0116
#> 5 D_X1 3.5 3.52 3.52 3.17 3.52 0.0152 0.0152 0.332
#> 6 D_X2 6.25 6.24 6.24 5.98 6.24 0.00681 0.00682 0.266
#> 7 S_P 2.8 2.88 2.89 0.924 2.88 0.0850 0.0862 1.88
#> 8 S_CONST 3.2 2.76 2.76 8.35 2.77 0.440 0.444 5.15
#> 9 S_Xs1 2.65 2.71 2.71 2.47 2.71 0.0554 0.0559 0.182
#> 10 S_X1 1.15 1.18 1.18 1.88 1.18 0.0259 0.0254 0.729
#> 11 S_X2 4.2 4.18 4.18 4.71 4.18 0.0233 0.0237 0.512
#> 12 P_DIFF NA NA NA NA 0.00836 NA NA NA
#> 13 D_VARIAN… 0.8 NA 0.645 0.596 0.645 NA 0.155 0.204
#> 14 S_VARIAN… 1.1 NA 1.30 0.970 1.30 NA 0.197 0.130
#> 15 RHO 0 NA -0.0189 -0.335 -0.0189 NA 0.0189 0.335
#> # … with 1 more variable: daerr <dbl>
Since we have used simulated data, we can calculate the average absolute error of the parameter estimation for each of the models. In practice, the population values are unknown and this calculation is impossible.
comp_means <- colMeans(comp[, grep("err", colnames(comp))], na.rm = TRUE)
comp_means
#> lmerr fierr bmerr daerr
#> 0.08138534 0.09076426 0.87318787 0.09056802
Moreover, the average absolute error cannot provide an overall estimation assessment as the market models have different parameter spaces. To assess the overall model performance one can instead use an information criterion.
model_names <- c(
eqmdl@model_type_string,
bsmdl@model_type_string, damdl@model_type_string
)
model_obs <- c(
get_number_of_observations(eqmdl),
get_number_of_observations(bsmdl),
get_number_of_observations(damdl)
)
model_errors <- c(
comp_means["fierr"],
comp_means["bmerr"],
comp_means["daerr"]
)
seltbl <- AIC(eqmdl_est, bsmdl_est, damdl_est) %>%
tibble::add_column(Model = model_names, .before = 1) %>%
tibble::add_column(Obs. = model_obs, `Mean Error` = model_errors) %>%
dplyr::rename(D.F. = df) %>%
dplyr::arrange(AIC)
seltbl
#> Model D.F. AIC Obs. Mean Error
#> eqmdl_est Equilibrium FIML 14 6750.671 4000 0.09076426
#> damdl_est Deterministic Adjustment 15 6752.625 4000 0.09056802
#> bsmdl_est Basic 14 8210.640 4000 0.87318787