This short tutorial gives an example of how one can statistically assess whether a market is in an equilibrium state. The tutorial assumes some familiarity with the concepts and the functionality of the package. The basic_usage vignette can be helpful in acquiring this familiarity.
Load the required libraries.
Prepare the data. Here, we simply simulate data using a data generating process for a market in equilibrium.
nobs <- 5000
tobs <- 5
alpha_d <- -1.9
beta_d0 <- 4.9
beta_d <- c(2.1, -0.7)
eta_d <- c(3.5, 6.25)
alpha_s <- 2.8
beta_s0 <- 1.2
beta_s <- c(0.65)
eta_s <- c(1.15, 4.2)
sigma_d <- 1
sigma_s <- 1
rho_ds <- 0.5
seed <- 443
eq_data <- simulate_model_data(
"equilibrium_model", nobs, tobs,
alpha_d, beta_d0, beta_d, eta_d,
alpha_s, beta_s0, beta_s, eta_s,
NA, NA, c(NA),
sigma_d = sigma_d, sigma_s = sigma_s, rho_ds = rho_ds,
seed = seed
)
Prepare the basic parameters for model initialization.
key_columns <- c("id", "date")
time_column <- c("date")
quantity_column <- "Q"
price_column <- "P"
demand_specification <- paste0(price_column, " + Xd1 + Xd2 + X1 + X2")
supply_specification <- "Xs1 + X1 + X2"
price_specification <- "Xp1"
verbose <- 2
use_correlated_shocks <- TRUE
Using the above parameterization, construct the model objects. Here we construct two equilibrium models and four disequilibrium models. All the models are constructed using the simulated data from a model of market in equilibrium.
eqmdl <- new(
"equilibrium_model",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Equilibrium FIML with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#>
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
bsmdl <- new(
"diseq_basic",
key_columns,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data[eq_data$date != 1, ],
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Basic with correlated shocks' model
#> Warning: Problem with `mutate()` input `date`.
#> ℹ Removing unobserved '1' level(s).
#>
#> ℹ Input `date` is `(function (x) ...`.
#> Warning: Removing unobserved '1' level(s).
damdl <- new(
"diseq_deterministic_adjustment",
key_columns, time_column,
quantity_column, price_column,
demand_specification, paste0(price_column, " + ", supply_specification),
eq_data,
use_correlated_shocks = use_correlated_shocks, verbose = verbose
)
#> Info: This is 'Deterministic Adjustment with correlated shocks' model
#> Info: Dropping 5000 rows by generating 'LAGGED_P'.
#> Info: Sample separated with 10003 rows in excess supply and 9997 in excess demand regime.
Set the estimation parameters.
Estimate the models.
eqmdl_reg <- estimate(eqmdl, method = "2SLS")
eqmdl_est <- estimate(eqmdl,
control = optimization_controls,
method = optimization_method
)
bsmdl_est <- estimate(bsmdl,
control = optimization_controls,
method = optimization_method
)
damdl_est <- estimate(damdl,
control = optimization_controls,
method = optimization_method
)
All the models provide estimates for the simulated data. Even with simulated data, it is difficult to assess which model performs better by examining only the summaries in separation or collectively.
summary(eqmdl_reg$first_stage_model)
#>
#> Call:
#> lm(formula = first_stage_formula, data = object@model_tibble)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.85453 -0.14407 0.00004 0.14368 0.78798
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 0.761849 0.017106 44.54 <2e-16 ***
#> Xd1 0.448077 0.003055 146.68 <2e-16 ***
#> Xd2 -0.144864 0.003056 -47.41 <2e-16 ***
#> X1 0.505181 0.003026 166.92 <2e-16 ***
#> X2 0.437596 0.003072 142.44 <2e-16 ***
#> Xs1 -0.139496 0.003057 -45.63 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.215 on 19994 degrees of freedom
#> Multiple R-squared: 0.7873, Adjusted R-squared: 0.7873
#> F-statistic: 1.48e+04 on 5 and 19994 DF, p-value: < 2.2e-16
summary(eqmdl_reg$system_model)
#>
#> systemfit results
#> method: 2SLS
#>
#> N DF SSR detRCov OLS-R2 McElroy-R2
#> system 40000 39989 39903.8 0.736073 0.899874 0.848746
#>
#> N DF SSR MSE RMSE R2 Adj R2
#> demand 20000 19994 20008.0 1.000702 1.000351 0.899592 0.899567
#> supply 20000 19995 19895.7 0.995036 0.997515 0.900156 0.900136
#>
#> The covariance matrix of the residuals
#> demand supply
#> demand 1.00070 0.509570
#> supply 0.50957 0.995036
#>
#> The correlations of the residuals
#> demand supply
#> demand 1.000000 0.510661
#> supply 0.510661 1.000000
#>
#>
#> 2SLS estimates for 'demand' (equation 1)
#> Model Formula: Q ~ P + Xd1 + Xd2 + X1 + X2
#> <environment: 0x562e825869c8>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x562e825869c8>
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 4.8762033 0.0827412 58.9332 < 2.22e-16 ***
#> P -1.8442136 0.1019472 -18.0899 < 2.22e-16 ***
#> Xd1 2.0524139 0.0478747 42.8705 < 2.22e-16 ***
#> Xd2 -0.6858288 0.0205565 -33.3632 < 2.22e-16 ***
#> X1 3.5025023 0.0534337 65.5486 < 2.22e-16 ***
#> X2 6.2117062 0.0468246 132.6589 < 2.22e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 1.000351 on 19994 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19994
#> SSR: 20008.043378 MSE: 1.000702 Root MSE: 1.000351
#> Multiple R-Squared: 0.899592 Adjusted R-Squared: 0.899567
#>
#>
#> 2SLS estimates for 'supply' (equation 2)
#> Model Formula: Q ~ P + Xs1 + X1 + X2
#> <environment: 0x562e825869c8>
#> Instruments: ~Xd1 + Xd2 + X1 + X2 + Xs1
#> <environment: 0x562e825869c8>
#>
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1.3095281 0.0768882 17.0316 < 2.22e-16 ***
#> P 2.7509924 0.0300520 91.5410 < 2.22e-16 ***
#> Xs1 0.6409146 0.0148046 43.2914 < 2.22e-16 ***
#> X1 1.1808148 0.0207026 57.0370 < 2.22e-16 ***
#> X2 4.2008260 0.0194740 215.7143 < 2.22e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.997515 on 19995 degrees of freedom
#> Number of observations: 20000 Degrees of Freedom: 19995
#> SSR: 19895.74173 MSE: 0.995036 Root MSE: 0.997515
#> Multiple R-Squared: 0.900156 Adjusted R-Squared: 0.900136
bbmle::summary(eqmdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(method = "BFGS", control = list(maxit = 10000,
#> reltol = 1e-08), skip.hessian = FALSE, start = c(D_P = 0.248904000229706,
#> D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488,
#> D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514,
#> S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901,
#> S_X2 = 4.66113245104969, D_VARIANCE = 1, S_VARIANCE = 1, RHO = 0
#> ), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -1.847620 0.102162 -18.085 < 2.2e-16 ***
#> D_CONST 4.845601 0.079952 60.606 < 2.2e-16 ***
#> D_Xd1 2.057105 0.047842 42.998 < 2.2e-16 ***
#> D_Xd2 -0.676054 0.019563 -34.558 < 2.2e-16 ***
#> D_X1 3.503609 0.053540 65.439 < 2.2e-16 ***
#> D_X2 6.213180 0.046915 132.435 < 2.2e-16 ***
#> S_P 2.750472 0.030038 91.567 < 2.2e-16 ***
#> S_CONST 1.311502 0.076848 17.066 < 2.2e-16 ***
#> S_Xs1 0.640863 0.014796 43.313 < 2.2e-16 ***
#> S_X1 1.180568 0.020695 57.045 < 2.2e-16 ***
#> S_X2 4.201078 0.019466 215.811 < 2.2e-16 ***
#> D_VARIANCE 1.001167 0.024052 41.625 < 2.2e-16 ***
#> S_VARIANCE 0.994439 0.011789 84.353 < 2.2e-16 ***
#> RHO 0.510389 0.017863 28.572 < 2.2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 46374.23
bbmle::summary(bsmdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706,
#> D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488,
#> D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514,
#> S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901,
#> S_X2 = 4.66113245104969, D_VARIANCE = 1, S_VARIANCE = 1,
#> RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P 0.049812 0.053832 0.9253 0.3548000
#> D_CONST 4.014641 0.115685 34.7033 < 2.2e-16 ***
#> D_Xd1 1.322024 0.036993 35.7369 < 2.2e-16 ***
#> D_Xd2 -0.455326 0.019782 -23.0177 < 2.2e-16 ***
#> D_X1 2.577838 0.037536 68.6762 < 2.2e-16 ***
#> D_X2 5.390141 0.034642 155.5962 < 2.2e-16 ***
#> S_P 1.761937 0.105878 16.6412 < 2.2e-16 ***
#> S_CONST 2.836331 0.359832 7.8824 3.212e-15 ***
#> S_Xs1 1.119391 0.086554 12.9329 < 2.2e-16 ***
#> S_X1 1.575385 0.091235 17.2674 < 2.2e-16 ***
#> S_X2 4.613717 0.082291 56.0662 < 2.2e-16 ***
#> D_VARIANCE 0.822471 0.017428 47.1932 < 2.2e-16 ***
#> S_VARIANCE 1.372021 0.090718 15.1240 < 2.2e-16 ***
#> RHO 0.204999 0.056839 3.6066 0.0003102 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 51245.47
bbmle::summary(damdl_est)
#> Maximum likelihood estimation
#>
#> Call:
#> `bbmle::mle2`(list(control = list(maxit = 10000, reltol = 1e-08),
#> method = "BFGS", skip.hessian = FALSE, start = c(D_P = 0.248904000229706,
#> D_CONST = 4.01094480918336, D_Xd1 = 1.1137756892884, D_Xd2 = -0.380925520464488,
#> D_X1 = 2.44419761908979, D_X2 = 5.29620369779804, S_P = 1.70876661459514,
#> S_CONST = 2.885697667298, S_Xs1 = 0.493421828653425, S_X1 = 1.70854379946901,
#> S_X2 = 4.66113245104969, P_DIFF = 1, D_VARIANCE = 1, S_VARIANCE = 1,
#> RHO = 0), minuslogl = function (...)
#> minus_log_likelihood(object, ...), gr = function (...)
#> gradient(object, ...)))
#>
#> Coefficients:
#> Estimate Std. Error z value Pr(z)
#> D_P -1.849202 0.102147 -18.1034 <2e-16 ***
#> D_CONST 4.860420 0.082592 58.8486 <2e-16 ***
#> D_Xd1 2.054982 0.047717 43.0664 <2e-16 ***
#> D_Xd2 -0.675406 0.019519 -34.6019 <2e-16 ***
#> D_X1 3.501842 0.053401 65.5767 <2e-16 ***
#> D_X2 6.211275 0.046794 132.7369 <2e-16 ***
#> S_P 2.757014 0.030859 89.3424 <2e-16 ***
#> S_CONST 1.284230 0.081551 15.7476 <2e-16 ***
#> S_Xs1 0.640862 0.014799 43.3037 <2e-16 ***
#> S_X1 1.181013 0.020696 57.0636 <2e-16 ***
#> S_X2 4.200912 0.019469 215.7692 <2e-16 ***
#> P_DIFF -0.013436 0.013574 -0.9898 0.3223
#> D_VARIANCE 0.999927 0.023931 41.7841 <2e-16 ***
#> S_VARIANCE 0.994434 0.011789 84.3560 <2e-16 ***
#> RHO 0.511222 0.017821 28.6869 <2e-16 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> -2 log L: 46373.32
The deterministic adjustment model has price dynamics that are analogous to excess demand and estimates one extra parameter. The directional model estimates one parameter less as the model does not have enough equations to identify prices in both demand and supply equations. The estimated parameters are summarized as follows.
sim_coef <- c(
alpha_d, beta_d0, beta_d, eta_d,
alpha_s, beta_s0, beta_s, eta_s,
NA,
sigma_d, sigma_s,
rho_ds
)
names(sim_coef) <- names(damdl_est@coef)
dm_inc <- eqmdl_reg$system_model$coefficients[
grep(
"demand",
names(eqmdl_reg$system_model$coefficients)
)
]
sp_inc <- eqmdl_reg$system_model$coefficients[
grep(
"supply",
names(eqmdl_reg$system_model$coefficients)
)
]
lm_coef <- c(
dm_inc[2], dm_inc[-2], sp_inc[2], sp_inc[-2],
NA,
NA, NA,
NA
)
eqmdl_coef <- append(
eqmdl_est@coef, c(NA),
after = which(names(eqmdl_est@coef) ==
get_prefixed_variance_variable(eqmdl@system@demand)) - 1
)
bsmdl_coef <- append(
bsmdl_est@coef, c(NA),
after = which(names(bsmdl_est@coef) ==
get_prefixed_variance_variable(bsmdl@system@demand)) - 1
)
damdl_coef <- damdl_est@coef
comp <- tibble::tibble(
parameter = names(sim_coef),
sim = sim_coef, lm = lm_coef, fi = eqmdl_coef,
bm = bsmdl_coef, da = damdl_coef,
lmerr = abs(lm_coef - sim_coef), fierr = abs(eqmdl_coef - sim_coef),
bmerr = abs(bsmdl_coef - sim_coef), daerr = abs(damdl_coef - sim_coef)
)
comp
#> # A tibble: 15 x 10
#> parameter sim lm fi bm da lmerr fierr bmerr
#> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#> 1 D_P -1.9 -1.84 -1.85 0.0498 -1.85 5.58e-2 0.0524 1.95
#> 2 D_CONST 4.9 4.88 4.85 4.01 4.86 2.38e-2 0.0544 0.885
#> 3 D_Xd1 2.1 2.05 2.06 1.32 2.05 4.76e-2 0.0429 0.778
#> 4 D_Xd2 -0.7 -0.686 -0.676 -0.455 -0.675 1.42e-2 0.0239 0.245
#> 5 D_X1 3.5 3.50 3.50 2.58 3.50 2.50e-3 0.00361 0.922
#> 6 D_X2 6.25 6.21 6.21 5.39 6.21 3.83e-2 0.0368 0.860
#> 7 S_P 2.8 2.75 2.75 1.76 2.76 4.90e-2 0.0495 1.04
#> 8 S_CONST 1.2 1.31 1.31 2.84 1.28 1.10e-1 0.112 1.64
#> 9 S_Xs1 0.65 0.641 0.641 1.12 0.641 9.09e-3 0.00914 0.469
#> 10 S_X1 1.15 1.18 1.18 1.58 1.18 3.08e-2 0.0306 0.425
#> 11 S_X2 4.2 4.20 4.20 4.61 4.20 8.26e-4 0.00108 0.414
#> 12 P_DIFF NA NA NA NA -0.0134 NA NA NA
#> 13 D_VARIAN… 1 NA 1.00 0.822 1.00 NA 0.00117 0.178
#> 14 S_VARIAN… 1 NA 0.994 1.37 0.994 NA 0.00556 0.372
#> 15 RHO 0.5 NA 0.510 0.205 0.511 NA 0.0104 0.295
#> # … with 1 more variable: daerr <dbl>
Since we have used simulated data, we can calculate the average absolute error of the parameter estimation for each of the models. In practice, the population values are unknown and this calculation is impossible.
comp_means <- colMeans(comp[, grep("err", colnames(comp))], na.rm = TRUE)
comp_means
#> lmerr fierr bmerr daerr
#> 0.03467257 0.03092702 0.74766291 0.02754986
Moreover, the average absolute error cannot provide an overall estimation assessment as the market models have different parameter spaces. To assess the overall model performance one can instead use an information criterion.
model_names <- c(
eqmdl@model_type_string,
bsmdl@model_type_string, damdl@model_type_string
)
model_obs <- c(
get_number_of_observations(eqmdl),
get_number_of_observations(bsmdl),
get_number_of_observations(damdl)
)
model_errors <- c(
comp_means["fierr"],
comp_means["bmerr"],
comp_means["daerr"]
)
seltbl <- AIC(eqmdl_est, bsmdl_est, damdl_est) %>%
tibble::add_column(Model = model_names, .before = 1) %>%
tibble::add_column(Obs. = model_obs, `Mean Error` = model_errors) %>%
dplyr::rename(D.F. = df) %>%
dplyr::arrange(AIC)
seltbl
#> Model D.F. AIC Obs. Mean Error
#> eqmdl_est Equilibrium FIML 14 46402.23 20000 0.03092702
#> damdl_est Deterministic Adjustment 15 46403.32 20000 0.02754986
#> bsmdl_est Basic 14 51273.47 20000 0.74766291