saeHB-Twofold-Beta

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

saeHB-Twofold-Beta

STEP 1 Load package and load the data

library(saeHB.TF.beta)
data("dataBeta")

STEP 2 Data Exploration

dataBeta$CV <- sqrt(dataBeta$vardir)/dataBeta$y
explore(y~X1+X2, CV = "CV", data = dataBeta, normality = TRUE)
#>                   y         X1         X2
#> Min.    0.002826007 0.04205953 0.02461368
#> 1st Qu. 0.677417203 0.34818477 0.20900053
#> Median  0.986658040 0.58338771 0.39409355
#> Mean    0.786269096 0.57240016 0.43864087
#> 3rd Qu. 0.999116512 0.85933934 0.73765722
#> Max.    1.000000000 0.99426978 0.96302423
#> NA      0.000000000 0.00000000 0.00000000
#> 
#> Normality test for y :
#> Decision: Data do NOT follow normal distribution, with p.value = 0 < 0.05

STEPS 3 Fitting Twofold HB Beta Model

model <- betaTF(y~X1+X2,area="codearea",weight="w",iter.mcmc = 10000, burn.in = 3000, iter.update = 5, thin = 10, data=dataBeta)

STEP 4 Check Convergence via Plot MCMC

Trace Plot, Density Plot, ACF Plot, R-Hat Plot

model$plot
#> [[1]]

#> 
#> [[2]]

#> 
#> [[3]]

#> 
#> [[4]]

STEP 5 Extract Result for Area

Area Mean Estimation

model$Est_area
#>         Mean         SD      2.5%       25%       50%       75%    97.5%
#> 1  1.2179303 0.08367189 1.0199331 1.1771017 1.2359316 1.2786094 1.321750
#> 2  0.9663316 0.10936126 0.7284934 0.9008660 0.9787452 1.0433653 1.146264
#> 3  1.2811164 0.10489759 1.0422264 1.2281014 1.3043557 1.3596630 1.415032
#> 4  0.9749039 0.06918723 0.8166622 0.9359717 0.9910931 1.0249401 1.068415
#> 5  0.9812609 0.16483636 0.6162753 0.8838426 0.9916185 1.0920329 1.273873
#> 6  1.4889919 0.13338164 1.1885749 1.4149461 1.5170100 1.5832349 1.669097
#> 7  0.8246541 0.17429089 0.5025987 0.7020238 0.8255332 0.9514103 1.143805
#> 8  0.8481751 0.15196815 0.5238050 0.7471580 0.8583737 0.9609586 1.120119
#> 9  0.7298027 0.18379549 0.3490142 0.6115407 0.7395244 0.8513946 1.077212
#> 10 0.7941518 0.06083836 0.6447614 0.7591267 0.8054407 0.8369791 0.880041

Area Random Effect

model$area_randeff
#>            f_mean
#> f[1]   0.32835454
#> f[2]  -0.01729205
#> f[3]   0.46235495
#> f[4]   0.78472625
#> f[5]  -0.36881912
#> f[6]   0.57163578
#> f[7]  -0.74469434
#> f[8]  -0.34217516
#> f[9]  -0.94902221
#> f[10]  0.36114471

Calculate Area Relative Standard Error (RSE) or CV and Mean Squared Error (MSE)

CV_area <- (model$Est_area$SD)/(model$Est_area$Mean)*100
MSE_area <- model$Est_area$SD^2
summary(cbind(CV_area,MSE_area))
#>     CV_area          MSE_area       
#>  Min.   : 6.870   Min.   :0.003701  
#>  1st Qu.: 7.793   1st Qu.:0.008002  
#>  Median :10.138   Median :0.014875  
#>  Mean   :13.113   Mean   :0.017067  
#>  3rd Qu.:17.637   3rd Qu.:0.026152  
#>  Max.   :25.184   Max.   :0.033781

STEP 6 Extract Result for Subarea

Subarea Mean Estimation

model$Est_area
#>         Mean         SD      2.5%       25%       50%       75%    97.5%
#> 1  1.2179303 0.08367189 1.0199331 1.1771017 1.2359316 1.2786094 1.321750
#> 2  0.9663316 0.10936126 0.7284934 0.9008660 0.9787452 1.0433653 1.146264
#> 3  1.2811164 0.10489759 1.0422264 1.2281014 1.3043557 1.3596630 1.415032
#> 4  0.9749039 0.06918723 0.8166622 0.9359717 0.9910931 1.0249401 1.068415
#> 5  0.9812609 0.16483636 0.6162753 0.8838426 0.9916185 1.0920329 1.273873
#> 6  1.4889919 0.13338164 1.1885749 1.4149461 1.5170100 1.5832349 1.669097
#> 7  0.8246541 0.17429089 0.5025987 0.7020238 0.8255332 0.9514103 1.143805
#> 8  0.8481751 0.15196815 0.5238050 0.7471580 0.8583737 0.9609586 1.120119
#> 9  0.7298027 0.18379549 0.3490142 0.6115407 0.7395244 0.8513946 1.077212
#> 10 0.7941518 0.06083836 0.6447614 0.7591267 0.8054407 0.8369791 0.880041

Subarea Random Effect

model$sub_randeff
#>             u_mean
#> u[1]   0.119715995
#> u[2]  -0.056201482
#> u[3]   0.227430412
#> u[4]   0.226823593
#> u[5]  -0.536328040
#> u[6]   0.301095852
#> u[7]   0.001988169
#> u[8]   0.168360702
#> u[9]   0.364382150
#> u[10]  0.525992674
#> u[11]  0.123937310
#> u[12]  0.211898157
#> u[13] -0.200098286
#> u[14]  0.617127849
#> u[15] -0.905695714
#> u[16]  0.037466149
#> u[17]  0.876489893
#> u[18] -0.388672505
#> u[19] -0.999258002
#> u[20]  0.289406088
#> u[21] -0.024721479
#> u[22]  0.288812727
#> u[23] -0.461070642
#> u[24] -0.288971985
#> u[25] -0.684000074
#> u[26] -0.608315501
#> u[27]  0.408009283
#> u[28]  0.501033723
#> u[29] -0.475810178
#> u[30]  0.341399077

Calculate Subarea Relative Standard Error (RSE) or CV and Mean Squared Error (MSE)

CV_sub <- (model$Est_sub$SD)/(model$Est_sub$Mean)*100
MSE_sub <- model$Est_sub$SD^2
summary(cbind(CV_sub,MSE_sub))
#>      CV_sub          MSE_sub        
#>  Min.   : 6.691   Min.   :0.003821  
#>  1st Qu.: 9.642   1st Qu.:0.007333  
#>  Median :15.050   Median :0.015436  
#>  Mean   :19.198   Mean   :0.018663  
#>  3rd Qu.:26.776   3rd Qu.:0.029237  
#>  Max.   :52.603   Max.   :0.048170

Extract Coefficient Estimation \(\beta\)

model$coefficient
#>           Mean        SD       2.5%       25%       50%       75%     97.5%
#> b[0] 0.8598961 0.2630190  0.3370801 0.6867451 0.8551453 1.0395515 1.3858284
#> b[1] 0.3071740 0.3494490 -0.3458376 0.0597489 0.3201630 0.5505867 0.9800302
#> b[2] 1.1810308 0.4042009  0.3863613 0.9234731 1.1893539 1.4589896 1.9547040

Extract Area Random Effect Variance \(\sigma_u^2\) and Subarea Random Effect Variance \(\sigma_v^2\)

model$refVar
#>       b_var     a_var
#> 1 0.9970455 0.9252804

STEP 7 Visualize The Result

library(ggplot2)

Save the output of Subarea estimation and the Direct Estimation (y)

df <- data.frame(
  area = seq_along(model$Est_sub$Mean),             
  direct = dataBeta$y,              
  mean_estimate = model$Est_sub$Mean
)

Area Mean Estimation

ggplot(df, aes(x = area)) +
  geom_point(aes(y = direct), size = 2, colour = "#388894", alpha = 0.6) +   # scatter points
  geom_point(aes(y = mean_estimate), size = 2, colour = "#2b707a") +   # scatter points
  geom_line(aes(y = direct), linewidth = 1, colour = "#388894", alpha = 0.6) +  # line connecting points
  geom_line(aes(y = mean_estimate), linewidth = 1, colour = "#2b707a") +  # line connecting points
  labs(
    title = "Scatter + Line Plot of Estimated Means",
    x = "Area Index",
    y = "Value"
  )

ggplot(df, aes(x = , direct, y = mean_estimate)) +
  geom_point( size = 2, colour = "#2b707a") +
   geom_abline(intercept = 0, slope = 1, color = "gray40", linetype = "dashed") +
  geom_smooth(method = "lm", color = "#2b707a", se = FALSE) +
  ylim(0, 1) +
  labs(
    title = "Scatter Plot of Direct vs Model-Based",
    x = "Direct",
    y = "Model Based"
  )

Combine the CV of direct estimation and CV from output

df_cv <- data.frame(
  direct = sqrt(dataBeta$vardir)/dataBeta$y*100,              
  cv_estimate = CV_sub
)
df_cv <- df_cv[order(df_cv$direct), ]
df_cv$area <- seq_along(dataBeta$y)

Relative Standard Error of Subarea Mean Estimation

ggplot(df_cv, aes(x = area)) +
  geom_point(aes(y = direct), size = 2, colour = "#388894", alpha = 0.5) +
  geom_point(aes(y = cv_estimate), size = 2, colour = "#2b707a") +
  ylim(0, 100) +
  labs(
    title = "Scatter Plot of Direct vs Model-Based CV",
    x = "Area",
    y = "CV (%)"
  )

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.