The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Updated JSS code

2021-05-01

Preliminaries

library(HLMdiag)
library(ggplot2)
library(lme4)
#> Loading required package: Matrix

Exam data

data("Exam", package = "mlmRev")
head(Exam)
#>   school   normexam schgend    schavg      vr     intake   standLRT sex type
#> 1      1  0.2613242   mixed 0.1661752 mid 50% bottom 25%  0.6190592   F  Mxd
#> 2      1  0.1340672   mixed 0.1661752 mid 50%    mid 50%  0.2058022   F  Mxd
#> 3      1 -1.7238820   mixed 0.1661752 mid 50%    top 25% -1.3645760   M  Mxd
#> 4      1  0.9675862   mixed 0.1661752 mid 50%    mid 50%  0.2058022   F  Mxd
#> 5      1  0.5443412   mixed 0.1661752 mid 50%    mid 50%  0.3711052   F  Mxd
#> 6      1  1.7348992   mixed 0.1661752 mid 50% bottom 25%  2.1894372   M  Mxd
#>   student
#> 1     143
#> 2     145
#> 3     142
#> 4     141
#> 5     138
#> 6     155

Residual analysis

# Model fm1 on page 6
(fm1 <- lmer(normexam ~ standLRT + (1 | school), Exam, REML = FALSE))
#> Linear mixed model fit by maximum likelihood  ['lmerMod']
#> Formula: normexam ~ standLRT + (1 | school)
#>    Data: Exam
#>       AIC       BIC    logLik  deviance  df.resid 
#>  9365.243  9390.478 -4678.622  9357.243      4055 
#> Random effects:
#>  Groups   Name        Std.Dev.
#>  school   (Intercept) 0.3035  
#>  Residual             0.7522  
#> Number of obs: 4059, groups:  school, 65
#> Fixed Effects:
#> (Intercept)     standLRT  
#>    0.002391     0.563371

# Extract level-1 residuals
# Residual plot from page 7
resid_fm1 <- hlm_resid(fm1)
head(resid_fm1)
#> # A tibble: 6 x 10
#>      id normexam standLRT school  .resid .fitted .ls.resid .ls.fitted .mar.resid
#>   <dbl>    <dbl>    <dbl> <fct>    <dbl>   <dbl>     <dbl>      <dbl>      <dbl>
#> 1     1    0.261    0.619 1      -0.464    0.725    -0.561      0.822    -0.0898
#> 2     2    0.134    0.206 1      -0.358    0.492    -0.395      0.529     0.0157
#> 3     3   -1.72    -1.36  1      -1.33    -0.393    -1.14      -0.585    -0.958 
#> 4     4    0.968    0.206 1       0.475    0.492     0.438      0.529     0.849 
#> 5     5    0.544    0.371 1      -0.0409   0.585    -0.102      0.647     0.333 
#> 6     6    1.73     2.19  1       0.125    1.61     -0.201      1.94      0.499 
#> # … with 1 more variable: .mar.fitted <dbl>

ggplot(resid_fm1, aes(x = standLRT, y = .ls.resid)) + 
  geom_point() + 
  geom_smooth() +
  labs(y = "LS level-1 residuals")
#> `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# Model fm2 on page 7
fm2 <- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) + 
              (1 | school), Exam, REML = FALSE)


resid_fm2 <- hlm_resid(fm2)
#> Warning in LSresids.lmerMod(object, level = 1, standardize = standardize): LS residuals might be inaccurate as one or more groups are rank deficient.
#> Use the 'include.ls = FALSE' parameter to get EB residuals only.


# Figure 2 page 8
ggplot(resid_fm2, aes(x = `I(standLRT^2)`, y = .ls.resid)) + 
  geom_hline(yintercept = 0, color = "blue") + 
  geom_point() +
  labs( y = "LS residuals", x = "standLRT2")

ggplot_qqnorm() function is now defunct, Q-Q plots are now much easier to create in ggplot2 directly than when the package was first created.

ggplot(resid_fm2, aes(sample = .ls.resid)) +
  geom_qq() + 
  geom_qq_line() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

A better alternative if found in the qqplotr package

library(qqplotr)
#> 
#> Attaching package: 'qqplotr'
#> The following objects are masked from 'package:ggplot2':
#> 
#>     StatQqLine, stat_qq_line
ggplot(resid_fm2, aes(sample = .ls.resid)) +
  stat_qq_band() +
  stat_qq_line() + 
  stat_qq_point() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

EB residuals are now called .ranef. residuals

# Model fm3, page 11
fm3 <- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) + sex +
              (standLRT | school), Exam, REML = FALSE)

## Extract level-2 EB residuals
resid_fm3 <- hlm_resid(fm3, level = "school")
#> Warning in LSresids.lmerMod(object, level = level, stand = standardize): The model matrix is likely rank deficient. Some LS residuals cannot be calculated.
#> It is recommended to use EB (.ranef) group level residuals for this model.
resid_fm3
#> # A tibble: 65 x 5
#>    school .ranef.intercept .ranef.stand_lrt .ls.intercept .ls.stand_lrt
#>    <chr>             <dbl>            <dbl>         <dbl>         <dbl>
#>  1 1                0.404            0.127          0.533       0.348  
#>  2 2                0.401            0.159         NA          NA      
#>  3 3                0.495            0.0780         0.523       0.00905
#>  4 4                0.0597           0.120          0.265       0.186  
#>  5 5                0.251            0.0711         0.186       0.0503 
#>  6 6                0.448            0.0482        NA          NA      
#>  7 7                0.297           -0.151         NA          NA      
#>  8 8               -0.0788           0.0131        NA          NA      
#>  9 9               -0.157           -0.0660        -0.282      -0.251  
#> 10 10              -0.282           -0.137         -0.172      -0.451  
#> # … with 55 more rows
## Construct school-level data set
library(dplyr)
SchoolExam <- Exam %>%
  group_by(school) %>%
  dplyr::summarize(
    size = length(school),
    schgend = unique(schgend),
    schavg = unique(schavg),
    type = unique(type), 
    schLRT = mean(standLRT)
  )

SchoolExam <- SchoolExam %>% left_join(resid_fm3, by = "school")
SchoolExam
#> # A tibble: 65 x 10
#>    school  size schgend  schavg type   schLRT .ranef.intercept .ranef.stand_lrt
#>    <chr>  <int> <fct>     <dbl> <fct>   <dbl>            <dbl>            <dbl>
#>  1 1         73 mixed    0.166  Mxd    0.166            0.404            0.127 
#>  2 2         55 girls    0.395  Sngl   0.395            0.401            0.159 
#>  3 3         52 mixed    0.514  Mxd    0.514            0.495            0.0780
#>  4 4         79 mixed    0.0918 Mxd    0.0918           0.0597           0.120 
#>  5 5         35 mixed    0.211  Mxd    0.211            0.251            0.0711
#>  6 6         80 girls    0.638  Sngl   0.638            0.448            0.0482
#>  7 7         88 girls   -0.0290 Sngl  -0.0290           0.297           -0.151 
#>  8 8        102 girls   -0.0405 Sngl  -0.0405          -0.0788           0.0131
#>  9 9         34 mixed   -0.494  Mxd   -0.494           -0.157           -0.0660
#> 10 10        50 mixed    0.189  Mxd    0.189           -0.282           -0.137 
#> # … with 55 more rows, and 2 more variables: .ls.intercept <dbl>,
#> #   .ls.stand_lrt <dbl>

## Left panel -- figure 5
ggplot(
  SchoolExam, 
  aes(
    x = reorder(schgend, .ranef.intercept, median), 
    y = .ranef.intercept)
  ) +
  geom_boxplot() +
  labs(x = "school gender", y = "level-2 residual (Intercept)")
  
## Right panel -- figure 5
ggplot(SchoolExam, aes(x = schavg, y = .ranef.intercept)) +
  geom_point() +
  geom_smooth() +
  labs(x = "average intake score", y = "level-2 residual (Intercept)")
## Model fm4, page 12
fm4 <- lmer(normexam ~ standLRT + I(standLRT^2) + I(standLRT^3) +
              sex + schgend + schavg + (standLRT | school), 
            data = Exam, REML = FALSE)

resid_fm4 <- hlm_resid(fm4, level = "school", include.ls = FALSE)

## Figure 6, page 13
ggplot(resid_fm4, aes(sample = .ranef.intercept)) +
  stat_qq_band() +
  stat_qq_line() + 
  stat_qq_point() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

ggplot(resid_fm4, aes(sample = .ranef.stand_lrt)) +
  stat_qq_band() +
  stat_qq_line() + 
  stat_qq_point() +
  labs(x = "Theoretical Quantiles", y = "Sample Quantiles")

Influence analysis

We can now use hlm_influence to pull off all of the case-deletion diagnostics for the fixed effects at the specified level:

# Calculating influence diagnostics for model fm4
influence_fm4 <- hlm_influence(fm4, level = "school")
influence_fm4
#> # A tibble: 65 x 6
#>    school  cooksd mdffits covtrace covratio leverage.overall
#>    <fct>    <dbl>   <dbl>    <dbl>    <dbl>            <dbl>
#>  1 1      0.0216  0.0210    0.141      1.15           0.0217
#>  2 2      0.0144  0.0141    0.137      1.14           0.0267
#>  3 3      0.0384  0.0361    0.140      1.15           0.0257
#>  4 4      0.0165  0.0160    0.133      1.14           0.0201
#>  5 5      0.00889 0.00868   0.0677     1.07           0.0313
#>  6 6      0.0190  0.0171    0.166      1.17           0.0179
#>  7 7      0.0413  0.0394    0.111      1.12           0.0174
#>  8 8      0.00712 0.00668   0.205      1.22           0.0172
#>  9 9      0.00418 0.00408   0.140      1.15           0.0400
#> 10 10     0.00956 0.00935   0.0819     1.08           0.0249
#> # … with 55 more rows

dotplot_diag(influence_fm4$cooksd, cutoff = "internal", name = "cooks.distance")
dotplot_diag(influence_fm4$cooksd, cutoff = "internal", name = "cooks.distance", modify = "dotplot")
#> Warning: Removed 4 rows containing missing values (geom_text_repel).
beta_cdd <- cooks.distance(fm4, level = "school", include.attr = TRUE)
#> Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if `.name_repair` is omitted as of tibble 2.0.0.
#> Using compatibility `.name_repair`.
beta_cdd[25,]
#> # A tibble: 1 x 9
#>   cooksd  beta_1   beta_2   beta_3  beta_4   beta_5  beta_6  beta_7 beta_8
#>    <dbl>   <dbl>    <dbl>    <dbl>   <dbl>    <dbl>   <dbl>   <dbl>  <dbl>
#> 1 0.0863 0.00390 -0.00987 -0.00333 0.00321 0.000980 0.00232 -0.0168 0.0221

Diagnostics for variance components

To calculate the relative variance change, we use hlm_influence(), but approx = FALSE must be specified:

hlm_influence(fm4, approx = FALSE)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.