The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Prediction intervals for GLMs

Overview

Our implementation of prediction intervals (when simulate_pi = FALSE) follows that described by Gavin Simpson in two posts on his blog. Whilst what follows is a brief overview, more detail, including discussion on whether or not it makes sense to calculate these intervals, can be found in the following post.

Confidence interval

To calculate prediction intervals we first calculate the confidence interval on the scale of the linear predictor. The upper and lower bounds of this interval, are then fed in to the inverse link function which in turn gives us a confidence interval on the expected response.

Prediction interval

Once we have calculated the confidence interval on the response we feed the upper and lower bounds, in to the quantile function associated with the relevant distribution. The maximum and minimum values of the output are then used as the upper and lower bounds of our prediction interval.

Comparison to a bootstrap approach

Below we compare the prediction intervals from trending with those generated by the ciTools package. ciTools uses a parametric bootstrap approach so the expectation is that trending will produce a more conservative (wider) interval when we allow for uncertainty around the estimate, and a less conservative (narrower) interval when uncertainty is ignored.

The following examples build on those discussed in the ciTools glm vignette:

setup

library(ciTools)
library(trending)
library(ggplot2)
library(patchwork)
library(MASS)

Example 1 - Poisson

# generate data
x <- rnorm(100, mean = 0)
y <- rpois(n = 100, lambda = exp(1.5 + 0.5*x))
dat <- data.frame(x = x, y = y)
fit <- glm(y ~ x , family = poisson(link = "log"))

# use ciTools to add prediction interval
dat1 <- add_pi(dat, fit, names = c("lpb", "upb"), alpha = 0.1, nsims = 20000)
#> Warning in add_pi.glm(dat, fit, names = c("lpb", "upb"), alpha = 0.1, nsims =
#> 20000): The response is not continuous, so Prediction Intervals are approximate
head(dat1)
#>            x  y     pred lpb upb
#> 1 -1.7051837  2 1.978464   0   5
#> 2 -0.7533523  3 3.232369   1   6
#> 3  0.4280385  9 5.944711   2  10
#> 4 -0.1654847  2 4.377160   1   8
#> 5  0.4824726  4 6.113966   2  11
#> 6  0.9575575 12 7.811480   4  13

# add intervals with trending (no uncertainty in parameters)
poisson_model <- glm_model(y ~ x, family = "poisson")
fitted_model <- fit(poisson_model, dat)
dat2 <- predict(fitted_model, simulate_pi = FALSE, uncertain = FALSE, alpha = 0.1)
dat2 <- get_result(dat2)
head(dat2[[1]])
#> <trending_prediction> 6 x 7
#>       y      x estimate lower_ci upper_ci lower_pi upper_pi
#>   <int>  <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#> 1     2 -1.71      1.98     1.66     2.36        0        5
#> 2     3 -0.753     3.23     2.88     3.63        1        6
#> 3     9  0.428     5.94     5.54     6.38        2       10
#> 4     2 -0.165     4.38     4.01     4.77        1        8
#> 5     4  0.482     6.11     5.70     6.56        2       10
#> 6    12  0.958     7.81     7.24     8.43        4       13

# add intervals with trending (uncertainty in parameters)
dat3 <- predict(fitted_model, simulate_pi = FALSE, alpha = 0.1)
dat3 <- get_result(dat3)
head(dat3[[1]])
#> <trending_prediction> 6 x 7
#>       y      x estimate lower_ci upper_ci lower_pi upper_pi
#>   <int>  <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#> 1     2 -1.71      1.98     1.66     2.36        0        5
#> 2     3 -0.753     3.23     2.88     3.63        0        7
#> 3     9  0.428     5.94     5.54     6.38        2       11
#> 4     2 -0.165     4.38     4.01     4.77        1        9
#> 5     4  0.482     6.11     5.70     6.56        2       11
#> 6    12  0.958     7.81     7.24     8.43        3       13

# plots
p1 <- ggplot(dat1, aes(x, y)) +
  geom_point(size = 1) +
  geom_line(aes(y = pred), size = 1.2) +
  geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.2) +
  geom_ribbon(aes(ymin = `lower_pi`, ymax = `upper_pi`), data = dat2[[1]], alpha = 0.4) +
  ggtitle(
    "Poisson regression with prediction intervals and no uncertainty in parameters", 
    subtitle = "Model fit (black line), with bootstrap intervals (gray), parametric intervals (dark gray)"
  ) +
  coord_cartesian(ylim=c(0, 30))
#> Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
#> ℹ Please use `linewidth` instead.
#> This warning is displayed once every 8 hours.
#> Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
#> generated.

p2 <- ggplot(dat1, aes(x, y)) +
  geom_point(size = 1) +
  geom_line(aes(y = pred), size = 1.2) +
  geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.4) +
  geom_ribbon(aes(ymin = `lower_pi`, ymax = `upper_pi`), data = dat3[[1]], alpha = 0.2) +
  ggtitle(
    "Poisson regression with prediction intervals and uncertainty in parameters", 
    subtitle = "Model fit (black line), with parametric intervals (gray), bootstrap intervals (dark gray)"
  ) +
  coord_cartesian(ylim=c(0, 30))

p1 / p2

Example 2 - Quassipoisson

# generate data
x <- runif(n = 100, min = 0, max = 2)
mu <- exp(1 + x)
y <- rnegbin(n = 100, mu = mu, theta = mu/(5 - 1))
dat <- data.frame(x = x, y = y)
fit <- glm(y ~ x, family = quasipoisson(link = "log"))

# use ciTools to add prediction interval
dat1 <- add_pi(dat, fit, names = c("lpb", "upb"), alpha = 0.1, nsims = 20000)
#> Warning in add_pi.glm(dat, fit, names = c("lpb", "upb"), alpha = 0.1, nsims =
#> 20000): The response is not continuous, so Prediction Intervals are approximate
head(dat1)
#>           x y     pred lpb upb
#> 1 1.1373043 2 7.322380   1  18
#> 2 1.3481487 7 9.496419   2  20
#> 3 0.4928128 1 3.307697   0  10
#> 4 0.5307645 3 3.466163   0  10
#> 5 0.7430577 4 4.503316   0  11
#> 6 1.3176464 6 9.145886   2  20

# add intervals with trending (no uncertainty in parameters)
quasipoisson_model <- glm_model(y ~ x, family = quasipoisson(link = "log"))
fitted_model <- fit(quasipoisson_model, dat)
dat2 <- predict(fitted_model, simulate_pi = FALSE,  uncertain = FALSE, alpha = 0.1)
dat2 <- get_result(dat2)
head(dat2[[1]])
#> <trending_prediction> 6 x 7
#>       y     x estimate lower_ci upper_ci lower_pi upper_pi
#>   <int> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#> 1     2 1.14      7.32     6.48     8.27        1       17
#> 2     7 1.35      9.50     8.51    10.6         2       20
#> 3     1 0.493     3.31     2.63     4.16        0       10
#> 4     3 0.531     3.47     2.78     4.32        0       10
#> 5     4 0.743     4.50     3.76     5.39        0       12
#> 6     6 1.32      9.15     8.19    10.2         2       20

# add intervals with trending (uncertainty in parameters)
dat3 <- predict(fitted_model, simulate_pi = FALSE, alpha = 0.1)
dat3 <- get_result(dat3)
head(dat3[[1]])
#> <trending_prediction> 6 x 7
#>       y     x estimate lower_ci upper_ci lower_pi upper_pi
#>   <int> <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
#> 1     2 1.14      7.32     6.48     8.27        1       18
#> 2     7 1.35      9.50     8.51    10.6         2       22
#> 3     1 0.493     3.31     2.63     4.16        0       12
#> 4     3 0.531     3.47     2.78     4.32        0       12
#> 5     4 0.743     4.50     3.76     5.39        0       14
#> 6     6 1.32      9.15     8.19    10.2         1       21

# plots
p3 <- ggplot(dat1, aes(x, y)) +
  geom_point(size = 1) +
  geom_line(aes(y = pred), size = 1.2) +
  geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.2) +
  geom_ribbon(aes(ymin = `lower_pi`, ymax = `upper_pi`), data = dat2[[1]], alpha = 0.4) +
  ggtitle(
    "Quasipoisson regression with prediction intervals and no uncertainty in parameters", 
    subtitle = "Model fit (black line), with bootstrap intervals (gray), parametric intervals (dark gray)"
  ) +
  coord_cartesian(ylim=c(0, 30))

p4 <- ggplot(dat1, aes(x, y)) +
  geom_point(size = 1) +
  geom_line(aes(y = pred), size = 1.2) +
  geom_ribbon(aes(ymin = lpb, ymax = upb), alpha = 0.4) +
  geom_ribbon(aes(ymin = `lower_pi`, ymax = `upper_pi`), data = dat3[[1]], alpha = 0.2) +
  ggtitle(
    "Quasipoisson regression with prediction intervals and uncertainty in parameters",
    subtitle = "Model fit (black line), with parametric intervals (gray), bootstrap intervals (dark gray)"
  ) +
  coord_cartesian(ylim=c(0, 30))

p3 / p4

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.