ae_attendances dataset

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Tom Jemmett, The Strategy Unit

15/08/2019

This vignette explains how to use the ae_attendances dataset in R, and also details where it comes from and how it is generated.

The data contains all reported A&E attendances for the period April 2016 through March 2019

First let’s load some packages and the dataset and show the first 10 rows of data.

library(knitr)
library(scales)
library(ggrepel)
library(lubridate)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union
library(dplyr)
library(forcats)
library(tidyr)
#> 
#> Attaching package: 'tidyr'
#> The following objects are masked from 'package:Matrix':
#> 
#>     expand, pack, unpack

library(NHSRdatasets)

data("ae_attendances")

# format for display
ae_attendances %>%
  # set the period column to show in Month-Year format
  mutate_at(vars(period), format, "%b-%y") %>% 
  # set the numeric columns to have a comma at the 1000's place
  mutate_at(vars(attendances, breaches, admissions), comma) %>%
  # show the first 10 rows
  head(10) %>%
  # format as a table
  kable()

period	org_code	type	attendances	breaches	admissions
Mar-17	RF4	1	21,289.0	2,879.0	5,060.0
Mar-17	RF4	2	813.0	22.0	0.0
Mar-17	RF4	other	2,850.0	6.0	0.0
Mar-17	R1H	1	30,210.0	5,902.0	6,943.0
Mar-17	R1H	2	807.0	11.0	0.0
Mar-17	R1H	other	11,352.0	136.0	0.0
Mar-17	AD913	other	4,381.0	2.0	0.0
Mar-17	RYX	other	19,562.0	258.0	0.0
Mar-17	RQM	1	17,414.0	2,030.0	3,597.0
Mar-17	RQM	other	7,817.0	86.0	0.0

england_performance <- ae_attendances %>%
  group_by(period) %>%
  summarise_at(vars(attendances, breaches), sum) %>%
  mutate(performance = 1 - breaches / attendances)

# format for display
england_performance %>% 
  # same format options as above
  mutate_at(vars(period), format, "%b-%y") %>% 
  mutate_at(vars(attendances, breaches), comma) %>%
  # this time show the performance column as a percentage
  mutate_at(vars(performance), percent) %>%
  # show the first 10 rows and format as a table
  head(10) %>%
  kable()

period	attendances	breaches	performance
Apr-16	1,867,781	186,122	90.0351%
May-16	2,070,340	201,329	90.2756%
Jun-16	1,958,802	184,912	90.5599%
Jul-16	2,079,034	201,973	90.2852%
Aug-16	1,932,901	174,419	90.9763%
Sep-16	1,952,464	182,597	90.6479%
Oct-16	2,001,816	219,137	89.0531%
Nov-16	1,907,871	221,713	88.3790%
Dec-16	1,944,567	268,818	86.1759%
Jan-17	1,895,272	281,612	85.1413%

ae_attendances %>%
  group_by(period, type) %>%
  summarise_if(is.numeric, sum) %>%
  mutate(performance = 1 - breaches / attendances) %>%
  ggplot(aes(period, performance, colour = type)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = percent) +
  #facet_wrap(vars(type), nrow = 1) +
  theme(legend.position = "bottom") +
  labs(x = "Month of attendance",
       y = "% of attendances that met the 4 hour standard",
       title = "NHS England A&E 4 Hour Performance",
       subtitle = "By Department Type",
       caption = "Source: NHS England Statistical Work Areas (OGL v3.0)")

From this it appears as if only the type 1 departments have the seasonal drops, type 2 and “other” departments remain pretty consistent.

What are the best and worst trusts for performance?

We could create a similar table of data for performance by each individual trust, but it would be useful to only look at trusts that have a type 1 department as it appears from the chart above that these departments have the largest variation.

performance_by_trust <- ae_attendances %>%
  group_by(org_code, period) %>%
  # make sure that this trust has a type 1 department
  filter(any(type == 1)) %>%
  summarise_at(vars(attendances, breaches), sum) %>%
  mutate(performance = 1 - breaches / attendances)

# format for display
performance_by_trust %>%
  mutate_at(vars(period), format, "%b-%y") %>% 
  mutate_at(vars(attendances, breaches), comma) %>%
  mutate_at(vars(performance), percent) %>%
  head(10) %>%
  kable()

org_code	period	attendances	breaches	performance
R0A	Oct-17	35,744.0	3,663.0	89.7521%
R0A	Nov-17	34,314.0	3,982.0	88.3954%
R0A	Dec-17	34,082.0	5,430.0	84.0678%
R0A	Jan-18	33,758.0	4,906.0	85.4671%
R0A	Feb-18	30,520.0	4,111.0	86.5301%
R0A	Mar-18	35,233.0	5,496.0	84.4010%
R0A	Apr-18	33,127.0	3,809.0	88.5018%
R0A	May-18	35,797.0	4,792.0	86.6134%
R0A	Jun-18	34,070.0	3,616.0	89.3866%
R0A	Jul-18	35,081.0	4,723.0	86.5369%

From this table we can calculate the overall performance by each trust and then organise the trusts by their overall performance.

performance_by_trust_ranking <- performance_by_trust %>%
  summarise(performance = 1 - sum(breaches) / sum(attendances)) %>%
  arrange(performance) %>%
  pull(org_code) %>%
  as.character()

print("Bottom 5")
#> [1] "Bottom 5"
head(performance_by_trust_ranking, 5)
#> [1] "RQW" "RWD" "RXW" "RX1" "RHU"

print("Top 5")
#> [1] "Top 5"
tail(performance_by_trust_ranking, 5)
#> [1] "RA4" "RBD" "RVW" "RCU" "RC9"

performance_by_trust %>%
  ungroup() %>%
  mutate_at(vars(org_code), fct_relevel, performance_by_trust_ranking) %>%
  filter(org_code %in% c(head(performance_by_trust_ranking, 5),
                         tail(performance_by_trust_ranking, 5))) %>%
  ggplot(aes(period, performance)) +
  geom_line() +
  geom_point() +
  scale_y_continuous(labels = percent) +
  facet_wrap(vars(org_code), nrow = 2) +
  theme(legend.position = "bottom") +
  labs(x = "Month of attendance",
       y = "% of attendances that met the 4 hour standard",
       title = "NHS England A&E 4 Hour Performance",
       subtitle = "Bottom 5/Top 5 over the whole 3 years",
       caption = "Source: NHS England Statistical Work Areas (OGL v3.0)")

Benchmarking

It is sometimes useful to see how an organisation stacks up against all of the other organisations. Below we create a chart where each organisation is shown as a point, ordered by performance from left (highest performance) to right (lowest) performance.

It’s useful to indicate certain organisations on the chart, below I am showing the 3 organisations that are at the lower quartile, median and upper quartile, however you could change this to instead pick out specific organisations (using a reference table and left_join or hard coding with case_when).

ae_attendances %>%
  filter(period == last(period)) %>%
  group_by(org_code) %>%
  filter(any(type == 1)) %>%
  summarise_at(vars(attendances, breaches), sum) %>%
  mutate(performance = 1 - breaches/attendances,
         overall_performance = 1 - sum(breaches)/sum(attendances),
         org_code = fct_reorder(org_code, -performance)) %>%
  #
  arrange(performance) %>%
  # lets highlight the organsiations that are at the lower and upper quartile
  # and at the median. First "tile" the data into 4 groups, then we use the
  # lag function to check to see if the value changes between rows. We will get
  # NA for the first row, so replace this with FALSE
  mutate(highlight = ntile(n = 4),
         highlight = replace_na(highlight != lag(highlight), FALSE)) %>%

  ggplot(aes(org_code, performance)) +
  geom_hline(aes(yintercept = overall_performance)) +
  geom_point(aes(fill = highlight), show.legend = FALSE, pch = 21) +
  geom_text_repel(aes(label = ifelse(highlight, as.character(org_code), NA)),
                  na.rm = TRUE) +
  scale_fill_manual(values = c("TRUE" = "black",
                               "FALSE" = NA)) +
  scale_y_continuous(labels = percent) +
  theme_minimal() +
  theme(panel.grid = element_blank(),
        axis.text.x = element_blank(),
        axis.line = element_line(),
        axis.ticks.y = element_line())

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.