ggpmisc
0.2.9
library(ggpmisc)
library(ggplot2)
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(tibble)
library(nlme)
Many of the functions, including ggplot statistics and geoms, included in package ‘ggpmisc’ had their origin in my need to produce plots for use in teaching. Some of them are more generally useful, such as stat_poly_eq()
, but others like stat_fit_deviations()
are squarely aimed at producing learning material. Finally, several statistics for debugging and learning how ggplot statistics and geoms interact with each other will be of use only to developers of new statistics and geoms. Function try_data_frame()
opens the door to easily converting time series objects into data frames for plotting with ggplot()
.
try_data_frame()
Several different formats for storing time series data are used in R. In the examples here we use objects of class ts
but several other classes are supported as try.xts()
is used internally. The first example is a quarterly series.
class(austres)
## [1] "ts"
austres.df <- try_data_frame(austres)
class(austres.df)
## [1] "data.frame"
lapply(austres.df, "class")
## $time
## [1] "POSIXct" "POSIXt"
##
## $V.austres
## [1] "numeric"
head(austres.df, 4)
## time V.austres
## 1 1971-04-01 13067.3
## 2 1971-07-01 13130.5
## 3 1971-10-01 13198.4
## 4 1972-01-01 13254.2
The next chunk demonstrates that numeric times are expressed as decimal years in the returned data frame.
austres.df <- try_data_frame(austres, as.numeric = TRUE)
lapply(austres.df, "class")
## $time
## [1] "numeric"
##
## $V.austres
## [1] "numeric"
head(austres.df, 4)
## time V.austres
## 1 1971.247 13067.3
## 2 1971.496 13130.5
## 3 1971.748 13198.4
## 4 1972.000 13254.2
This second example is for a series of yearly values.
class(lynx)
## [1] "ts"
lynx.df <- try_data_frame(lynx)
class(lynx.df)
## [1] "data.frame"
lapply(lynx.df, "class")
## $time
## [1] "POSIXct" "POSIXt"
##
## $V.lynx
## [1] "numeric"
head(lynx.df, 3)
## time V.lynx
## 1 1821-01-01 00:00:01 269
## 2 1822-01-01 00:00:01 321
## 3 1823-01-01 00:00:01 585
Above there is a small rounding error of 1 s for these old dates. We can correct this by rounding to year.
lynx.df <- try_data_frame(lynx, "year")
head(lynx.df, 3)
## time V.lynx
## 1 1821-01-01 269
## 2 1822-01-01 321
## 3 1823-01-01 585
In addition we can convert the POSIXct values into numeric values in calendar years plus a decimal fraction.
lynx_n.df <- try_data_frame(lynx, "year", as.numeric = TRUE)
lapply(lynx_n.df, "class")
## $time
## [1] "numeric"
##
## $V.lynx
## [1] "numeric"
head(lynx_n.df, 3)
## time V.lynx
## 1 1821 269
## 2 1822 321
## 3 1823 585
try_data_frame()
attempts to handle gracefully objects that are not time series.
try_data_frame(1:5)
## x
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
try_data_frame(letters[1:5])
## x
## 1 a
## 2 b
## 3 c
## 4 d
## 5 e
try_data_frame(factor(letters[1:5]))
## x
## 1 a
## 2 b
## 3 c
## 4 d
## 5 e
try_data_frame(list(x = rep(1,5), y = 1:5))
## x y
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 1 5
try_data_frame(data.frame(x = rep(1,5), y = 1:5))
## x y
## 1 1 1
## 2 1 2
## 3 1 3
## 4 1 4
## 5 1 5
try_data_frame(matrix(1:10, ncol = 2))
## V1 V2
## 1 1 6
## 2 2 7
## 3 3 8
## 4 4 9
## 5 5 10
stat_peaks()
and stat_valleys()
Using POSIXct for time
and the default formatting of labels.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5) +
ylim(-100, 7300)
Using numeric values for time
and the default formatting of labels.
ggplot(lynx_n.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5) +
ylim(-100, 7300)
Using POSIXct for time
but supplying a format string. In addition marking both peaks and valleys.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5, x.label.fmt = "%Y") +
stat_valleys(colour = "blue") +
stat_valleys(geom = "text", colour = "blue", vjust = 1.5, x.label.fmt = "%Y") +
ylim(-100, 7300)
Using numeric for time
but supplying a format string. In addition marking both peaks and valleys.
ggplot(lynx_n.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", vjust = -0.5, x.label.fmt = "%4.0f") +
stat_valleys(colour = "blue") +
stat_valleys(geom = "text", colour = "blue", vjust = 1.5, x.label.fmt = "%4.0f") +
ylim(-100, 7300)
Rotating the labels.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "text", colour = "red", angle = 66,
hjust = -0.1, x.label.fmt = "%Y") +
ylim(NA, 7800)
Using geom_rug
for the peaks and valleys.
ggplot(lynx.df, aes(time, V.lynx)) + geom_line() +
stat_peaks(colour = "red") +
stat_peaks(geom = "rug", colour = "red") +
stat_valleys(colour = "blue") +
stat_valleys(geom = "rug", colour = "blue")
stat_poly_eq()
We generate some artificial data.
set.seed(4321)
# generate artificial data
x <- 1:100
y <- (x + x^2 + x^3) + rnorm(length(x), mean = 0, sd = mean(x^3) / 4)
my.data <- data.frame(x,
y,
group = c("A", "B"),
y2 = y * c(0.5,2),
block = c("a", "a", "b", "b"))
First one example using defaults.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(formula = formula, parse = TRUE)
stat_poly_eq()
makes available three different labels in the returned data frame. One of these is used by default, but aes()
can be used to select a different one.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..adj.rr.label..), formula = formula,
parse = TRUE)
BIC and AIC labels are also returned.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..AIC.label..),
formula = formula,
parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), formula = formula,
parse = TRUE)
Within aes()
it is possible to compute new labels based on those returned plus “arbitrary” text. The supplied labels are meant to be parsed into expressions, so any text added should be valid for a string that will be parsed.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = paste(..eq.label.., ..adj.rr.label.., sep = "~~~~")),
formula = formula, parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = paste("atop(", ..AIC.label.., ",", ..BIC.label.., ")", sep = "")),
formula = formula,
parse = TRUE)
Two examples of removing and changing the lhs and/or rhs of the equation.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = FALSE,
formula = formula, parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = "italic(hat(y))~`=`~",
formula = formula, parse = TRUE)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
labs(x = expression(italic(z)), y = expression(italic(h)) ) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = "italic(h)~`=`~",
eq.x.rhs = "~italic(z)",
formula = formula, parse = TRUE)
As any valid R expression can be used, Greek letters are also supported, as well as the inclusion in the label of variable transformations used in the model formula.
formula <- y ~ poly(x, 2, raw = TRUE)
ggplot(my.data, aes(x, log10(y + 1e6))) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
eq.with.lhs = "plain(log)[10](italic(y)+10^6)~`=`~",
formula = formula, parse = TRUE)
A couple of additional examples of polynomials of different orders, and specified in different ways.
Higher order polynomial.
formula <- y ~ poly(x, 5, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), formula = formula, parse = TRUE)
Intercept forced to zero.
formula <- y ~ x + I(x^2) + I(x^3) - 1
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), formula = formula,
parse = TRUE)
We give below several examples to demonstrate how other components of the ggplot
object affect the behaviour of this statistic.
Facets work as expected with either fixed or free scales, although below we had to adjust the size of the font used for the equation. In addition, we manually position the equation label by supplying coordinates.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), size = rel(3),
formula = formula, parse = TRUE) +
facet_wrap(~group)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..), size = rel(3),
formula = formula, parse = TRUE) +
facet_wrap(~group, scales = "free_y")
Grouping, in this example using the colour aesthetic, also works as expected. We can use justification and supply an absolute location for the equation.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
formula = formula, parse = TRUE) +
theme_bw()
Label positions relative to the ranges of the x and y scales are also supported, both through string constants and numeric values in the range 0 to 1.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
formula = formula, parse = TRUE, label.y.npc = "center") +
theme_bw()
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..eq.label..),
formula = formula, parse = TRUE, label.y.npc = 0.75) +
theme_bw()
The default locations are now based on normalized coordinates, and consequently these defaults work even when the range of the x and y scales varies from panel to panel.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, fill = block)) +
geom_point(shape = 21, size = rel(3)) +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..rr.label..), size = rel(3),
geom = "label", alpha = 0.33,
formula = formula, parse = TRUE) +
facet_wrap(~group, scales = "free_y") +
theme_bw()
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y2, colour = group, fill = block)) +
geom_point(shape = 21, size = rel(3)) +
geom_smooth(method = "lm", formula = formula) +
stat_poly_eq(aes(label = ..rr.label..), size = rel(3),
geom = "label", alpha = 0.2,
formula = formula, parse = TRUE,
label.y.npc = 0.66) +
facet_wrap(~group, scales = "free_y") +
theme_bw()
stat_fit_residuals
I had the need to quickly plot residuals matching fits plotted with geom_smooth()
using grouping and facets, so a new (simple) statistic was born.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_hline(yintercept = 0, linetype = "dashed") +
stat_fit_residuals(formula = formula)
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_hline(yintercept = 0, linetype = "dashed") +
stat_fit_residuals(formula = formula, resid.type = "working")
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y, color = group)) +
geom_hline(yintercept = 0, linetype = "dashed") +
stat_fit_residuals(formula = formula)
stat_fit_deviations
As I also had the need to highlight residuals in slides and notes to be used in teaching, another statistic was born.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_smooth(method = "lm", formula = formula) +
stat_fit_deviations(formula = formula, color = "red") +
geom_point()
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y, color = group)) +
geom_smooth(method = "lm", formula = formula) +
stat_fit_deviations(formula = formula) +
geom_point()
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
geom_smooth(method = "lm", formula = formula) +
stat_fit_deviations(formula = formula, color = "red",
arrow = arrow(length = unit(0.015, "npc"),
ends = "both")) +
geom_point()
stat_fit_glance
# formula <- y ~ poly(x, 3, raw = TRUE)
# broom::augment does not handle poly correctly!
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_fit_glance(method = "lm",
method.args = list(formula = formula),
geom = "text",
aes(label = signif(..p.value.., digits = 4)))
# formula <- y ~ poly(x, 3, raw = TRUE)
# broom::augment does not handle poly() correctly!
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y, color = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_fit_glance(method = "lm",
method.args = list(formula = formula),
geom = "text",
aes(label = paste("P-value = ", signif(..p.value.., digits = 4), sep = "")))
# formula <- y ~ poly(x, 3, raw = TRUE)
# broom::augment does not handle poly correctly!
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y, color = group)) +
geom_point() +
geom_smooth(method = "lm", formula = formula) +
stat_fit_glance(method = "lm",
method.args = list(formula = formula),
label.x.npc = "right",
label.y.npc = "bottom",
geom = "text",
aes(label = paste("P-value = ", signif(..p.value.., digits = 4), sep = "")))
stat_fit_augment
Experimental! Use ggplot2::stat_smooth
instead of stat_fit_augment
if possible.
For a single panel and no grouping, there is little advantage in using this statistic compared to the examples in the documentation of package ‘broom’. With grouping and faceting stat_fit_augment
may occasionally be more convenient than ggplot2::stat_smooth
because of its flexibility.
# formula <- y ~ poly(x, 3, raw = TRUE)
# broom::augment does not handle poly correctly!
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_fit_augment(method = "lm",
method.args = list(formula = formula))
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y, color = group)) +
geom_point() +
stat_fit_augment(method = "lm",
method.args = list(formula = formula))
We can override the variable returned as y to be any of the variables in the data frame returned by broom::augment
while still preserving the original y values.
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y)) +
stat_fit_augment(method = "lm",
method.args = list(formula = formula),
geom = "point",
y.out = ".resid")
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y, color = group)) +
stat_fit_augment(method = "lm",
method.args = list(formula = formula),
geom = "point",
y.out = ".std.resid")
We can use any model fitting method for which augment
is implemented.
args <- list(formula = y ~ k * e ^ x,
start = list(k = 1, e = 2))
ggplot(mtcars, aes(wt, mpg)) +
geom_point() +
stat_fit_augment(method = "nls",
method.args = args)
args <- list(formula = y ~ k * e ^ x,
start = list(k = 1, e = 2))
ggplot(mtcars, aes(wt, mpg)) +
stat_fit_augment(method = "nls",
method.args = args,
geom = "point",
y.out = ".resid")
args <- list(model = y ~ SSlogis(x, Asym, xmid, scal),
fixed = Asym + xmid + scal ~1,
random = Asym ~1 | group,
start = c(Asym = 200, xmid = 725, scal = 350))
ggplot(Orange, aes(age, circumference, color = Tree)) +
geom_point() +
stat_fit_augment(method = "nlme",
method.args = args,
augment.args = list(data = quote(data)))
These stats are very simple and print a summary of their data
input to the console. In addition they also return a data frame containing labels suitable for plotting as with geom “text” or geom “label”. However, starting from version 0.2.7 of the package the default geom is “null”. The values are listed to the console at the time when the ggplot
object is printed.
As shown here, no other geom or stat is required, however in the remaining examples we include geom_point()
to make the data on the plot visible.
ggplot(my.data, aes(x, y)) + stat_debug_group()
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [100 x 4]
##
## x y PANEL group
## <dbl> <dbl> <int> <int>
## 1 1 -27205.450 1 -1
## 2 2 -14242.651 1 -1
## 3 3 45790.918 1 -1
## 4 4 53731.420 1 -1
## 5 5 -8028.578 1 -1
## 6 6 102863.943 1 -1
## 7 7 -18547.282 1 -1
## 8 8 13080.521 1 -1
## 9 9 79924.325 1 -1
## 10 10 -44711.499 1 -1
## .. ... ... ... ...
In the absence of facets or groups we get just the summary from one data frame.
ggplot(my.data, aes(x, y)) + geom_point() + stat_debug_group()
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [100 x 4]
##
## x y PANEL group
## <dbl> <dbl> <int> <int>
## 1 1 -27205.450 1 -1
## 2 2 -14242.651 1 -1
## 3 3 45790.918 1 -1
## 4 4 53731.420 1 -1
## 5 5 -8028.578 1 -1
## 6 6 102863.943 1 -1
## 7 7 -18547.282 1 -1
## 8 8 13080.521 1 -1
## 9 9 79924.325 1 -1
## 10 10 -44711.499 1 -1
## .. ... ... ... ...
ggplot(my.data, aes(x, y)) + geom_point() + stat_debug_panel()
## [1] "Input 'data' to 'compute_panel()':"
## Source: local data frame [100 x 4]
##
## x y PANEL group
## <dbl> <dbl> <int> <int>
## 1 1 -27205.450 1 -1
## 2 2 -14242.651 1 -1
## 3 3 45790.918 1 -1
## 4 4 53731.420 1 -1
## 5 5 -8028.578 1 -1
## 6 6 102863.943 1 -1
## 7 7 -18547.282 1 -1
## 8 8 13080.521 1 -1
## 9 9 79924.325 1 -1
## 10 10 -44711.499 1 -1
## .. ... ... ... ...
In the case of grouping, one data frame is summarized for each group in the ggplot object.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_group()
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [50 x 5]
##
## x y colour PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 1 -27205.450 A 1 1
## 2 3 45790.918 A 1 1
## 3 5 -8028.578 A 1 1
## 4 7 -18547.282 A 1 1
## 5 9 79924.325 A 1 1
## 6 11 -2823.736 A 1 1
## 7 13 -78016.690 A 1 1
## 8 15 -74281.234 A 1 1
## 9 17 9903.674 A 1 1
## 10 19 -94022.623 A 1 1
## .. ... ... ... ... ...
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [50 x 5]
##
## x y colour PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 2 -14242.65 B 1 2
## 2 4 53731.42 B 1 2
## 3 6 102863.94 B 1 2
## 4 8 13080.52 B 1 2
## 5 10 -44711.50 B 1 2
## 6 12 23839.55 B 1 2
## 7 14 75601.96 B 1 2
## 8 16 104676.72 B 1 2
## 9 18 -68746.93 B 1 2
## 10 20 -39230.19 B 1 2
## .. ... ... ... ... ...
Without facets, we still have only one panel.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_panel()
## [1] "Input 'data' to 'compute_panel()':"
## Source: local data frame [100 x 5]
##
## x y colour PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 1 -27205.450 A 1 1
## 2 2 -14242.651 B 1 2
## 3 3 45790.918 A 1 1
## 4 4 53731.420 B 1 2
## 5 5 -8028.578 A 1 1
## 6 6 102863.943 B 1 2
## 7 7 -18547.282 A 1 1
## 8 8 13080.521 B 1 2
## 9 9 79924.325 A 1 1
## 10 10 -44711.499 B 1 2
## .. ... ... ... ... ...
The data are similar, except for the column named after the aesthetic, for the aesthetics used for grouping.
ggplot(my.data, aes(x, y, shape = group)) + geom_point() +
stat_debug_group()
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [50 x 5]
##
## x y shape PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 1 -27205.450 A 1 1
## 2 3 45790.918 A 1 1
## 3 5 -8028.578 A 1 1
## 4 7 -18547.282 A 1 1
## 5 9 79924.325 A 1 1
## 6 11 -2823.736 A 1 1
## 7 13 -78016.690 A 1 1
## 8 15 -74281.234 A 1 1
## 9 17 9903.674 A 1 1
## 10 19 -94022.623 A 1 1
## .. ... ... ... ... ...
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [50 x 5]
##
## x y shape PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 2 -14242.65 B 1 2
## 2 4 53731.42 B 1 2
## 3 6 102863.94 B 1 2
## 4 8 13080.52 B 1 2
## 5 10 -44711.50 B 1 2
## 6 12 23839.55 B 1 2
## 7 14 75601.96 B 1 2
## 8 16 104676.72 B 1 2
## 9 18 -68746.93 B 1 2
## 10 20 -39230.19 B 1 2
## .. ... ... ... ... ...
If we use as geom "label"
or "text"
a debug summary is added to the plot itself. We can also use other arguments valid for the geom used, in this case vjust
.
ggplot(my.data, aes(x, y, shape = group)) + geom_point() +
stat_debug_group(geom = "label", vjust = c(-0.5,1.5))
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [50 x 5]
##
## x y shape PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 1 -27205.450 A 1 1
## 2 3 45790.918 A 1 1
## 3 5 -8028.578 A 1 1
## 4 7 -18547.282 A 1 1
## 5 9 79924.325 A 1 1
## 6 11 -2823.736 A 1 1
## 7 13 -78016.690 A 1 1
## 8 15 -74281.234 A 1 1
## 9 17 9903.674 A 1 1
## 10 19 -94022.623 A 1 1
## .. ... ... ... ... ...
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [50 x 5]
##
## x y shape PANEL group
## <dbl> <dbl> <fctr> <int> <int>
## 1 2 -14242.65 B 1 2
## 2 4 53731.42 B 1 2
## 3 6 102863.94 B 1 2
## 4 8 13080.52 B 1 2
## 5 10 -44711.50 B 1 2
## 6 12 23839.55 B 1 2
## 7 14 75601.96 B 1 2
## 8 16 104676.72 B 1 2
## 9 18 -68746.93 B 1 2
## 10 20 -39230.19 B 1 2
## .. ... ... ... ... ...
The summary function can be a user defined one, which allows lots of flexibility.
ggplot(my.data, aes(x, y)) + geom_point() +
stat_debug_group(summary.fun = summary)
## [1] "Input 'data' to 'compute_group()':"
## x y PANEL group
## Min. : 1.00 Min. : -94023 Min. :1 Min. :-1
## 1st Qu.: 25.75 1st Qu.: 40345 1st Qu.:1 1st Qu.:-1
## Median : 50.50 Median : 154036 Median :1 Median :-1
## Mean : 50.50 Mean : 266433 Mean :1 Mean :-1
## 3rd Qu.: 75.25 3rd Qu.: 422069 3rd Qu.:1 3rd Qu.:-1
## Max. :100.00 Max. :1077469 Max. :1 Max. :-1
ggplot(my.data, aes(x, y)) + geom_point() +
stat_debug_group(summary.fun = head)
## [1] "Input 'data' to 'compute_group()':"
## x y PANEL group
## 1 1 -27205.450 1 -1
## 2 2 -14242.651 1 -1
## 3 3 45790.918 1 -1
## 4 4 53731.420 1 -1
## 5 5 -8028.578 1 -1
## 6 6 102863.943 1 -1
ggplot(my.data, aes(x, y)) + geom_point() +
stat_debug_group(summary.fun = nrow)
## [1] "Input 'data' to 'compute_group()':"
## [1] 100
The default.
ggplot(my.data, aes(x, y)) + geom_point() +
stat_debug_group(summary.fun = as_data_frame)
## [1] "Input 'data' to 'compute_group()':"
## Source: local data frame [100 x 4]
##
## x y PANEL group
## <dbl> <dbl> <int> <int>
## 1 1 -27205.450 1 -1
## 2 2 -14242.651 1 -1
## 3 3 45790.918 1 -1
## 4 4 53731.420 1 -1
## 5 5 -8028.578 1 -1
## 6 6 102863.943 1 -1
## 7 7 -18547.282 1 -1
## 8 8 13080.521 1 -1
## 9 9 79924.325 1 -1
## 10 10 -44711.499 1 -1
## .. ... ... ... ...
ggplot(my.data, aes(x, y)) + geom_point() +
stat_debug_group(summary.fun = head, summary.fun.args = list(n = 3))
## [1] "Input 'data' to 'compute_group()':"
## x y PANEL group
## 1 1 -27205.45 1 -1
## 2 2 -14242.65 1 -1
## 3 3 45790.92 1 -1
This next chunk showing how to print the whole data frame is not run as its output is more than 100 lines long as the data set contains 100 observations.
ggplot(my.data, aes(x, y)) + geom_point() +
stat_debug_group(summary.fun = function(x) {x})
With grouping, for each group the compute_group()
function is called with a subset of the data.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_group(summary.fun = head, summary.fun.args = list(n = 3))
## [1] "Input 'data' to 'compute_group()':"
## x y colour PANEL group
## 1 1 -27205.450 A 1 1
## 3 3 45790.918 A 1 1
## 5 5 -8028.578 A 1 1
## [1] "Input 'data' to 'compute_group()':"
## x y colour PANEL group
## 2 2 -14242.65 B 1 2
## 4 4 53731.42 B 1 2
## 6 6 102863.94 B 1 2
In this example with grouping and facets, within each panel the compute_group()
function is called for each group, in total four times.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_group(summary.fun = nrow) +
facet_wrap(~block)
## [1] "Input 'data' to 'compute_group()':"
## [1] 25
## [1] "Input 'data' to 'compute_group()':"
## [1] 25
## [1] "Input 'data' to 'compute_group()':"
## [1] 25
## [1] "Input 'data' to 'compute_group()':"
## [1] 25
With facets, for each panel the compute_panel()
function is called with a subset of the data that is not split by groups. For our example, it is called twice.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_debug_panel(summary.fun = nrow) +
facet_wrap(~block)
## [1] "Input 'data' to 'compute_panel()':"
## [1] 50
## [1] "Input 'data' to 'compute_panel()':"
## [1] 50
Finally we show how geom_debug()
can be used. First to print to the console the data as passed to geoms.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
geom_debug(summary.fun = head)
## Input 'data' to 'geom_debug()':
## colour x y PANEL group
## 1 #F8766D 1 -27205.450 1 1
## 2 #00BFC4 2 -14242.651 1 2
## 3 #F8766D 3 45790.918 1 1
## 4 #00BFC4 4 53731.420 1 2
## 5 #F8766D 5 -8028.578 1 1
## 6 #00BFC4 6 102863.943 1 2
And also to print to the console the data returned by a stat.
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_smooth(method = "lm",
geom = "debug",
summary.fun = as_data_frame,
summary.fun.args = list())
## Input 'data' to 'geom_debug()':
## Source: local data frame [160 x 8]
##
## colour x y ymin ymax se PANEL group
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 #F8766D 1.000000 -188136.10 -254710.6 -121561.62 33111.18 1 1
## 2 #F8766D 2.240506 -176933.14 -242260.5 -111605.80 32490.90 1 1
## 3 #F8766D 3.481013 -165730.18 -229819.0 -101641.36 31874.92 1 1
## 4 #F8766D 4.721519 -154527.23 -217386.7 -91667.76 31263.50 1 1
## 5 #F8766D 5.962025 -143324.27 -204964.1 -81684.46 30656.89 1 1
## 6 #F8766D 7.202532 -132121.31 -192551.7 -71690.88 30055.40 1 1
## 7 #F8766D 8.443038 -120918.36 -180150.3 -61686.39 29459.34 1 1
## 8 #F8766D 9.683544 -109715.40 -167760.5 -51670.30 28869.04 1 1
## 9 #F8766D 10.924051 -98512.44 -155383.0 -41641.90 28284.87 1 1
## 10 #F8766D 12.164557 -87309.48 -143018.6 -31600.41 27707.21 1 1
## .. ... ... ... ... ... ... ... ...
ggplot(my.data, aes(x, y, colour = group)) + geom_point() +
stat_peaks(span = NULL,
geom = "debug",
summary.fun = as_data_frame,
summary.fun.args = list())
## Input 'data' to 'geom_debug()':
## Source: local data frame [2 x 10]
##
## colour xintercept yintercept label x y PANEL group x.label
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <int> <int> <chr>
## 1 #F8766D 95 984858 95 95 984858 1 1 95
## 2 #00BFC4 100 1077468 100 100 1077468 1 2 100
## Variables not shown: y.label <chr>.
formula <- y ~ poly(x, 3, raw = TRUE)
ggplot(my.data, aes(x, y)) +
stat_fit_residuals(formula = formula,
geom = "debug",
summary.fun = as_data_frame,
summary.fun.args = list())
## Input 'data' to 'geom_debug()':
## Source: local data frame [100 x 6]
##
## x y y.resid y.resid.abs PANEL group
## <dbl> <dbl> <dbl> <dbl> <int> <int>
## 1 1 -23513.687 -23513.687 23513.687 1 -1
## 2 2 -11663.732 -11663.732 11663.732 1 -1
## 3 3 47289.460 47289.460 47289.460 1 -1
## 4 4 54175.231 54175.231 54175.231 1 -1
## 5 5 -8620.676 -8620.676 8620.676 1 -1
## 6 6 101247.937 101247.937 101247.937 1 -1
## 7 7 -21182.019 -21182.019 21182.019 1 -1
## 8 8 9425.407 9425.407 9425.407 1 -1
## 9 9 75240.365 75240.365 75240.365 1 -1
## 10 10 -50439.597 -50439.597 50439.597 1 -1
## .. ... ... ... ... ... ...
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y)) +
geom_point() +
stat_fit_augment(method = "lm",
method.args = list(formula = formula),
geom = "debug",
summary.fun = tibble::as_data_frame,
summary.fun.args = list()) +
stat_fit_augment(method = "lm",
method.args = list(formula = formula),
geom = "smooth",
aes(y = ...fitted..,
ymax = ...fitted.. + ...se.fit.. * 2,
ymin = ...fitted.. - ...se.fit.. * 2))
## Input 'data' to 'geom_debug()':
## Source: local data frame [100 x 17]
##
## ymin ymax y x I.x.2. I.x.3. .fitted .se.fit
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -48698.27 41314.74 -3691.7638 1 1 1 -3691.7638 22673.48
## 2 -44308.35 39150.51 -2578.9186 2 4 8 -2578.9186 21022.55
## 3 -40181.60 37184.51 -1498.5419 3 9 27 -1498.5419 19487.84
## 4 -36313.71 35426.09 -443.8109 4 16 64 -443.8109 18070.62
## 5 -32700.69 33884.88 592.0973 5 25 125 592.0973 16772.32
## 6 -29338.59 32570.60 1616.0056 6 36 216 1616.0056 15594.38
## 7 -26223.08 31492.56 2634.7369 7 49 343 2634.7369 14538.06
## 8 -23348.80 30659.03 3655.1141 8 64 512 3655.1141 13604.10
## 9 -20708.60 30076.52 4683.9600 9 81 729 4683.9600 12792.32
## 10 -18292.59 29748.78 5728.0975 10 100 1000 5728.0975 12101.20
## .. ... ... ... ... ... ... ... ...
## Variables not shown: .resid <dbl>, .hat <dbl>, .sigma <dbl>, .cooksd
## <dbl>, .std.resid <dbl>, y.observed <dbl>, t.value <dbl>, PANEL <int>,
## group <int>.
formula <- y ~ x + I(x^2) + I(x^3)
ggplot(my.data, aes(x, y2, colour = group)) +
geom_point() +
stat_fit_augment(method = "lm",
method.args = list(formula = formula),
geom = "debug",
summary.fun = tibble::as_data_frame,
summary.fun.args = list()) +
stat_fit_augment(method = "lm",
method.args = list(formula = formula),
geom = "smooth",
aes(y = ...fitted..,
ymax = ...fitted.. + ...se.fit.. * 2,
ymin = ...fitted.. - ...se.fit.. * 2))
## Warning: Computation failed in `stat_fit_augment()`:
## variable lengths differ (found for 'x')
## Input 'data' to 'geom_debug()':
## Source: local data frame [100 x 19]
##
## colour ymin ymax .rownames y x I.x.2. I.x.3.
## <chr> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 #F8766D -41080.41 20399.43 1 -10340.4868 1 1 1
## 2 #F8766D -35273.92 17554.72 3 -8859.5995 3 9 27
## 3 #F8766D -30156.45 15352.63 5 -7401.9119 5 25 125
## 4 #F8766D -25719.57 13832.64 7 -5943.4676 7 49 343
## 5 #F8766D -21946.27 13025.65 9 -4460.3100 9 81 729
## 6 #F8766D -18793.72 12936.75 11 -2928.4828 11 121 1331
## 7 #F8766D -16175.63 13527.57 13 -1324.0295 13 169 2197
## 8 #F8766D -13958.28 14712.29 15 377.0064 15 225 3375
## 9 #F8766D -11978.53 16375.69 17 2198.5812 17 289 4913
## 10 #F8766D -10073.31 18402.61 19 4164.6515 19 361 6859
## .. ... ... ... ... ... ... ... ...
## Variables not shown: .fitted <dbl>, .se.fit <dbl>, .resid <dbl>, .hat
## <dbl>, .sigma <dbl>, .cooksd <dbl>, .std.resid <dbl>, y.observed <dbl>,
## t.value <dbl>, PANEL <int>, group <int>.
The package also defines a "null"
geom, which is used as default by the debug stats described above. Currently this geom is similar to the recently added ggplot2::geom_blank()
, which should be preferred.
ggplot(my.data, aes(x, y, colour = group)) + geom_null()