let Example

Nina Zumel

2016-12-22

An example of using let to wrap dplyr expressions as functions.

library("dplyr")
library("replyr")

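The desired task: write a function that takes a data frame with a specified numerical column and an optional grouping column, and returns a data frame with one row per group containing: the mean of the column, the mean minus and plus one standard deviation, the median of the column, and the median minus and plus one half of the interquartile range.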

The dplyr expression for such a table is easy when the column names are known, but complicated when they are not. We use replyr::let to write such a function without the use of lazyeval.

# Summarize one numeric column of a data frame, optionally per group.
#
# Arguments:
#   dframe        data frame holding the data.
#   colname       name (string) of the numeric column to summarize.
#   groupcolname  optional name (string) of a grouping column; when NULL
#                 the whole data frame is summarized as a single group.
#
# Returns the result of dplyr::summarize with one row per group and columns
#   sdlower, mean, sdupper      (mean -/+ one standard deviation)
#   iqrlower, median, iqrupper  (median -/+ half the interquartile range)
sumstat_intervals = function(dframe, colname, groupcolname=NULL) {
  if(!is.null(groupcolname)) {
    # Group through let as well, instead of the deprecated, lazyeval-based
    # group_by_(): the placeholder groupvar is replaced by the column name.
    dframe = let(alias=list(groupvar=groupcolname),
                 expr={
                   dframe %>% group_by(groupvar)
                 })
  }
  # col is a placeholder that let replaces with the column named by colname.
  let(alias=list(col=colname),
      expr={
        dframe %>% summarize(sdlower = mean(col)-sd(col),
                             mean=mean(col),
                             sdupper = mean(col) + sd(col),
                             iqrlower = median(col)-0.5*IQR(col),
                             median=median(col),
                             iqrupper=median(col)+0.5*IQR(col))
      })
}

We can test sumstat_intervals on iris:

# Ungrouped: one summary row for the whole data set.
sumstat_intervals(iris, colname="Sepal.Length")
 #     sdlower     mean  sdupper iqrlower median iqrupper
 #  1 5.015267 5.843333 6.671399     5.15    5.8     6.45
# Grouped by species: one summary row per species.
sumstat_intervals(iris, colname="Sepal.Length", groupcolname="Species")
 #  # A tibble: 3 × 7
 #       Species  sdlower  mean  sdupper iqrlower median iqrupper
 #        <fctr>    <dbl> <dbl>    <dbl>    <dbl>  <dbl>    <dbl>
 #  1     setosa 4.653510 5.006 5.358490   4.8000    5.0   5.2000
 #  2 versicolor 5.419829 5.936 6.452171   5.5500    5.9   6.2500
 #  3  virginica 5.952120 6.588 7.223880   6.1625    6.5   6.8375
# Same grouping, different measurement column.
sumstat_intervals(iris, colname="Petal.Length", groupcolname="Species")
 #  # A tibble: 3 × 7
 #       Species  sdlower  mean  sdupper iqrlower median iqrupper
 #        <fctr>    <dbl> <dbl>    <dbl>    <dbl>  <dbl>    <dbl>
 #  1     setosa 1.288336 1.462 1.635664   1.4125   1.50   1.5875
 #  2 versicolor 3.790089 4.260 4.729911   4.0500   4.35   4.6500
 #  3  virginica 5.000105 5.552 6.103895   5.1625   5.55   5.9375

We can also use let to parameterize other functions that specify their parameters via non-standard evaluation. For example, we could write a ggplot2 function to plot the information in sumstat_intervals (either the mean-centered interval or the median-centered one) using ggplot2::aes_string. Or we could use replyr::let.

# plot_distributions stays NULL when ggplot2 is not installed, so later code
# can check for it before trying to plot.
plot_distributions = NULL

if  (requireNamespace("ggplot2")) {
  library("ggplot2")
  # Jitter-plot a numeric column per group and overlay a crossbar showing
  # the summary interval computed by sumstat_intervals.
  #
  # Arguments:
  #   dframe        data frame holding the data.
  #   colname       name (string) of the numeric column to plot.
  #   groupcol      name (string) of the grouping column (x axis and color).
  #   intervaltype  "mean" for mean -/+ one standard deviation, or "median"
  #                 for median -/+ half the interquartile range.
  #   title         plot title.
  #
  # Returns the ggplot object.
  plot_distributions = function(dframe, colname, groupcol,
                                intervaltype="mean", title="") {
    # stop() is the base R way to signal an error; error() does not exist.
    if(!(intervaltype %in% c("mean", "median"))) {
      stop("Intervaltype must be one of 'mean' or 'median'")
    }
    
    sintervals = sumstat_intervals(dframe, colname, groupcol)
    
    # I could do the following with aes_string, but what the heck
    # Placeholders used in the ggplot expression below; center/lower/upper
    # name columns of sintervals, chosen according to intervaltype.
    mapping = list(xval=groupcol, yval=colname, center=intervaltype)
    if(intervaltype=="mean") {
      mapping2 =list(lower="sdlower", upper="sdupper")
    } else {
      mapping2 =list(lower="iqrlower", upper="iqrupper")
    }
    mapping = c(mapping, mapping2)
    
    let(alias=mapping,
        expr = {
          ggplot(dframe, aes(x=xval,color=xval)) +
            geom_jitter(aes(y=yval), width=0.2, height=0, alpha=0.5) +
            geom_crossbar(data=sintervals, aes(y=center, ymin=lower, ymax=upper)) +
            ggtitle(title) + theme(plot.title=element_text(hjust=0.5)) +
            scale_color_brewer(palette="Dark2")
        })
  }
}
# Only plot when plot_distributions was defined (i.e. it is not NULL).
if(!is.null(plot_distributions)) {
  plot_distributions(iris, "Sepal.Length", "Species",
                     title="Iris sepal length with mean +/- one standard deviation")
}

# Median-centered version; skipped when plot_distributions is still NULL.
if(!is.null(plot_distributions)) {
  plot_distributions(iris,
                     colname="Petal.Width",
                     groupcol="Species",
                     intervaltype="median",
                     title="Iris petal width with median and centered IQR interval")
}