Introduction

The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

Links
Overview
vlookup & xlookup
More examples
Variable selection in the expressions
Joins
‘dplyr’-like interface for data.table.

Overview

Package provides pipe-style interface for data.table package. It preserves all data.table features without significant impact on performance. let and take functions are simplified interfaces for most common data manipulation tasks.

To select rows from data: rows(mtcars, am==0)
To select columns from data: columns(mtcars, mpg, vs:carb)
To aggregate data: take(mtcars, mean_mpg = mean(mpg), by = am)
To aggregate all non-grouping columns: take_all(mtcars, mean, by = am)
To aggregate several columns with one summary: take(mtcars, mpg, hp, fun = mean, by = am)
To get total summary skip by argument: take_all(mtcars, mean)
Use magrittr pipe %>% to chain several operations:

     mtcars %>%
        let(mpg_hp = mpg/hp) %>%
        take(mean(mpg_hp), by = am)

To modify variables or add new variables:

      mtcars %>%
         let(new_var = 42,
             new_var2 = new_var*hp) %>%
         head()

To drop variable assign NULL: let(mtcars, am = NULL) %>% head()
To modify all non-grouping variables:

    iris %>%
      let_all(
          scaled = (.x - mean(.x))/sd(.x),
          by = Species) %>%
       head()

To aggregate all variables conditionally on name:

    iris %>%
      take_all(
          mean = if(startsWith(.name, "Sepal")) mean(.x),
          median = if(startsWith(.name, "Petal")) median(.x),
          by = Species
      )

For parametric assignment use :=:

    new_var = "my_var"
    old_var = "mpg"
    mtcars %>%
        let((new_var) := get(old_var)*2) %>%
        head()
     
    # or,  
    expr = quote(mean(cyl))
    mtcars %>% 
        let((new_var) := eval(expr)) %>% 
        head()
    
    # the same with `take` 
    by_var = "vs,am"
    take(mtcars, (new_var) := eval(expr), by = by_var)

query_if function translates its arguments one-to-one to [.data.table method. Additionally there are some conveniences such as automatic data.frame conversion to data.table.

vlookup & xlookup

Let’s make datasets for lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

# xlookup
workers = let(workers,
  position = xlookup(name, positions$name, positions$position)
)

# vlookup
# by default we search in the first column and return values from second column
workers = let(workers,
  position = vlookup(name, positions, no_match = "Not found")
)

# the same 
workers = let(workers,
  position = vlookup(name, positions, 
                     result_column = "position", 
                     no_match = "Not found") # or, result_column = 2 
)

head(workers)

##       name company  position
## 1:    Nick    Acme Not found
## 2:    John    Ajax  designer
## 3: Daniela    Ajax  engineer

More examples

We will use for demonstartion well-known mtcars dataset and some examples from dplyr package.

library(maditr)
data(mtcars)

# Newly created variables are available immediately
mtcars %>%
    let(
        cyl2 = cyl * 2,
        cyl4 = cyl2 * 2
    ) %>% head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb cyl2 cyl4
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4   12   24
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4   12   24
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1    8   16
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1   12   24
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2   16   32
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1   12   24

# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
    let(
        mpg = NULL,
        disp = disp * 0.0163871 # convert to litres
    ) %>% head()

##    cyl     disp  hp drat    wt  qsec vs am gear carb
## 1:   6 2.621936 110 3.90 2.620 16.46  0  1    4    4
## 2:   6 2.621936 110 3.90 2.875 17.02  0  1    4    4
## 3:   4 1.769807  93 3.85 2.320 18.61  1  1    4    1
## 4:   6 4.227872 110 3.08 3.215 19.44  1  0    3    1
## 5:   8 5.899356 175 3.15 3.440 17.02  0  0    3    2
## 6:   6 3.687098 105 2.76 3.460 20.22  1  0    3    1

# window functions are useful for grouped computations
mtcars %>%
    let(rank = rank(-mpg, ties.method = "min"),
        by = cyl) %>%
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb rank
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4    2
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4    2
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1    8
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1    1
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2    2
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1    6

# You can drop variables by setting them to NULL
mtcars %>%
    let(cyl = NULL) %>%
    head()

##     mpg disp  hp drat    wt  qsec vs am gear carb
## 1: 21.0  160 110 3.90 2.620 16.46  0  1    4    4
## 2: 21.0  160 110 3.90 2.875 17.02  0  1    4    4
## 3: 22.8  108  93 3.85 2.320 18.61  1  1    4    1
## 4: 21.4  258 110 3.08 3.215 19.44  1  0    3    1
## 5: 18.7  360 175 3.15 3.440 17.02  0  0    3    2
## 6: 18.1  225 105 2.76 3.460 20.22  1  0    3    1

# keeps all existing variables
mtcars %>%
    let(displ_l = disp / 61.0237) %>%
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb  displ_l
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4 2.621932
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4 2.621932
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1 1.769804
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1 4.227866
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2 5.899347
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1 3.687092

# keeps only the variables you create
mtcars %>%
    take(displ_l = disp / 61.0237) %>% 
    head()

##     displ_l
## 1: 2.621932
## 2: 2.621932
## 3: 1.769804
## 4: 4.227866
## 5: 5.899347
## 6: 3.687092

# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
    let(cyl = cyl * var) %>%
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb
## 1: 21.0 600  160 110 3.90 2.620 16.46  0  1    4    4
## 2: 21.0 600  160 110 3.90 2.875 17.02  0  1    4    4
## 3: 22.8 400  108  93 3.85 2.320 18.61  1  1    4    1
## 4: 21.4 600  258 110 3.08 3.215 19.44  1  0    3    1
## 5: 18.7 800  360 175 3.15 3.440 17.02  0  0    3    2
## 6: 18.1 600  225 105 2.76 3.460 20.22  1  0    3    1

# select rows
mtcars %>%
    rows(am==0) %>% 
    head()

##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1: 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 2: 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
## 3: 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
## 4: 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
## 5: 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 6: 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2

# select rows with compound condition
mtcars %>%
    rows(am==0 & mpg>mean(mpg))

##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1: 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 2: 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 3: 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 4: 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1

# select columns
mtcars %>% 
    columns(vs:carb, cyl)

##                     vs am gear carb cyl
## Mazda RX4            0  1    4    4   6
## Mazda RX4 Wag        0  1    4    4   6
## Datsun 710           1  1    4    1   4
## Hornet 4 Drive       1  0    3    1   6
## Hornet Sportabout    0  0    3    2   8
## Valiant              1  0    3    1   6
## Duster 360           0  0    3    4   8
## Merc 240D            1  0    4    2   4
## Merc 230             1  0    4    2   4
## Merc 280             1  0    4    4   6
## Merc 280C            1  0    4    4   6
## Merc 450SE           0  0    3    3   8
## Merc 450SL           0  0    3    3   8
## Merc 450SLC          0  0    3    3   8
## Cadillac Fleetwood   0  0    3    4   8
## Lincoln Continental  0  0    3    4   8
## Chrysler Imperial    0  0    3    4   8
## Fiat 128             1  1    4    1   4
## Honda Civic          1  1    4    2   4
## Toyota Corolla       1  1    4    1   4
## Toyota Corona        1  0    3    1   4
## Dodge Challenger     0  0    3    2   8
## AMC Javelin          0  0    3    2   8
## Camaro Z28           0  0    3    4   8
## Pontiac Firebird     0  0    3    2   8
## Fiat X1-9            1  1    4    1   4
## Porsche 914-2        0  1    5    2   4
## Lotus Europa         1  1    5    2   4
## Ford Pantera L       0  1    5    4   8
## Ferrari Dino         0  1    5    6   6
## Maserati Bora        0  1    5    8   8
## Volvo 142E           1  1    4    2   4

mtcars %>% 
    columns(-am, -cyl)

##                      mpg  disp  hp drat    wt  qsec vs gear carb
## Mazda RX4           21.0 160.0 110 3.90 2.620 16.46  0    4    4
## Mazda RX4 Wag       21.0 160.0 110 3.90 2.875 17.02  0    4    4
## Datsun 710          22.8 108.0  93 3.85 2.320 18.61  1    4    1
## Hornet 4 Drive      21.4 258.0 110 3.08 3.215 19.44  1    3    1
## Hornet Sportabout   18.7 360.0 175 3.15 3.440 17.02  0    3    2
## Valiant             18.1 225.0 105 2.76 3.460 20.22  1    3    1
## Duster 360          14.3 360.0 245 3.21 3.570 15.84  0    3    4
## Merc 240D           24.4 146.7  62 3.69 3.190 20.00  1    4    2
## Merc 230            22.8 140.8  95 3.92 3.150 22.90  1    4    2
## Merc 280            19.2 167.6 123 3.92 3.440 18.30  1    4    4
## Merc 280C           17.8 167.6 123 3.92 3.440 18.90  1    4    4
## Merc 450SE          16.4 275.8 180 3.07 4.070 17.40  0    3    3
## Merc 450SL          17.3 275.8 180 3.07 3.730 17.60  0    3    3
## Merc 450SLC         15.2 275.8 180 3.07 3.780 18.00  0    3    3
## Cadillac Fleetwood  10.4 472.0 205 2.93 5.250 17.98  0    3    4
## Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82  0    3    4
## Chrysler Imperial   14.7 440.0 230 3.23 5.345 17.42  0    3    4
## Fiat 128            32.4  78.7  66 4.08 2.200 19.47  1    4    1
## Honda Civic         30.4  75.7  52 4.93 1.615 18.52  1    4    2
## Toyota Corolla      33.9  71.1  65 4.22 1.835 19.90  1    4    1
## Toyota Corona       21.5 120.1  97 3.70 2.465 20.01  1    3    1
## Dodge Challenger    15.5 318.0 150 2.76 3.520 16.87  0    3    2
## AMC Javelin         15.2 304.0 150 3.15 3.435 17.30  0    3    2
## Camaro Z28          13.3 350.0 245 3.73 3.840 15.41  0    3    4
## Pontiac Firebird    19.2 400.0 175 3.08 3.845 17.05  0    3    2
## Fiat X1-9           27.3  79.0  66 4.08 1.935 18.90  1    4    1
## Porsche 914-2       26.0 120.3  91 4.43 2.140 16.70  0    5    2
## Lotus Europa        30.4  95.1 113 3.77 1.513 16.90  1    5    2
## Ford Pantera L      15.8 351.0 264 4.22 3.170 14.50  0    5    4
## Ferrari Dino        19.7 145.0 175 3.62 2.770 15.50  0    5    6
## Maserati Bora       15.0 301.0 335 3.54 3.570 14.60  0    5    8
## Volvo 142E          21.4 121.0 109 4.11 2.780 18.60  1    4    2

# regular expression pattern
columns(iris, "^Petal") %>% head() # variables which start from 'Petal'

##   Petal.Length Petal.Width
## 1          1.4         0.2
## 2          1.4         0.2
## 3          1.3         0.2
## 4          1.5         0.2
## 5          1.4         0.2
## 6          1.7         0.4

columns(iris, "Width$") %>% head() # variables which end with 'Width'

##   Sepal.Width Petal.Width
## 1         3.5         0.2
## 2         3.0         0.2
## 3         3.2         0.2
## 4         3.1         0.2
## 5         3.6         0.2
## 6         3.9         0.4

# move Species variable to the front
# pattern "^." matches all variables
columns(iris, Species, "^.") %>% head()

##   Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  setosa          5.1         3.5          1.4         0.2
## 2  setosa          4.9         3.0          1.4         0.2
## 3  setosa          4.7         3.2          1.3         0.2
## 4  setosa          4.6         3.1          1.5         0.2
## 5  setosa          5.0         3.6          1.4         0.2
## 6  setosa          5.4         3.9          1.7         0.4

# pattern "^.*al" means "contains 'al'"
columns(iris, "^.*al") %>% head()

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

# numeric indexing - all variables except Species
columns(iris, 1:4) %>% head()

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

# A 'take' with summary functions applied without 'by' argument returns an aggregated data
mtcars %>%
    take(mean = mean(disp), n = .N)

##        mean  n
## 1: 230.7219 32

# Usually, you'll want to group first
mtcars %>%
    take(mean = mean(disp), n = .N, by = am)

##    am     mean  n
## 1:  1 143.5308 13
## 2:  0 290.3789 19

# grouping by multiple variables
mtcars %>%
    take(mean = mean(disp), n = .N, by = list(am, vs))

##    am vs     mean  n
## 1:  1  0 206.2167  6
## 2:  1  1  89.8000  7
## 3:  0  1 175.1143  7
## 4:  0  0 357.6167 12

# You can group by expressions:
mtcars %>%
    take_all(
        mean,
        by = list(vsam = vs + am)
    )

##    vsam      mpg      cyl     disp        hp     drat       wt     qsec
## 1:    1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231
## 2:    2 28.37143 4.000000  89.8000  80.57143 4.148571 2.028286 18.70000
## 3:    0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250
##        gear     carb
## 1: 4.076923 3.307692
## 2: 4.142857 1.428571
## 3: 3.000000 3.083333

# modify all non-grouping variables in-place
mtcars %>%
    let_all((.x - mean(.x))/sd(.x), by = am) %>%
    head()

##           mpg        cyl       disp         hp       drat         wt       qsec
## 1: -0.5501185  0.5945745  0.1888587 -0.2004008 -0.4120299  0.3387459 -0.5021316
## 2: -0.5501185  0.5945745  0.1888587 -0.2004008 -0.4120299  0.7520483 -0.1896942
## 3: -0.2582189 -0.6936702 -0.4074443 -0.4026317 -0.5493732 -0.1474922  0.6974050
## 4:  1.1091990 -0.6133196 -0.2938955 -0.9323843 -0.5259081 -0.7124963  0.7176592
## 5:  0.4049674  0.6814663  0.6319326  0.2733692 -0.3474750 -0.4230701 -0.6641654
## 6:  0.2484716 -0.6133196 -0.5934281 -1.0251346 -1.3416023 -0.3973433  1.1630407
##            vs am       gear       carb
## 1: -1.0377490  1 -0.7595545  0.4944600
## 2: -1.0377490  1 -0.7595545  0.4944600
## 3:  0.8894992  1 -0.7595545 -0.8829642
## 4:  1.2743862  0 -0.5026247 -1.5141438
## 5: -0.7433919  0 -0.5026247 -0.6423641
## 6:  1.2743862  0 -0.5026247 -1.5141438

# modify all non-grouping variables to new variables
mtcars %>%
    let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb mpg_scaled cyl_scaled
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4 -0.5501185  0.5945745
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4 -0.5501185  0.5945745
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1 -0.2582189 -0.6936702
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1  1.1091990 -0.6133196
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2  0.4049674  0.6814663
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1  0.2484716 -0.6133196
##    disp_scaled  hp_scaled drat_scaled  wt_scaled qsec_scaled  vs_scaled
## 1:   0.1888587 -0.2004008  -0.4120299  0.3387459  -0.5021316 -1.0377490
## 2:   0.1888587 -0.2004008  -0.4120299  0.7520483  -0.1896942 -1.0377490
## 3:  -0.4074443 -0.4026317  -0.5493732 -0.1474922   0.6974050  0.8894992
## 4:  -0.2938955 -0.9323843  -0.5259081 -0.7124963   0.7176592  1.2743862
## 5:   0.6319326  0.2733692  -0.3474750 -0.4230701  -0.6641654 -0.7433919
## 6:  -0.5934281 -1.0251346  -1.3416023 -0.3973433   1.1630407  1.2743862
##    gear_scaled carb_scaled
## 1:  -0.7595545   0.4944600
## 2:  -0.7595545   0.4944600
## 3:  -0.7595545  -0.8829642
## 4:  -0.5026247  -1.5141438
## 5:  -0.5026247  -0.6423641
## 6:  -0.5026247  -1.5141438

# conditionally modify all variables
iris %>%
    let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
## 1:          5.1         3.5          1.4         0.2  setosa          5.843333
## 2:          4.9         3.0          1.4         0.2  setosa          5.843333
## 3:          4.7         3.2          1.3         0.2  setosa          5.843333
## 4:          4.6         3.1          1.5         0.2  setosa          5.843333
## 5:          5.0         3.6          1.4         0.2  setosa          5.843333
## 6:          5.4         3.9          1.7         0.4  setosa          5.843333
##    Sepal.Width_mean Petal.Length_mean Petal.Width_mean
## 1:         3.057333             3.758         1.199333
## 2:         3.057333             3.758         1.199333
## 3:         3.057333             3.758         1.199333
## 4:         3.057333             3.758         1.199333
## 5:         3.057333             3.758         1.199333
## 6:         3.057333             3.758         1.199333

# modify all variables conditionally on name
iris %>%
    let_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    ) %>%
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
## 1:          5.1         3.5          1.4         0.2  setosa             5.006
## 2:          4.9         3.0          1.4         0.2  setosa             5.006
## 3:          4.7         3.2          1.3         0.2  setosa             5.006
## 4:          4.6         3.1          1.5         0.2  setosa             5.006
## 5:          5.0         3.6          1.4         0.2  setosa             5.006
## 6:          5.4         3.9          1.7         0.4  setosa             5.006
##    Sepal.Width_mean Petal.Length_median Petal.Width_median
## 1:            3.428                 1.5                0.2
## 2:            3.428                 1.5                0.2
## 3:            3.428                 1.5                0.2
## 4:            3.428                 1.5                0.2
## 5:            3.428                 1.5                0.2
## 6:            3.428                 1.5                0.2

# aggregation with 'take_all'
mtcars %>%
    take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)

##    am mpg_mean cyl_mean disp_mean  hp_mean drat_mean  wt_mean qsec_mean
## 1:  1 24.39231 5.076923  143.5308 126.8462  4.050000 2.411000  17.36000
## 2:  0 17.14737 6.947368  290.3789 160.2632  3.286316 3.768895  18.18316
##      vs_mean gear_mean carb_mean   mpg_sd   cyl_sd   disp_sd    hp_sd   drat_sd
## 1: 0.5384615  4.384615  2.923077 6.166504 1.552500  87.20399 84.06232 0.3640513
## 2: 0.3684211  3.210526  2.736842 3.833966 1.544657 110.17165 53.90820 0.3923039
##        wt_sd  qsec_sd     vs_sd   gear_sd  carb_sd mpg_n cyl_n disp_n hp_n
## 1: 0.6169816 1.792359 0.5188745 0.5063697 2.177978    13    13     13   13
## 2: 0.7774001 1.751308 0.4955946 0.4188539 1.147079    19    19     19   19
##    drat_n wt_n qsec_n vs_n gear_n carb_n
## 1:     13   13     13   13     13     13
## 2:     19   19     19   19     19     19

# conditionally aggregate all variables
iris %>%
    take_all(mean = if(is.numeric(.x)) mean(.x))

##    Sepal.Length_mean Sepal.Width_mean Petal.Length_mean Petal.Width_mean
## 1:          5.843333         3.057333             3.758         1.199333

# aggregate all variables conditionally on name
iris %>%
    take_all(
        mean = if(startsWith(.name, "Sepal")) mean(.x),
        median = if(startsWith(.name, "Petal")) median(.x),
        by = Species
    )

##       Species Sepal.Length_mean Sepal.Width_mean Petal.Length_median
## 1:     setosa             5.006            3.428                1.50
## 2: versicolor             5.936            2.770                4.35
## 3:  virginica             6.588            2.974                5.55
##    Petal.Width_median
## 1:                0.2
## 2:                1.3
## 3:                2.0

# parametric evaluation:
var = quote(mean(cyl))
mtcars %>% 
    let(mean_cyl = eval(var)) %>% 
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb mean_cyl
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4   6.1875
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4   6.1875
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1   6.1875
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1   6.1875
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2   6.1875
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1   6.1875

take(mtcars, eval(var))

##    eval(var)
## 1:    6.1875

# all together
new_var = "mean_cyl"
mtcars %>% 
    let((new_var) := eval(var)) %>% 
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb mean_cyl
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4   6.1875
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4   6.1875
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1   6.1875
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1   6.1875
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2   6.1875
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1   6.1875

take(mtcars, (new_var) := eval(var))

##    mean_cyl
## 1:   6.1875

Variable selection in the expressions

You can use ‘columns’ inside expression in the ‘take’/‘let’. ‘columns’ will be replaced with data.table with selected columns. In ‘let’ in the expressions with ‘:=’, ‘cols’ or ‘%to%’ can be placed in the left part of the expression. It is usefull for multiple assignment. There are four ways of column selection:

Simply by column names
By variable ranges, e. g. vs:carb. Alternatively, you can use ‘%to%’ instead of colon: ‘vs %to% carb’.
With regular expressions. Characters which start with ‘^’ or end with $ considered as Perl-style regular expression patterns. For example, ‘^Petal’ returns all variables started with ‘Petal’. ‘Width$’ returns all variables which end with ‘Width’. Pattern ‘^.’ matches all variables and pattern ’^.*my_str’ is equivalent to contains “my_str”’.
By character variables with interpolated parts. Expression in the curly brackets inside characters will be evaluated in the parent frame with ‘text_expand’ function. For example, a{1:3} will be transformed to the names ‘a1’, ‘a2’, ‘a3’. ‘cols’ is just a shortcut for ‘columns’.

# range selection
iris %>% 
    let(
        avg = rowMeans(Sepal.Length %to% Petal.Width)
    ) %>% 
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species   avg
## 1:          5.1         3.5          1.4         0.2  setosa 2.550
## 2:          4.9         3.0          1.4         0.2  setosa 2.375
## 3:          4.7         3.2          1.3         0.2  setosa 2.350
## 4:          4.6         3.1          1.5         0.2  setosa 2.350
## 5:          5.0         3.6          1.4         0.2  setosa 2.550
## 6:          5.4         3.9          1.7         0.4  setosa 2.850

# multiassignment
iris %>% 
    let(
        # starts with Sepal or Petal
        multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
    ) %>% 
    head()

##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species multipled1
## 1:          5.1         3.5          1.4         0.2  setosa       10.2
## 2:          4.9         3.0          1.4         0.2  setosa        9.8
## 3:          4.7         3.2          1.3         0.2  setosa        9.4
## 4:          4.6         3.1          1.5         0.2  setosa        9.2
## 5:          5.0         3.6          1.4         0.2  setosa       10.0
## 6:          5.4         3.9          1.7         0.4  setosa       10.8
##    multipled2 multipled3 multipled4
## 1:        7.0        2.8        0.4
## 2:        6.0        2.8        0.4
## 3:        6.4        2.6        0.4
## 4:        6.2        3.0        0.4
## 5:        7.2        2.8        0.4
## 6:        7.8        3.4        0.8

mtcars %>% 
    let(
        # text expansion
        cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
    ) %>% 
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb scaled_mpg scaled_cyl
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4  0.1508848 -0.1049878
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4  0.1508848 -0.1049878
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1  0.4495434 -1.2248578
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1  0.2172534 -0.1049878
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2 -0.2307345  1.0148821
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1 -0.3302874 -0.1049878
##    scaled_disp  scaled_hp scaled_drat    scaled_wt scaled_qsec  scaled_vs
## 1: -0.57061982 -0.5350928   0.5675137 -0.610399567  -0.7771651 -0.8680278
## 2: -0.57061982 -0.5350928   0.5675137 -0.349785269  -0.4637808 -0.8680278
## 3: -0.99018209 -0.7830405   0.4739996 -0.917004624   0.4260068  1.1160357
## 4:  0.22009369 -0.5350928  -0.9661175 -0.002299538   0.8904872  1.1160357
## 5:  1.04308123  0.4129422  -0.8351978  0.227654255  -0.4637808 -0.8680278
## 6: -0.04616698 -0.6080186  -1.5646078  0.248094592   1.3269868  1.1160357
##     scaled_am scaled_gear scaled_carb
## 1:  1.1899014   0.4235542   0.7352031
## 2:  1.1899014   0.4235542   0.7352031
## 3:  1.1899014   0.4235542  -1.1221521
## 4: -0.8141431  -0.9318192  -1.1221521
## 5: -0.8141431  -0.9318192  -0.5030337
## 6: -0.8141431  -0.9318192  -1.1221521

# range selection in 'by'
# selection of range + additional column
mtcars %>% 
    take(
        res = sum(cols(mpg, disp %to% drat)),
        by = vs %to% gear
    )

##    vs am gear     res
## 1:  0  1    4  589.80
## 2:  1  1    4 1177.97
## 3:  1  0    3  985.64
## 4:  0  0    3 6839.45
## 5:  1  0    4 1125.35
## 6:  0  1    5 1874.61
## 7:  1  1    5  242.27

Joins

Here we use the same datasets as with lookups:

workers = fread("
    name company
    Nick Acme
    John Ajax
    Daniela Ajax
")

positions = fread("
    name position
    John designer
    Daniela engineer
    Cathie manager
")

workers

##       name company
## 1:    Nick    Acme
## 2:    John    Ajax
## 3: Daniela    Ajax

positions

##       name position
## 1:    John designer
## 2: Daniela engineer
## 3:  Cathie  manager

Different kinds of joins:

workers %>% dt_inner_join(positions)

## dt_inner_join: joining, by = "name"

##       name company position
## 1:    John    Ajax designer
## 2: Daniela    Ajax engineer

workers %>% dt_left_join(positions)

## dt_left_join: joining, by = "name"

##       name company position
## 1:    Nick    Acme     <NA>
## 2:    John    Ajax designer
## 3: Daniela    Ajax engineer

workers %>% dt_right_join(positions)

## dt_right_join: joining, by = "name"

##       name company position
## 1:    John    Ajax designer
## 2: Daniela    Ajax engineer
## 3:  Cathie    <NA>  manager

workers %>% dt_full_join(positions)

## dt_full_join: joining, by = "name"

##       name company position
## 1:    Nick    Acme     <NA>
## 2:    John    Ajax designer
## 3: Daniela    Ajax engineer
## 4:  Cathie    <NA>  manager

# filtering joins
workers %>% dt_anti_join(positions)

## dt_anti_join: joining, by = "name"

##    name company
## 1: Nick    Acme

workers %>% dt_semi_join(positions)

## dt_semi_join: joining, by = "name"

##       name company
## 1:    John    Ajax
## 2: Daniela    Ajax

To suppress the message, supply by argument:

workers %>% dt_left_join(positions, by = "name")

Use a named by if the join variables have different names:

positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
workers %>% dt_inner_join(positions2, by = c("name" = "worker"))

‘dplyr’-like interface for data.table.

There are a small subset of ‘dplyr’ verbs to work with data.table. Note that there is no group_by verb - use by or keyby argument when needed.

dt_mutate adds new variables or modify existing variables. If data is data.table then it modifies in-place.
dt_summarize computes summary statistics. Splits the data into subsets, computes summary statistics for each, and returns the result in the “data.table” form.
dt_summarize_all the same as dt_summarize but work over all non-grouping variables.
dt_filter Selects rows/cases where conditions are true. Rows where the condition evaluates to NA are dropped.
dt_select Selects column/variables from the data set. Range of variables are supported, e. g. vs:carb. Characters which start with ^ or end with \$ considered as Perl-style regular expression patterns. For example, '^Petal' returns all variables started with ‘Petal’. 'Width\$' returns all variables which end with ‘Width’. Pattern ^. matches all variables and pattern '^.*my_str' is equivalent to contains "my_str". See examples.
dt_arrange sorts dataset by variable(-s). Use ‘-’ to sort in desending order. If data is data.table then it modifies in-place.

The same examples with ‘dplyr’-verbs:

# examples from 'dplyr'
# newly created variables are available immediately
mtcars  %>%
    dt_mutate(
        cyl2 = cyl * 2,
        cyl4 = cyl2 * 2
    ) %>%
    head()

##     mpg cyl disp  hp drat    wt  qsec vs am gear carb cyl2 cyl4
## 1: 21.0   6  160 110 3.90 2.620 16.46  0  1    4    4   12   24
## 2: 21.0   6  160 110 3.90 2.875 17.02  0  1    4    4   12   24
## 3: 22.8   4  108  93 3.85 2.320 18.61  1  1    4    1    8   16
## 4: 21.4   6  258 110 3.08 3.215 19.44  1  0    3    1   12   24
## 5: 18.7   8  360 175 3.15 3.440 17.02  0  0    3    2   16   32
## 6: 18.1   6  225 105 2.76 3.460 20.22  1  0    3    1   12   24

# you can also use dt_mutate() to remove variables and
# modify existing variables
mtcars %>%
    dt_mutate(
        mpg = NULL,
        disp = disp * 0.0163871 # convert to litres
    ) %>%
    head()

##    cyl     disp  hp drat    wt  qsec vs am gear carb
## 1:   6 2.621936 110 3.90 2.620 16.46  0  1    4    4
## 2:   6 2.621936 110 3.90 2.875 17.02  0  1    4    4
## 3:   4 1.769807  93 3.85 2.320 18.61  1  1    4    1
## 4:   6 4.227872 110 3.08 3.215 19.44  1  0    3    1
## 5:   8 5.899356 175 3.15 3.440 17.02  0  0    3    2
## 6:   6 3.687098 105 2.76 3.460 20.22  1  0    3    1

# window functions are useful for grouped mutates
mtcars %>%
    dt_mutate(
        rank = rank(-mpg, ties.method = "min"),
        keyby = cyl) %>%
    print()

##      mpg cyl  disp  hp drat    wt  qsec vs am gear carb rank
##  1: 22.8   4 108.0  93 3.85 2.320 18.61  1  1    4    1    8
##  2: 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2    7
##  3: 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2    8
##  4: 32.4   4  78.7  66 4.08 2.200 19.47  1  1    4    1    2
##  5: 30.4   4  75.7  52 4.93 1.615 18.52  1  1    4    2    3
##  6: 33.9   4  71.1  65 4.22 1.835 19.90  1  1    4    1    1
##  7: 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1   10
##  8: 27.3   4  79.0  66 4.08 1.935 18.90  1  1    4    1    5
##  9: 26.0   4 120.3  91 4.43 2.140 16.70  0  1    5    2    6
## 10: 30.4   4  95.1 113 3.77 1.513 16.90  1  1    5    2    3
## 11: 21.4   4 121.0 109 4.11 2.780 18.60  1  1    4    2   11
## 12: 21.0   6 160.0 110 3.90 2.620 16.46  0  1    4    4    2
## 13: 21.0   6 160.0 110 3.90 2.875 17.02  0  1    4    4    2
## 14: 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1    1
## 15: 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1    6
## 16: 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4    5
## 17: 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4    7
## 18: 19.7   6 145.0 175 3.62 2.770 15.50  0  1    5    6    4
## 19: 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2    2
## 20: 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4   11
## 21: 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3    4
## 22: 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3    3
## 23: 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3    7
## 24: 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4   13
## 25: 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4   13
## 26: 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4   10
## 27: 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2    6
## 28: 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2    7
## 29: 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4   12
## 30: 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2    1
## 31: 15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4    5
## 32: 15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8    9
##      mpg cyl  disp  hp drat    wt  qsec vs am gear carb rank

# You can drop variables by setting them to NULL
mtcars %>% dt_mutate(cyl = NULL) %>% head()

##     mpg disp  hp drat    wt  qsec vs am gear carb
## 1: 21.0  160 110 3.90 2.620 16.46  0  1    4    4
## 2: 21.0  160 110 3.90 2.875 17.02  0  1    4    4
## 3: 22.8  108  93 3.85 2.320 18.61  1  1    4    1
## 4: 21.4  258 110 3.08 3.215 19.44  1  0    3    1
## 5: 18.7  360 175 3.15 3.440 17.02  0  0    3    2
## 6: 18.1  225 105 2.76 3.460 20.22  1  0    3    1

# A summary applied without by returns a single row
mtcars %>%
    dt_summarise(mean = mean(disp), n = .N)

##        mean  n
## 1: 230.7219 32

# Usually, you'll want to group first
mtcars %>%
    dt_summarise(mean = mean(disp), n = .N, by = cyl)

##    cyl     mean  n
## 1:   6 183.3143  7
## 2:   4 105.1364 11
## 3:   8 353.1000 14

# Multiple 'by' - variables
mtcars %>%
    dt_summarise(cyl_n = .N, by = list(cyl, vs))

##    cyl vs cyl_n
## 1:   6  0     3
## 2:   4  1    10
## 3:   6  1     4
## 4:   8  0    14
## 5:   4  0     1

# Newly created summaries immediately
# doesn't overwrite existing variables
mtcars %>%
    dt_summarise(disp = mean(disp),
                  sd = sd(disp),
                  by = cyl)

##    cyl     disp       sd
## 1:   6 183.3143 41.56246
## 2:   4 105.1364 26.87159
## 3:   8 353.1000 67.77132

# You can group by expressions:
mtcars %>%
    dt_summarise_all(mean, by = list(vsam = vs + am))

##    vsam      mpg      cyl     disp        hp     drat       wt     qsec
## 1:    1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231
## 2:    2 28.37143 4.000000  89.8000  80.57143 4.148571 2.028286 18.70000
## 3:    0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250
##        gear     carb
## 1: 4.076923 3.307692
## 2: 4.142857 1.428571
## 3: 3.000000 3.083333

# filter by condition
mtcars %>%
    dt_filter(am==0)

##      mpg cyl  disp  hp drat    wt  qsec vs am gear carb
##  1: 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
##  2: 18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
##  3: 18.1   6 225.0 105 2.76 3.460 20.22  1  0    3    1
##  4: 14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
##  5: 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
##  6: 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
##  7: 19.2   6 167.6 123 3.92 3.440 18.30  1  0    4    4
##  8: 17.8   6 167.6 123 3.92 3.440 18.90  1  0    4    4
##  9: 16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
## 10: 17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
## 11: 15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
## 12: 10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
## 13: 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
## 14: 14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
## 15: 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1
## 16: 15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
## 17: 15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
## 18: 13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
## 19: 19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2

# filter by compound condition
mtcars %>%
    dt_filter(am==0,  mpg>mean(mpg))

##     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
## 1: 21.4   6 258.0 110 3.08 3.215 19.44  1  0    3    1
## 2: 24.4   4 146.7  62 3.69 3.190 20.00  1  0    4    2
## 3: 22.8   4 140.8  95 3.92 3.150 22.90  1  0    4    2
## 4: 21.5   4 120.1  97 3.70 2.465 20.01  1  0    3    1

# select
mtcars %>% 
  dt_select(vs:carb, cyl) %>% 
  head()

##                   vs am gear carb cyl
## Mazda RX4          0  1    4    4   6
## Mazda RX4 Wag      0  1    4    4   6
## Datsun 710         1  1    4    1   4
## Hornet 4 Drive     1  0    3    1   6
## Hornet Sportabout  0  0    3    2   8
## Valiant            1  0    3    1   6

mtcars %>% 
  dt_select(-am, -cyl) %>% 
  head()

##                    mpg disp  hp drat    wt  qsec vs gear carb
## Mazda RX4         21.0  160 110 3.90 2.620 16.46  0    4    4
## Mazda RX4 Wag     21.0  160 110 3.90 2.875 17.02  0    4    4
## Datsun 710        22.8  108  93 3.85 2.320 18.61  1    4    1
## Hornet 4 Drive    21.4  258 110 3.08 3.215 19.44  1    3    1
## Hornet Sportabout 18.7  360 175 3.15 3.440 17.02  0    3    2
## Valiant           18.1  225 105 2.76 3.460 20.22  1    3    1

# regular expression pattern
dt_select(iris, "^Petal") %>% head() # variables which start from 'Petal'

##   Petal.Length Petal.Width
## 1          1.4         0.2
## 2          1.4         0.2
## 3          1.3         0.2
## 4          1.5         0.2
## 5          1.4         0.2
## 6          1.7         0.4

dt_select(iris, "Width$") %>% head()  # variables which end with 'Width'

##   Sepal.Width Petal.Width
## 1         3.5         0.2
## 2         3.0         0.2
## 3         3.2         0.2
## 4         3.1         0.2
## 5         3.6         0.2
## 6         3.9         0.4

# move Species variable to the front
# pattern "^." matches all variables
dt_select(iris, Species, "^.") %>% head()

##   Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1  setosa          5.1         3.5          1.4         0.2
## 2  setosa          4.9         3.0          1.4         0.2
## 3  setosa          4.7         3.2          1.3         0.2
## 4  setosa          4.6         3.1          1.5         0.2
## 5  setosa          5.0         3.6          1.4         0.2
## 6  setosa          5.4         3.9          1.7         0.4

# pattern "^.*al" means "contains 'al'"
dt_select(iris, "^.*al") %>% head()

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

dt_select(iris, 1:4) %>% head()  # numeric indexing - all variables except Species

##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1          5.1         3.5          1.4         0.2
## 2          4.9         3.0          1.4         0.2
## 3          4.7         3.2          1.3         0.2
## 4          4.6         3.1          1.5         0.2
## 5          5.0         3.6          1.4         0.2
## 6          5.4         3.9          1.7         0.4

# sorting
dt_arrange(mtcars, cyl, disp)
dt_arrange(mtcars, -disp)

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.