The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
Package provides pipe-style interface for data.table
package. It preserves all data.table features without significant impact
on performance. let
and take
functions are
simplified interfaces for most common data manipulation tasks.
rows(mtcars, am==0)
columns(mtcars, mpg, vs:carb)
take(mtcars, mean_mpg = mean(mpg), by = am)
take_all(mtcars, mean, by = am)
take(mtcars, mpg, hp, fun = mean, by = am)
by
argument:
take_all(mtcars, mean)
%>%
to chain several
operations: mtcars %>%
let(mpg_hp = mpg/hp) %>%
take(mean(mpg_hp), by = am)
mtcars %>%
let(new_var = 42,
new_var2 = new_var*hp) %>%
head()
let(mtcars, am = NULL) %>% head()
iris %>%
let_all(
scaled = (.x - mean(.x))/sd(.x),
by = Species) %>%
head()
iris %>%
take_all(
mean = if(startsWith(.name, "Sepal")) mean(.x),
median = if(startsWith(.name, "Petal")) median(.x),
by = Species
)
:=
: new_var = "my_var"
old_var = "mpg"
mtcars %>%
let((new_var) := get(old_var)*2) %>%
head()
# or,
expr = quote(mean(cyl))
mtcars %>%
let((new_var) := eval(expr)) %>%
head()
# the same with `take`
by_var = "vs,am"
take(mtcars, (new_var) := eval(expr), by = by_var)
query_if
function translates its arguments one-to-one to
[.data.table
method. Additionally there are some
conveniences such as automatic data.frame
conversion to
data.table
.
Let’s make datasets for lookups:
workers = fread("
name company
Nick Acme
John Ajax
Daniela Ajax
")
positions = fread("
name position
John designer
Daniela engineer
Cathie manager
")
# xlookup
workers = let(workers,
position = xlookup(name, positions$name, positions$position)
)
# vlookup
# by default we search in the first column and return values from second column
workers = let(workers,
position = vlookup(name, positions, no_match = "Not found")
)
# the same
workers = let(workers,
position = vlookup(name, positions,
result_column = "position",
no_match = "Not found") # or, result_column = 2
)
head(workers)
## name company position
## <char> <char> <char>
## 1: Nick Acme Not found
## 2: John Ajax designer
## 3: Daniela Ajax engineer
We will use for demonstartion well-known mtcars
dataset
and some examples from dplyr
package.
library(maditr)
data(mtcars)
# Newly created variables are available immediately
mtcars %>%
let(
cyl2 = cyl * 2,
cyl4 = cyl2 * 2
) %>% head()
## mpg cyl disp hp drat wt qsec vs am gear carb cyl2
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 12
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 12
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 8
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 12
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 16
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 12
## cyl4
## <num>
## 1: 24
## 2: 24
## 3: 16
## 4: 24
## 5: 32
## 6: 24
# You can also use let() to remove variables and
# modify existing variables
mtcars %>%
let(
mpg = NULL,
disp = disp * 0.0163871 # convert to litres
) %>% head()
## cyl disp hp drat wt qsec vs am gear carb
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 6 2.621936 110 3.90 2.620 16.46 0 1 4 4
## 2: 6 2.621936 110 3.90 2.875 17.02 0 1 4 4
## 3: 4 1.769807 93 3.85 2.320 18.61 1 1 4 1
## 4: 6 4.227872 110 3.08 3.215 19.44 1 0 3 1
## 5: 8 5.899356 175 3.15 3.440 17.02 0 0 3 2
## 6: 6 3.687098 105 2.76 3.460 20.22 1 0 3 1
# window functions are useful for grouped computations
mtcars %>%
let(rank = rank(-mpg, ties.method = "min"),
by = cyl) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb rank
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <int>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 8
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 2
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 6
# You can drop variables by setting them to NULL
mtcars %>%
let(cyl = NULL) %>%
head()
## mpg disp hp drat wt qsec vs am gear carb
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 160 110 3.90 2.620 16.46 0 1 4 4
## 2: 21.0 160 110 3.90 2.875 17.02 0 1 4 4
## 3: 22.8 108 93 3.85 2.320 18.61 1 1 4 1
## 4: 21.4 258 110 3.08 3.215 19.44 1 0 3 1
## 5: 18.7 360 175 3.15 3.440 17.02 0 0 3 2
## 6: 18.1 225 105 2.76 3.460 20.22 1 0 3 1
# keeps all existing variables
mtcars %>%
let(displ_l = disp / 61.0237) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb displ_l
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 2.621932
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 2.621932
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 1.769804
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 4.227866
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 5.899347
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 3.687092
# keeps only the variables you create
mtcars %>%
take(displ_l = disp / 61.0237) %>%
head()
## displ_l
## <num>
## 1: 2.621932
## 2: 2.621932
## 3: 1.769804
## 4: 4.227866
## 5: 5.899347
## 6: 3.687092
# can refer to both contextual variables and variable names:
var = 100
mtcars %>%
let(cyl = cyl * var) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 600 160 110 3.90 2.620 16.46 0 1 4 4
## 2: 21.0 600 160 110 3.90 2.875 17.02 0 1 4 4
## 3: 22.8 400 108 93 3.85 2.320 18.61 1 1 4 1
## 4: 21.4 600 258 110 3.08 3.215 19.44 1 0 3 1
## 5: 18.7 800 360 175 3.15 3.440 17.02 0 0 3 2
## 6: 18.1 600 225 105 2.76 3.460 20.22 1 0 3 1
# select rows
mtcars %>%
rows(am==0) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 2: 18.7 8 360.0 175 3.15 3.440 17.02 0 0 3 2
## 3: 18.1 6 225.0 105 2.76 3.460 20.22 1 0 3 1
## 4: 14.3 8 360.0 245 3.21 3.570 15.84 0 0 3 4
## 5: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 6: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
# select rows with compound condition
mtcars %>%
rows(am==0 & mpg>mean(mpg))
## mpg cyl disp hp drat wt qsec vs am gear carb
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.4 6 258.0 110 3.08 3.215 19.44 1 0 3 1
## 2: 24.4 4 146.7 62 3.69 3.190 20.00 1 0 4 2
## 3: 22.8 4 140.8 95 3.92 3.150 22.90 1 0 4 2
## 4: 21.5 4 120.1 97 3.70 2.465 20.01 1 0 3 1
# select columns
mtcars %>%
columns(vs:carb, cyl)
## vs am gear carb cyl
## Mazda RX4 0 1 4 4 6
## Mazda RX4 Wag 0 1 4 4 6
## Datsun 710 1 1 4 1 4
## Hornet 4 Drive 1 0 3 1 6
## Hornet Sportabout 0 0 3 2 8
## Valiant 1 0 3 1 6
## Duster 360 0 0 3 4 8
## Merc 240D 1 0 4 2 4
## Merc 230 1 0 4 2 4
## Merc 280 1 0 4 4 6
## Merc 280C 1 0 4 4 6
## Merc 450SE 0 0 3 3 8
## Merc 450SL 0 0 3 3 8
## Merc 450SLC 0 0 3 3 8
## Cadillac Fleetwood 0 0 3 4 8
## Lincoln Continental 0 0 3 4 8
## Chrysler Imperial 0 0 3 4 8
## Fiat 128 1 1 4 1 4
## Honda Civic 1 1 4 2 4
## Toyota Corolla 1 1 4 1 4
## Toyota Corona 1 0 3 1 4
## Dodge Challenger 0 0 3 2 8
## AMC Javelin 0 0 3 2 8
## Camaro Z28 0 0 3 4 8
## Pontiac Firebird 0 0 3 2 8
## Fiat X1-9 1 1 4 1 4
## Porsche 914-2 0 1 5 2 4
## Lotus Europa 1 1 5 2 4
## Ford Pantera L 0 1 5 4 8
## Ferrari Dino 0 1 5 6 6
## Maserati Bora 0 1 5 8 8
## Volvo 142E 1 1 4 2 4
mtcars %>%
columns(-am, -cyl)
## mpg disp hp drat wt qsec vs gear carb
## Mazda RX4 21.0 160.0 110 3.90 2.620 16.46 0 4 4
## Mazda RX4 Wag 21.0 160.0 110 3.90 2.875 17.02 0 4 4
## Datsun 710 22.8 108.0 93 3.85 2.320 18.61 1 4 1
## Hornet 4 Drive 21.4 258.0 110 3.08 3.215 19.44 1 3 1
## Hornet Sportabout 18.7 360.0 175 3.15 3.440 17.02 0 3 2
## Valiant 18.1 225.0 105 2.76 3.460 20.22 1 3 1
## Duster 360 14.3 360.0 245 3.21 3.570 15.84 0 3 4
## Merc 240D 24.4 146.7 62 3.69 3.190 20.00 1 4 2
## Merc 230 22.8 140.8 95 3.92 3.150 22.90 1 4 2
## Merc 280 19.2 167.6 123 3.92 3.440 18.30 1 4 4
## Merc 280C 17.8 167.6 123 3.92 3.440 18.90 1 4 4
## Merc 450SE 16.4 275.8 180 3.07 4.070 17.40 0 3 3
## Merc 450SL 17.3 275.8 180 3.07 3.730 17.60 0 3 3
## Merc 450SLC 15.2 275.8 180 3.07 3.780 18.00 0 3 3
## Cadillac Fleetwood 10.4 472.0 205 2.93 5.250 17.98 0 3 4
## Lincoln Continental 10.4 460.0 215 3.00 5.424 17.82 0 3 4
## Chrysler Imperial 14.7 440.0 230 3.23 5.345 17.42 0 3 4
## Fiat 128 32.4 78.7 66 4.08 2.200 19.47 1 4 1
## Honda Civic 30.4 75.7 52 4.93 1.615 18.52 1 4 2
## Toyota Corolla 33.9 71.1 65 4.22 1.835 19.90 1 4 1
## Toyota Corona 21.5 120.1 97 3.70 2.465 20.01 1 3 1
## Dodge Challenger 15.5 318.0 150 2.76 3.520 16.87 0 3 2
## AMC Javelin 15.2 304.0 150 3.15 3.435 17.30 0 3 2
## Camaro Z28 13.3 350.0 245 3.73 3.840 15.41 0 3 4
## Pontiac Firebird 19.2 400.0 175 3.08 3.845 17.05 0 3 2
## Fiat X1-9 27.3 79.0 66 4.08 1.935 18.90 1 4 1
## Porsche 914-2 26.0 120.3 91 4.43 2.140 16.70 0 5 2
## Lotus Europa 30.4 95.1 113 3.77 1.513 16.90 1 5 2
## Ford Pantera L 15.8 351.0 264 4.22 3.170 14.50 0 5 4
## Ferrari Dino 19.7 145.0 175 3.62 2.770 15.50 0 5 6
## Maserati Bora 15.0 301.0 335 3.54 3.570 14.60 0 5 8
## Volvo 142E 21.4 121.0 109 4.11 2.780 18.60 1 4 2
# regular expression pattern
columns(iris, "^Petal") %>% head() # variables which start from 'Petal'
## Petal.Length Petal.Width
## 1 1.4 0.2
## 2 1.4 0.2
## 3 1.3 0.2
## 4 1.5 0.2
## 5 1.4 0.2
## 6 1.7 0.4
columns(iris, "Width$") %>% head() # variables which end with 'Width'
## Sepal.Width Petal.Width
## 1 3.5 0.2
## 2 3.0 0.2
## 3 3.2 0.2
## 4 3.1 0.2
## 5 3.6 0.2
## 6 3.9 0.4
# move Species variable to the front
# pattern "^." matches all variables
columns(iris, Species, "^.") %>% head()
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.1 3.5 1.4 0.2
## 2 setosa 4.9 3.0 1.4 0.2
## 3 setosa 4.7 3.2 1.3 0.2
## 4 setosa 4.6 3.1 1.5 0.2
## 5 setosa 5.0 3.6 1.4 0.2
## 6 setosa 5.4 3.9 1.7 0.4
# pattern "^.*al" means "contains 'al'"
columns(iris, "^.*al") %>% head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
# numeric indexing - all variables except Species
columns(iris, 1:4) %>% head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 5.1 3.5 1.4 0.2
## 2 4.9 3.0 1.4 0.2
## 3 4.7 3.2 1.3 0.2
## 4 4.6 3.1 1.5 0.2
## 5 5.0 3.6 1.4 0.2
## 6 5.4 3.9 1.7 0.4
# A 'take' with summary functions applied without 'by' argument returns an aggregated data
mtcars %>%
take(mean = mean(disp), n = .N)
## mean n
## <num> <int>
## 1: 230.7219 32
# Usually, you'll want to group first
mtcars %>%
take(mean = mean(disp), n = .N, by = am)
## am mean n
## <num> <num> <int>
## 1: 1 143.5308 13
## 2: 0 290.3789 19
# grouping by multiple variables
mtcars %>%
take(mean = mean(disp), n = .N, by = list(am, vs))
## am vs mean n
## <num> <num> <num> <int>
## 1: 1 0 206.2167 6
## 2: 1 1 89.8000 7
## 3: 0 1 175.1143 7
## 4: 0 0 357.6167 12
# You can group by expressions:
mtcars %>%
take_all(
mean,
by = list(vsam = vs + am)
)
## vsam mpg cyl disp hp drat wt qsec
## <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 1 20.28462 5.692308 189.4692 138.46154 3.738462 3.038846 18.04231
## 2: 2 28.37143 4.000000 89.8000 80.57143 4.148571 2.028286 18.70000
## 3: 0 15.05000 8.000000 357.6167 194.16667 3.120833 4.104083 17.14250
## gear carb
## <num> <num>
## 1: 4.076923 3.307692
## 2: 4.142857 1.428571
## 3: 3.000000 3.083333
# modify all non-grouping variables in-place
mtcars %>%
let_all((.x - mean(.x))/sd(.x), by = am) %>%
head()
## mpg cyl disp hp drat wt qsec
## <num> <num> <num> <num> <num> <num> <num>
## 1: -0.5501185 0.5945745 0.1888587 -0.2004008 -0.4120299 0.3387459 -0.5021316
## 2: -0.5501185 0.5945745 0.1888587 -0.2004008 -0.4120299 0.7520483 -0.1896942
## 3: -0.2582189 -0.6936702 -0.4074443 -0.4026317 -0.5493732 -0.1474922 0.6974050
## 4: 1.1091990 -0.6133196 -0.2938955 -0.9323843 -0.5259081 -0.7124963 0.7176592
## 5: 0.4049674 0.6814663 0.6319326 0.2733692 -0.3474750 -0.4230701 -0.6641654
## 6: 0.2484716 -0.6133196 -0.5934281 -1.0251346 -1.3416023 -0.3973433 1.1630407
## vs am gear carb
## <num> <num> <num> <num>
## 1: -1.0377490 1 -0.7595545 0.4944600
## 2: -1.0377490 1 -0.7595545 0.4944600
## 3: 0.8894992 1 -0.7595545 -0.8829642
## 4: 1.2743862 0 -0.5026247 -1.5141438
## 5: -0.7433919 0 -0.5026247 -0.6423641
## 6: 1.2743862 0 -0.5026247 -1.5141438
# modify all non-grouping variables to new variables
mtcars %>%
let_all(scaled = (.x - mean(.x))/sd(.x), by = am) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb mpg_scaled
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 -0.5501185
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 -0.5501185
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 -0.2582189
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 1.1091990
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 0.4049674
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 0.2484716
## cyl_scaled disp_scaled hp_scaled drat_scaled wt_scaled qsec_scaled
## <num> <num> <num> <num> <num> <num>
## 1: 0.5945745 0.1888587 -0.2004008 -0.4120299 0.3387459 -0.5021316
## 2: 0.5945745 0.1888587 -0.2004008 -0.4120299 0.7520483 -0.1896942
## 3: -0.6936702 -0.4074443 -0.4026317 -0.5493732 -0.1474922 0.6974050
## 4: -0.6133196 -0.2938955 -0.9323843 -0.5259081 -0.7124963 0.7176592
## 5: 0.6814663 0.6319326 0.2733692 -0.3474750 -0.4230701 -0.6641654
## 6: -0.6133196 -0.5934281 -1.0251346 -1.3416023 -0.3973433 1.1630407
## vs_scaled gear_scaled carb_scaled
## <num> <num> <num>
## 1: -1.0377490 -0.7595545 0.4944600
## 2: -1.0377490 -0.7595545 0.4944600
## 3: 0.8894992 -0.7595545 -0.8829642
## 4: 1.2743862 -0.5026247 -1.5141438
## 5: -0.7433919 -0.5026247 -0.6423641
## 6: 1.2743862 -0.5026247 -1.5141438
# conditionally modify all variables
iris %>%
let_all(mean = if(is.numeric(.x)) mean(.x)) %>%
head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
## <num> <num> <num> <num> <fctr> <num>
## 1: 5.1 3.5 1.4 0.2 setosa 5.843333
## 2: 4.9 3.0 1.4 0.2 setosa 5.843333
## 3: 4.7 3.2 1.3 0.2 setosa 5.843333
## 4: 4.6 3.1 1.5 0.2 setosa 5.843333
## 5: 5.0 3.6 1.4 0.2 setosa 5.843333
## 6: 5.4 3.9 1.7 0.4 setosa 5.843333
## Sepal.Width_mean Petal.Length_mean Petal.Width_mean
## <num> <num> <num>
## 1: 3.057333 3.758 1.199333
## 2: 3.057333 3.758 1.199333
## 3: 3.057333 3.758 1.199333
## 4: 3.057333 3.758 1.199333
## 5: 3.057333 3.758 1.199333
## 6: 3.057333 3.758 1.199333
# modify all variables conditionally on name
iris %>%
let_all(
mean = if(startsWith(.name, "Sepal")) mean(.x),
median = if(startsWith(.name, "Petal")) median(.x),
by = Species
) %>%
head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species Sepal.Length_mean
## <num> <num> <num> <num> <fctr> <num>
## 1: 5.1 3.5 1.4 0.2 setosa 5.006
## 2: 4.9 3.0 1.4 0.2 setosa 5.006
## 3: 4.7 3.2 1.3 0.2 setosa 5.006
## 4: 4.6 3.1 1.5 0.2 setosa 5.006
## 5: 5.0 3.6 1.4 0.2 setosa 5.006
## 6: 5.4 3.9 1.7 0.4 setosa 5.006
## Sepal.Width_mean Petal.Length_median Petal.Width_median
## <num> <num> <num>
## 1: 3.428 1.5 0.2
## 2: 3.428 1.5 0.2
## 3: 3.428 1.5 0.2
## 4: 3.428 1.5 0.2
## 5: 3.428 1.5 0.2
## 6: 3.428 1.5 0.2
# aggregation with 'take_all'
mtcars %>%
take_all(mean = mean(.x), sd = sd(.x), n = .N, by = am)
## am mpg_mean cyl_mean disp_mean hp_mean drat_mean wt_mean qsec_mean
## <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 1 24.39231 5.076923 143.5308 126.8462 4.050000 2.411000 17.36000
## 2: 0 17.14737 6.947368 290.3789 160.2632 3.286316 3.768895 18.18316
## vs_mean gear_mean carb_mean mpg_sd cyl_sd disp_sd hp_sd drat_sd
## <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 0.5384615 4.384615 2.923077 6.166504 1.552500 87.20399 84.06232 0.3640513
## 2: 0.3684211 3.210526 2.736842 3.833966 1.544657 110.17165 53.90820 0.3923039
## wt_sd qsec_sd vs_sd gear_sd carb_sd mpg_n cyl_n disp_n hp_n
## <num> <num> <num> <num> <num> <int> <int> <int> <int>
## 1: 0.6169816 1.792359 0.5188745 0.5063697 2.177978 13 13 13 13
## 2: 0.7774001 1.751308 0.4955946 0.4188539 1.147079 19 19 19 19
## drat_n wt_n qsec_n vs_n gear_n carb_n
## <int> <int> <int> <int> <int> <int>
## 1: 13 13 13 13 13 13
## 2: 19 19 19 19 19 19
# conditionally aggregate all variables
iris %>%
take_all(mean = if(is.numeric(.x)) mean(.x))
## Sepal.Length_mean Sepal.Width_mean Petal.Length_mean Petal.Width_mean
## <num> <num> <num> <num>
## 1: 5.843333 3.057333 3.758 1.199333
# aggregate all variables conditionally on name
iris %>%
take_all(
mean = if(startsWith(.name, "Sepal")) mean(.x),
median = if(startsWith(.name, "Petal")) median(.x),
by = Species
)
## Species Sepal.Length_mean Sepal.Width_mean Petal.Length_median
## <fctr> <num> <num> <num>
## 1: setosa 5.006 3.428 1.50
## 2: versicolor 5.936 2.770 4.35
## 3: virginica 6.588 2.974 5.55
## Petal.Width_median
## <num>
## 1: 0.2
## 2: 1.3
## 3: 2.0
# parametric evaluation:
var = quote(mean(cyl))
mtcars %>%
let(mean_cyl = eval(var)) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb mean_cyl
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6.1875
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 6.1875
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 6.1875
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 6.1875
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 6.1875
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 6.1875
take(mtcars, eval(var))
## eval(var)
## <num>
## 1: 6.1875
# all together
new_var = "mean_cyl"
mtcars %>%
let((new_var) := eval(var)) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb mean_cyl
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 6.1875
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 6.1875
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 6.1875
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 6.1875
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 6.1875
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 6.1875
take(mtcars, (new_var) := eval(var))
## mean_cyl
## <num>
## 1: 6.1875
You can use ‘columns’ inside expression in the ‘take’/‘let’. ‘columns’ will be replaced with data.table with selected columns. In ‘let’ in the expressions with ‘:=’, ‘cols’ or ‘%to%’ can be placed in the left part of the expression. It is usefull for multiple assignment. There are four ways of column selection:
a{1:3}
will be
transformed to the names ‘a1’, ‘a2’, ‘a3’. ‘cols’ is just a shortcut for
‘columns’.# range selection
iris %>%
let(
avg = rowMeans(Sepal.Length %to% Petal.Width)
) %>%
head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species avg
## <num> <num> <num> <num> <fctr> <num>
## 1: 5.1 3.5 1.4 0.2 setosa 2.550
## 2: 4.9 3.0 1.4 0.2 setosa 2.375
## 3: 4.7 3.2 1.3 0.2 setosa 2.350
## 4: 4.6 3.1 1.5 0.2 setosa 2.350
## 5: 5.0 3.6 1.4 0.2 setosa 2.550
## 6: 5.4 3.9 1.7 0.4 setosa 2.850
# multiassignment
iris %>%
let(
# starts with Sepal or Petal
multipled1 %to% multipled4 := cols("^(Sepal|Petal)")*2
) %>%
head()
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species multipled1
## <num> <num> <num> <num> <fctr> <num>
## 1: 5.1 3.5 1.4 0.2 setosa 10.2
## 2: 4.9 3.0 1.4 0.2 setosa 9.8
## 3: 4.7 3.2 1.3 0.2 setosa 9.4
## 4: 4.6 3.1 1.5 0.2 setosa 9.2
## 5: 5.0 3.6 1.4 0.2 setosa 10.0
## 6: 5.4 3.9 1.7 0.4 setosa 10.8
## multipled2 multipled3 multipled4
## <num> <num> <num>
## 1: 7.0 2.8 0.4
## 2: 6.0 2.8 0.4
## 3: 6.4 2.6 0.4
## 4: 6.2 3.0 0.4
## 5: 7.2 2.8 0.4
## 6: 7.8 3.4 0.8
mtcars %>%
let(
# text expansion
cols("scaled_{names(mtcars)}") := lapply(cols("{names(mtcars)}"), scale)
) %>%
head()
## mpg cyl disp hp drat wt qsec vs am gear carb scaled_mpg
## <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num> <num>
## 1: 21.0 6 160 110 3.90 2.620 16.46 0 1 4 4 0.1508848
## 2: 21.0 6 160 110 3.90 2.875 17.02 0 1 4 4 0.1508848
## 3: 22.8 4 108 93 3.85 2.320 18.61 1 1 4 1 0.4495434
## 4: 21.4 6 258 110 3.08 3.215 19.44 1 0 3 1 0.2172534
## 5: 18.7 8 360 175 3.15 3.440 17.02 0 0 3 2 -0.2307345
## 6: 18.1 6 225 105 2.76 3.460 20.22 1 0 3 1 -0.3302874
## scaled_cyl scaled_disp scaled_hp scaled_drat scaled_wt scaled_qsec
## <num> <num> <num> <num> <num> <num>
## 1: -0.1049878 -0.57061982 -0.5350928 0.5675137 -0.610399567 -0.7771651
## 2: -0.1049878 -0.57061982 -0.5350928 0.5675137 -0.349785269 -0.4637808
## 3: -1.2248578 -0.99018209 -0.7830405 0.4739996 -0.917004624 0.4260068
## 4: -0.1049878 0.22009369 -0.5350928 -0.9661175 -0.002299538 0.8904872
## 5: 1.0148821 1.04308123 0.4129422 -0.8351978 0.227654255 -0.4637808
## 6: -0.1049878 -0.04616698 -0.6080186 -1.5646078 0.248094592 1.3269868
## scaled_vs scaled_am scaled_gear scaled_carb
## <num> <num> <num> <num>
## 1: -0.8680278 1.1899014 0.4235542 0.7352031
## 2: -0.8680278 1.1899014 0.4235542 0.7352031
## 3: 1.1160357 1.1899014 0.4235542 -1.1221521
## 4: 1.1160357 -0.8141431 -0.9318192 -1.1221521
## 5: -0.8680278 -0.8141431 -0.9318192 -0.5030337
## 6: 1.1160357 -0.8141431 -0.9318192 -1.1221521
# range selection in 'by'
# selection of range + additional column
mtcars %>%
take(
res = sum(cols(mpg, disp %to% drat)),
by = vs %to% gear
)
## vs am gear res
## <num> <num> <num> <num>
## 1: 0 1 4 589.80
## 2: 1 1 4 1177.97
## 3: 1 0 3 985.64
## 4: 0 0 3 6839.45
## 5: 1 0 4 1125.35
## 6: 0 1 5 1874.61
## 7: 1 1 5 242.27
Here we use the same datasets as with lookups:
workers = fread("
name company
Nick Acme
John Ajax
Daniela Ajax
")
positions = fread("
name position
John designer
Daniela engineer
Cathie manager
")
workers
## name company
## <char> <char>
## 1: Nick Acme
## 2: John Ajax
## 3: Daniela Ajax
positions
## name position
## <char> <char>
## 1: John designer
## 2: Daniela engineer
## 3: Cathie manager
Different kinds of joins:
workers %>% dt_inner_join(positions)
## dt_inner_join: joining, by = "name"
## name company position
## <char> <char> <char>
## 1: John Ajax designer
## 2: Daniela Ajax engineer
workers %>% dt_left_join(positions)
## dt_left_join: joining, by = "name"
## name company position
## <char> <char> <char>
## 1: Nick Acme <NA>
## 2: John Ajax designer
## 3: Daniela Ajax engineer
workers %>% dt_right_join(positions)
## dt_right_join: joining, by = "name"
## name company position
## <char> <char> <char>
## 1: John Ajax designer
## 2: Daniela Ajax engineer
## 3: Cathie <NA> manager
workers %>% dt_full_join(positions)
## dt_full_join: joining, by = "name"
## name company position
## <char> <char> <char>
## 1: Nick Acme <NA>
## 2: John Ajax designer
## 3: Daniela Ajax engineer
## 4: Cathie <NA> manager
# filtering joins
workers %>% dt_anti_join(positions)
## dt_anti_join: joining, by = "name"
## name company
## <char> <char>
## 1: Nick Acme
workers %>% dt_semi_join(positions)
## dt_semi_join: joining, by = "name"
## name company
## <char> <char>
## 1: John Ajax
## 2: Daniela Ajax
To suppress the message, supply by
argument:
workers %>% dt_left_join(positions, by = "name")
Use a named by
if the join variables have different
names:
positions2 = setNames(positions, c("worker", "position")) # rename first column in 'positions'
workers %>% dt_inner_join(positions2, by = c("name" = "worker"))
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.