Example 1: Basic usage

This vignette follows dplyr’s introductory vignette at https://dplyr.tidyverse.org/articles/dplyr.html. We’ll try to reproduce all of its results with tidyfst. First, load the needed packages.

library(tidyfst)
#> 
#> Life's short, use R.
library(nycflights13)

data.table(flights)
#>         year month day dep_time sched_dep_time dep_delay arr_time
#>      1: 2013     1   1      517            515         2      830
#>      2: 2013     1   1      533            529         4      850
#>      3: 2013     1   1      542            540         2      923
#>      4: 2013     1   1      544            545        -1     1004
#>      5: 2013     1   1      554            600        -6      812
#>     ---                                                          
#> 336772: 2013     9  30       NA           1455        NA       NA
#> 336773: 2013     9  30       NA           2200        NA       NA
#> 336774: 2013     9  30       NA           1210        NA       NA
#> 336775: 2013     9  30       NA           1159        NA       NA
#> 336776: 2013     9  30       NA            840        NA       NA
#>         sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#>      1:            819        11      UA   1545  N14228    EWR  IAH      227
#>      2:            830        20      UA   1714  N24211    LGA  IAH      227
#>      3:            850        33      AA   1141  N619AA    JFK  MIA      160
#>      4:           1022       -18      B6    725  N804JB    JFK  BQN      183
#>      5:            837       -25      DL    461  N668DN    LGA  ATL      116
#>     ---                                                                     
#> 336772:           1634        NA      9E   3393    <NA>    JFK  DCA       NA
#> 336773:           2312        NA      9E   3525    <NA>    LGA  SYR       NA
#> 336774:           1330        NA      MQ   3461  N535MQ    LGA  BNA       NA
#> 336775:           1344        NA      MQ   3572  N511MQ    LGA  CLE       NA
#> 336776:           1020        NA      MQ   3531  N839MQ    LGA  RDU       NA
#>         distance hour minute           time_hour
#>      1:     1400    5     15 2013-01-01 05:00:00
#>      2:     1416    5     29 2013-01-01 05:00:00
#>      3:     1089    5     40 2013-01-01 05:00:00
#>      4:     1576    5     45 2013-01-01 05:00:00
#>      5:      762    6      0 2013-01-01 06:00:00
#>     ---                                         
#> 336772:      213   14     55 2013-09-30 14:00:00
#> 336773:      198   22      0 2013-09-30 22:00:00
#> 336774:      764   12     10 2013-09-30 12:00:00
#> 336775:      419   11     59 2013-09-30 11:00:00
#> 336776:      431    8     40 2013-09-30 08:00:00

Filter rows with filter_dt()

filter_dt(flights, month == 1, day == 1)
#>      year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#>   1: 2013     1   1      517            515         2      830            819
#>   2: 2013     1   1      533            529         4      850            830
#>   3: 2013     1   1      542            540         2      923            850
#>   4: 2013     1   1      544            545        -1     1004           1022
#>   5: 2013     1   1      554            600        -6      812            837
#>  ---                                                                         
#> 838: 2013     1   1     2356           2359        -3      425            437
#> 839: 2013     1   1       NA           1630        NA       NA           1815
#> 840: 2013     1   1       NA           1935        NA       NA           2240
#> 841: 2013     1   1       NA           1500        NA       NA           1825
#> 842: 2013     1   1       NA            600        NA       NA            901
#>      arr_delay carrier flight tailnum origin dest air_time distance hour minute
#>   1:        11      UA   1545  N14228    EWR  IAH      227     1400    5     15
#>   2:        20      UA   1714  N24211    LGA  IAH      227     1416    5     29
#>   3:        33      AA   1141  N619AA    JFK  MIA      160     1089    5     40
#>   4:       -18      B6    725  N804JB    JFK  BQN      183     1576    5     45
#>   5:       -25      DL    461  N668DN    LGA  ATL      116      762    6      0
#>  ---                                                                           
#> 838:       -12      B6    727  N588JB    JFK  BQN      186     1576   23     59
#> 839:        NA      EV   4308  N18120    EWR  RDU       NA      416   16     30
#> 840:        NA      AA    791  N3EHAA    LGA  DFW       NA     1389   19     35
#> 841:        NA      AA   1925  N3EVAA    LGA  MIA       NA     1096   15      0
#> 842:        NA      B6    125  N618JB    JFK  FLL       NA     1069    6      0
#>                time_hour
#>   1: 2013-01-01 05:00:00
#>   2: 2013-01-01 05:00:00
#>   3: 2013-01-01 05:00:00
#>   4: 2013-01-01 05:00:00
#>   5: 2013-01-01 06:00:00
#>  ---                    
#> 838: 2013-01-01 23:00:00
#> 839: 2013-01-01 16:00:00
#> 840: 2013-01-01 19:00:00
#> 841: 2013-01-01 15:00:00
#> 842: 2013-01-01 06:00:00

Arrange rows with arrange_dt()

arrange_dt(flights, year, month, day)
#>         year month day dep_time sched_dep_time dep_delay arr_time
#>      1: 2013     1   1      517            515         2      830
#>      2: 2013     1   1      533            529         4      850
#>      3: 2013     1   1      542            540         2      923
#>      4: 2013     1   1      544            545        -1     1004
#>      5: 2013     1   1      554            600        -6      812
#>     ---                                                          
#> 336772: 2013    12  31       NA            705        NA       NA
#> 336773: 2013    12  31       NA            825        NA       NA
#> 336774: 2013    12  31       NA           1615        NA       NA
#> 336775: 2013    12  31       NA            600        NA       NA
#> 336776: 2013    12  31       NA            830        NA       NA
#>         sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#>      1:            819        11      UA   1545  N14228    EWR  IAH      227
#>      2:            830        20      UA   1714  N24211    LGA  IAH      227
#>      3:            850        33      AA   1141  N619AA    JFK  MIA      160
#>      4:           1022       -18      B6    725  N804JB    JFK  BQN      183
#>      5:            837       -25      DL    461  N668DN    LGA  ATL      116
#>     ---                                                                     
#> 336772:            931        NA      UA   1729    <NA>    EWR  DEN       NA
#> 336773:           1029        NA      US   1831    <NA>    JFK  CLT       NA
#> 336774:           1800        NA      MQ   3301  N844MQ    LGA  RDU       NA
#> 336775:            735        NA      UA    219    <NA>    EWR  ORD       NA
#> 336776:           1154        NA      UA    443    <NA>    JFK  LAX       NA
#>         distance hour minute           time_hour
#>      1:     1400    5     15 2013-01-01 05:00:00
#>      2:     1416    5     29 2013-01-01 05:00:00
#>      3:     1089    5     40 2013-01-01 05:00:00
#>      4:     1576    5     45 2013-01-01 05:00:00
#>      5:      762    6      0 2013-01-01 06:00:00
#>     ---                                         
#> 336772:     1605    7      5 2013-12-31 07:00:00
#> 336773:      541    8     25 2013-12-31 08:00:00
#> 336774:      431   16     15 2013-12-31 16:00:00
#> 336775:      719    6      0 2013-12-31 06:00:00
#> 336776:     2475    8     30 2013-12-31 08:00:00

Use - (minus symbol) to order a column in descending order:

arrange_dt(flights, -arr_delay)
#>         year month day dep_time sched_dep_time dep_delay arr_time
#>      1: 2013     1   9      641            900      1301     1242
#>      2: 2013     6  15     1432           1935      1137     1607
#>      3: 2013     1  10     1121           1635      1126     1239
#>      4: 2013     9  20     1139           1845      1014     1457
#>      5: 2013     7  22      845           1600      1005     1044
#>     ---                                                          
#> 336772: 2013     9  30       NA           1455        NA       NA
#> 336773: 2013     9  30       NA           2200        NA       NA
#> 336774: 2013     9  30       NA           1210        NA       NA
#> 336775: 2013     9  30       NA           1159        NA       NA
#> 336776: 2013     9  30       NA            840        NA       NA
#>         sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#>      1:           1530      1272      HA     51  N384HA    JFK  HNL      640
#>      2:           2120      1127      MQ   3535  N504MQ    JFK  CMH       74
#>      3:           1810      1109      MQ   3695  N517MQ    EWR  ORD      111
#>      4:           2210      1007      AA    177  N338AA    JFK  SFO      354
#>      5:           1815       989      MQ   3075  N665MQ    JFK  CVG       96
#>     ---                                                                     
#> 336772:           1634        NA      9E   3393    <NA>    JFK  DCA       NA
#> 336773:           2312        NA      9E   3525    <NA>    LGA  SYR       NA
#> 336774:           1330        NA      MQ   3461  N535MQ    LGA  BNA       NA
#> 336775:           1344        NA      MQ   3572  N511MQ    LGA  CLE       NA
#> 336776:           1020        NA      MQ   3531  N839MQ    LGA  RDU       NA
#>         distance hour minute           time_hour
#>      1:     4983    9      0 2013-01-09 09:00:00
#>      2:      483   19     35 2013-06-15 19:00:00
#>      3:      719   16     35 2013-01-10 16:00:00
#>      4:     2586   18     45 2013-09-20 18:00:00
#>      5:      589   16      0 2013-07-22 16:00:00
#>     ---                                         
#> 336772:      213   14     55 2013-09-30 14:00:00
#> 336773:      198   22      0 2013-09-30 22:00:00
#> 336774:      764   12     10 2013-09-30 12:00:00
#> 336775:      419   11     59 2013-09-30 11:00:00
#> 336776:      431    8     40 2013-09-30 08:00:00

Select columns with select_dt()

select_dt(flights, year, month, day)
#>         year month day
#>      1: 2013     1   1
#>      2: 2013     1   1
#>      3: 2013     1   1
#>      4: 2013     1   1
#>      5: 2013     1   1
#>     ---               
#> 336772: 2013     9  30
#> 336773: 2013     9  30
#> 336774: 2013     9  30
#> 336775: 2013     9  30
#> 336776: 2013     9  30

select_dt(flights, year:day) and select_dt(flights, -(year:day)) are not supported. However, I have added a feature for selecting columns with a regular expression, which means you can write:

select_dt(flights, "^dep")
#>         dep_time dep_delay
#>      1:      517         2
#>      2:      533         4
#>      3:      542         2
#>      4:      544        -1
#>      5:      554        -6
#>     ---                   
#> 336772:       NA        NA
#> 336773:       NA        NA
#> 336774:       NA        NA
#> 336775:       NA        NA
#> 336776:       NA        NA

The rename process is almost the same as that in dplyr:

select_dt(flights, tail_num = tailnum)
#>         tail_num
#>      1:   N14228
#>      2:   N24211
#>      3:   N619AA
#>      4:   N804JB
#>      5:   N668DN
#>     ---         
#> 336772:     <NA>
#> 336773:     <NA>
#> 336774:   N535MQ
#> 336775:   N511MQ
#> 336776:   N839MQ
rename_dt(flights, tail_num = tailnum)
#>         year month day dep_time sched_dep_time dep_delay arr_time
#>      1: 2013     1   1      517            515         2      830
#>      2: 2013     1   1      533            529         4      850
#>      3: 2013     1   1      542            540         2      923
#>      4: 2013     1   1      544            545        -1     1004
#>      5: 2013     1   1      554            600        -6      812
#>     ---                                                          
#> 336772: 2013     9  30       NA           1455        NA       NA
#> 336773: 2013     9  30       NA           2200        NA       NA
#> 336774: 2013     9  30       NA           1210        NA       NA
#> 336775: 2013     9  30       NA           1159        NA       NA
#> 336776: 2013     9  30       NA            840        NA       NA
#>         sched_arr_time arr_delay carrier flight tail_num origin dest air_time
#>      1:            819        11      UA   1545   N14228    EWR  IAH      227
#>      2:            830        20      UA   1714   N24211    LGA  IAH      227
#>      3:            850        33      AA   1141   N619AA    JFK  MIA      160
#>      4:           1022       -18      B6    725   N804JB    JFK  BQN      183
#>      5:            837       -25      DL    461   N668DN    LGA  ATL      116
#>     ---                                                                      
#> 336772:           1634        NA      9E   3393     <NA>    JFK  DCA       NA
#> 336773:           2312        NA      9E   3525     <NA>    LGA  SYR       NA
#> 336774:           1330        NA      MQ   3461   N535MQ    LGA  BNA       NA
#> 336775:           1344        NA      MQ   3572   N511MQ    LGA  CLE       NA
#> 336776:           1020        NA      MQ   3531   N839MQ    LGA  RDU       NA
#>         distance hour minute           time_hour
#>      1:     1400    5     15 2013-01-01 05:00:00
#>      2:     1416    5     29 2013-01-01 05:00:00
#>      3:     1089    5     40 2013-01-01 05:00:00
#>      4:     1576    5     45 2013-01-01 05:00:00
#>      5:      762    6      0 2013-01-01 06:00:00
#>     ---                                         
#> 336772:      213   14     55 2013-09-30 14:00:00
#> 336773:      198   22      0 2013-09-30 22:00:00
#> 336774:      764   12     10 2013-09-30 12:00:00
#> 336775:      419   11     59 2013-09-30 11:00:00
#> 336776:      431    8     40 2013-09-30 08:00:00

Add new columns with mutate_dt()

mutate_dt(flights,
  gain = arr_delay - dep_delay,
  speed = distance / air_time * 60
)
#>         year month day dep_time sched_dep_time dep_delay arr_time
#>      1: 2013     1   1      517            515         2      830
#>      2: 2013     1   1      533            529         4      850
#>      3: 2013     1   1      542            540         2      923
#>      4: 2013     1   1      544            545        -1     1004
#>      5: 2013     1   1      554            600        -6      812
#>     ---                                                          
#> 336772: 2013     9  30       NA           1455        NA       NA
#> 336773: 2013     9  30       NA           2200        NA       NA
#> 336774: 2013     9  30       NA           1210        NA       NA
#> 336775: 2013     9  30       NA           1159        NA       NA
#> 336776: 2013     9  30       NA            840        NA       NA
#>         sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#>      1:            819        11      UA   1545  N14228    EWR  IAH      227
#>      2:            830        20      UA   1714  N24211    LGA  IAH      227
#>      3:            850        33      AA   1141  N619AA    JFK  MIA      160
#>      4:           1022       -18      B6    725  N804JB    JFK  BQN      183
#>      5:            837       -25      DL    461  N668DN    LGA  ATL      116
#>     ---                                                                     
#> 336772:           1634        NA      9E   3393    <NA>    JFK  DCA       NA
#> 336773:           2312        NA      9E   3525    <NA>    LGA  SYR       NA
#> 336774:           1330        NA      MQ   3461  N535MQ    LGA  BNA       NA
#> 336775:           1344        NA      MQ   3572  N511MQ    LGA  CLE       NA
#> 336776:           1020        NA      MQ   3531  N839MQ    LGA  RDU       NA
#>         distance hour minute           time_hour gain    speed
#>      1:     1400    5     15 2013-01-01 05:00:00    9 370.0441
#>      2:     1416    5     29 2013-01-01 05:00:00   16 374.2731
#>      3:     1089    5     40 2013-01-01 05:00:00   31 408.3750
#>      4:     1576    5     45 2013-01-01 05:00:00  -17 516.7213
#>      5:      762    6      0 2013-01-01 06:00:00  -19 394.1379
#>     ---                                                       
#> 336772:      213   14     55 2013-09-30 14:00:00   NA       NA
#> 336773:      198   22      0 2013-09-30 22:00:00   NA       NA
#> 336774:      764   12     10 2013-09-30 12:00:00   NA       NA
#> 336775:      419   11     59 2013-09-30 11:00:00   NA       NA
#> 336776:      431    8     40 2013-09-30 08:00:00   NA       NA

However, if a new column depends on another column that is created in the same call, please split them into separate calls. The following code would not work:

mutate_dt(flights,
  gain = arr_delay - dep_delay,
  gain_per_hour = gain / (air_time / 60)
)

Instead, use:

mutate_dt(flights,gain = arr_delay - dep_delay) %>%
  mutate_dt(gain_per_hour = gain / (air_time / 60))
#>         year month day dep_time sched_dep_time dep_delay arr_time
#>      1: 2013     1   1      517            515         2      830
#>      2: 2013     1   1      533            529         4      850
#>      3: 2013     1   1      542            540         2      923
#>      4: 2013     1   1      544            545        -1     1004
#>      5: 2013     1   1      554            600        -6      812
#>     ---                                                          
#> 336772: 2013     9  30       NA           1455        NA       NA
#> 336773: 2013     9  30       NA           2200        NA       NA
#> 336774: 2013     9  30       NA           1210        NA       NA
#> 336775: 2013     9  30       NA           1159        NA       NA
#> 336776: 2013     9  30       NA            840        NA       NA
#>         sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#>      1:            819        11      UA   1545  N14228    EWR  IAH      227
#>      2:            830        20      UA   1714  N24211    LGA  IAH      227
#>      3:            850        33      AA   1141  N619AA    JFK  MIA      160
#>      4:           1022       -18      B6    725  N804JB    JFK  BQN      183
#>      5:            837       -25      DL    461  N668DN    LGA  ATL      116
#>     ---                                                                     
#> 336772:           1634        NA      9E   3393    <NA>    JFK  DCA       NA
#> 336773:           2312        NA      9E   3525    <NA>    LGA  SYR       NA
#> 336774:           1330        NA      MQ   3461  N535MQ    LGA  BNA       NA
#> 336775:           1344        NA      MQ   3572  N511MQ    LGA  CLE       NA
#> 336776:           1020        NA      MQ   3531  N839MQ    LGA  RDU       NA
#>         distance hour minute           time_hour gain gain_per_hour
#>      1:     1400    5     15 2013-01-01 05:00:00    9      2.378855
#>      2:     1416    5     29 2013-01-01 05:00:00   16      4.229075
#>      3:     1089    5     40 2013-01-01 05:00:00   31     11.625000
#>      4:     1576    5     45 2013-01-01 05:00:00  -17     -5.573770
#>      5:      762    6      0 2013-01-01 06:00:00  -19     -9.827586
#>     ---                                                            
#> 336772:      213   14     55 2013-09-30 14:00:00   NA            NA
#> 336773:      198   22      0 2013-09-30 22:00:00   NA            NA
#> 336774:      764   12     10 2013-09-30 12:00:00   NA            NA
#> 336775:      419   11     59 2013-09-30 11:00:00   NA            NA
#> 336776:      431    8     40 2013-09-30 08:00:00   NA            NA

If you only want to keep the new variables, use transmute_dt():

transmute_dt(flights,
  gain = arr_delay - dep_delay
)
#>         gain
#>      1:    9
#>      2:   16
#>      3:   31
#>      4:  -17
#>      5:  -19
#>     ---     
#> 336772:   NA
#> 336773:   NA
#> 336774:   NA
#> 336775:   NA
#> 336776:   NA

Summarise values with summarise_dt()

summarise_dt(flights,
  delay = mean(dep_delay, na.rm = TRUE)
)
#>       delay
#> 1: 12.63907

Randomly sample rows with sample_n_dt() and sample_frac_dt()

sample_n_dt(flights, 10)
#>     year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#>  1: 2013     9   3     1154           1200        -6     1422           1435
#>  2: 2013    10  25     1959           1920        39     2158           2053
#>  3: 2013     9  23     1935           1903        32     2140           2109
#>  4: 2013    10  31      740            742        -2     1056           1052
#>  5: 2013     5  27     2004           1955         9     2224           2253
#>  6: 2013     6  10     1607           1415       112     1838           1619
#>  7: 2013     2  24     1837           1840        -3     2013           2020
#>  8: 2013     4   4     2240           2150        50      144             40
#>  9: 2013     4  17       NA           1815        NA       NA           1955
#> 10: 2013     4  24       NA           1505        NA       NA           1630
#>     arr_delay carrier flight tailnum origin dest air_time distance hour minute
#>  1:       -13      DL   1947  N683DA    LGA  ATL      109      762   12      0
#>  2:        65      9E   3555  N915XJ    LGA  BUF       50      292   19     20
#>  3:        31      EV   5795  N16981    EWR  CLT       80      529   19      3
#>  4:         4      UA   1455  N33284    EWR  LAX      341     2454    7     42
#>  5:       -29      9E   3450  N908XJ    JFK  JAX      102      828   19     55
#>  6:       139      B6   1275  N216JB    JFK  CHS      101      636   14     15
#>  7:        -7      MQ   3730  N8EGMQ    EWR  ORD      114      719   18     40
#>  8:        64      B6    515  N247JB    EWR  FLL      171     1065   21     50
#>  9:        NA      MQ   3730  N534MQ    EWR  ORD       NA      719   18     15
#> 10:        NA      MQ   3823  N534MQ    JFK  DCA       NA      213   15      5
#>               time_hour
#>  1: 2013-09-03 12:00:00
#>  2: 2013-10-25 19:00:00
#>  3: 2013-09-23 19:00:00
#>  4: 2013-10-31 07:00:00
#>  5: 2013-05-27 19:00:00
#>  6: 2013-06-10 14:00:00
#>  7: 2013-02-24 18:00:00
#>  8: 2013-04-04 21:00:00
#>  9: 2013-04-17 18:00:00
#> 10: 2013-04-24 15:00:00
sample_frac_dt(flights, 0.01)
#>       year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#>    1: 2013     1   9     1446           1446         0     1819           1757
#>    2: 2013     4  11     1024           1030        -6     1243           1248
#>    3: 2013     2  16     1710           1716        -6     1815           1826
#>    4: 2013    12   5     1757           1135       382     1908           1250
#>    5: 2013     9  26     1351           1355        -4     1607           1639
#>   ---                                                                         
#> 3363: 2013     8  22     2000           1835        85     2203           2010
#> 3364: 2013    11   3     1645           1650        -5     1904           1920
#> 3365: 2013    10   6      853            900        -7     1045           1034
#> 3366: 2013     5  27     1819           1816         3     1942           2005
#> 3367: 2013     2  20     2326           2044       162       19           2150
#>       arr_delay carrier flight tailnum origin dest air_time distance hour
#>    1:        22      UA    439  N446UA    LGA  IAH      247     1416   14
#>    2:        -5      DL   2343  N301DQ    EWR  ATL      106      746   10
#>    3:       -11      EV   4373  N13969    EWR  DCA       46      199   17
#>    4:       378      EV   4511  N29906    EWR  ROC       47      246   11
#>    5:       -32      UA    431  N845UA    EWR  DFW      178     1372   13
#>   ---                                                                    
#> 3363:       113      MQ   3674  N530MQ    LGA  CLE       62      419   18
#> 3364:       -16      WN    356  N965WN    LGA  DEN      235     1620   16
#> 3365:        11      UA    673  N808UA    LGA  ORD      108      733    9
#> 3366:       -23      UA   1651  N18220    EWR  CLE       65      404   18
#> 3367:       149      EV   4583  N11192    EWR  MHT       37      209   20
#>       minute           time_hour
#>    1:     46 2013-01-09 14:00:00
#>    2:     30 2013-04-11 10:00:00
#>    3:     16 2013-02-16 17:00:00
#>    4:     35 2013-12-05 11:00:00
#>    5:     55 2013-09-26 13:00:00
#>   ---                           
#> 3363:     35 2013-08-22 18:00:00
#> 3364:     50 2013-11-03 16:00:00
#> 3365:      0 2013-10-06 09:00:00
#> 3366:     16 2013-05-27 18:00:00
#> 3367:     44 2013-02-20 20:00:00

Grouped operations

For the following dplyr code:

by_tailnum <- group_by(flights, tailnum)
delay <- summarise(by_tailnum,
  count = n(),
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE))
delay <- filter(delay, count > 20, dist < 2000)

We could get the grouped summary (the final filter step is not applied here) via:

flights %>% 
  summarise_dt( count = .N,
  dist = mean(distance, na.rm = TRUE),
  delay = mean(arr_delay, na.rm = TRUE),by = tailnum)
#>       tailnum count     dist      delay
#>    1:  N14228   111 1546.964   3.711712
#>    2:  N24211   130 1330.262   7.700000
#>    3:  N619AA    24 1339.208   7.652174
#>    4:  N804JB   219 1424.621  -1.860465
#>    5:  N668DN    49 1027.592   2.625000
#>   ---                                  
#> 4040:  N766SK     1  419.000 -24.000000
#> 4041:  N772SK     1  419.000  -8.000000
#> 4042:  N776SK     1  419.000 -18.000000
#> 4043:  N785SK     1  419.000 -16.000000
#> 4044:  N557AS     1 2402.000 -30.000000

summarise_dt (or summarize_dt) has a parameter “by”, which you can use to specify the grouping variables. For example, we could find the number of planes and the number of flights that go to each possible destination:

# the dplyr syntax:
# destinations <- group_by(flights, dest)
# summarise(destinations,
#   planes = n_distinct(tailnum),
#   flights = n()
# )

summarise_dt(flights,planes = uniqueN(tailnum),flights = .N,by = dest) %>% 
  arrange_dt(dest)
#>      dest planes flights
#>   1:  ABQ    108     254
#>   2:  ACK     58     265
#>   3:  ALB    172     439
#>   4:  ANC      6       8
#>   5:  ATL   1180   17215
#>  ---                    
#> 101:  TPA   1126    7466
#> 102:  TUL    105     315
#> 103:  TVC     60     101
#> 104:  TYS    273     631
#> 105:  XNA    176    1036

If you need to group by many variables, use:

# the dplyr syntax:
# daily <- group_by(flights, year, month, day)
# (per_day   <- summarise(daily, flights = n()))

flights %>% 
  summarise_dt(by = .(year,month,day),flights = .N)
#>      year month day flights
#>   1: 2013     1   1     842
#>   2: 2013     1   2     943
#>   3: 2013     1   3     914
#>   4: 2013     1   4     915
#>   5: 2013     1   5     720
#>  ---                       
#> 361: 2013     9  26     996
#> 362: 2013     9  27     996
#> 363: 2013     9  28     682
#> 364: 2013     9  29     914
#> 365: 2013     9  30     993

# (per_month <- summarise(per_day, flights = sum(flights)))
flights %>% 
  summarise_dt(by = .(year,month,day),flights = .N) %>% 
  summarise_dt(by = .(year,month),flights = sum(flights))
#>     year month flights
#>  1: 2013     1   27004
#>  2: 2013    10   28889
#>  3: 2013    11   27268
#>  4: 2013    12   28135
#>  5: 2013     2   24951
#>  6: 2013     3   28834
#>  7: 2013     4   28330
#>  8: 2013     5   28796
#>  9: 2013     6   28243
#> 10: 2013     7   29425
#> 11: 2013     8   29327
#> 12: 2013     9   27574

# (per_year  <- summarise(per_month, flights = sum(flights)))
flights %>% 
  summarise_dt(by = .(year,month,day),flights = .N) %>% 
  summarise_dt(by = .(year,month),flights = sum(flights)) %>% 
  summarise_dt(by = .(year),flights = sum(flights))
#>    year flights
#> 1: 2013  336776