This vignette follows dplyr's introductory vignette (https://dplyr.tidyverse.org/articles/dplyr.html). We'll try to reproduce all of its results with tidyfst. First, load the needed packages.
library(tidyfst)
#>
#> Life's short, use R.
library(nycflights13)
data.table(flights)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour
#> 1: 1400 5 15 2013-01-01 05:00:00
#> 2: 1416 5 29 2013-01-01 05:00:00
#> 3: 1089 5 40 2013-01-01 05:00:00
#> 4: 1576 5 45 2013-01-01 05:00:00
#> 5: 762 6 0 2013-01-01 06:00:00
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00
#> 336773: 198 22 0 2013-09-30 22:00:00
#> 336774: 764 12 10 2013-09-30 12:00:00
#> 336775: 419 11 59 2013-09-30 11:00:00
#> 336776: 431 8 40 2013-09-30 08:00:00
filter_dt()
filter_dt(flights, month == 1, day == 1)
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> 1: 2013 1 1 517 515 2 830 819
#> 2: 2013 1 1 533 529 4 850 830
#> 3: 2013 1 1 542 540 2 923 850
#> 4: 2013 1 1 544 545 -1 1004 1022
#> 5: 2013 1 1 554 600 -6 812 837
#> ---
#> 838: 2013 1 1 2356 2359 -3 425 437
#> 839: 2013 1 1 NA 1630 NA NA 1815
#> 840: 2013 1 1 NA 1935 NA NA 2240
#> 841: 2013 1 1 NA 1500 NA NA 1825
#> 842: 2013 1 1 NA 600 NA NA 901
#> arr_delay carrier flight tailnum origin dest air_time distance hour minute
#> 1: 11 UA 1545 N14228 EWR IAH 227 1400 5 15
#> 2: 20 UA 1714 N24211 LGA IAH 227 1416 5 29
#> 3: 33 AA 1141 N619AA JFK MIA 160 1089 5 40
#> 4: -18 B6 725 N804JB JFK BQN 183 1576 5 45
#> 5: -25 DL 461 N668DN LGA ATL 116 762 6 0
#> ---
#> 838: -12 B6 727 N588JB JFK BQN 186 1576 23 59
#> 839: NA EV 4308 N18120 EWR RDU NA 416 16 30
#> 840: NA AA 791 N3EHAA LGA DFW NA 1389 19 35
#> 841: NA AA 1925 N3EVAA LGA MIA NA 1096 15 0
#> 842: NA B6 125 N618JB JFK FLL NA 1069 6 0
#> time_hour
#> 1: 2013-01-01 05:00:00
#> 2: 2013-01-01 05:00:00
#> 3: 2013-01-01 05:00:00
#> 4: 2013-01-01 05:00:00
#> 5: 2013-01-01 06:00:00
#> ---
#> 838: 2013-01-01 23:00:00
#> 839: 2013-01-01 16:00:00
#> 840: 2013-01-01 19:00:00
#> 841: 2013-01-01 15:00:00
#> 842: 2013-01-01 06:00:00
arrange_dt()
arrange_dt(flights, year, month, day)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 12 31 NA 705 NA NA
#> 336773: 2013 12 31 NA 825 NA NA
#> 336774: 2013 12 31 NA 1615 NA NA
#> 336775: 2013 12 31 NA 600 NA NA
#> 336776: 2013 12 31 NA 830 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 931 NA UA 1729 <NA> EWR DEN NA
#> 336773: 1029 NA US 1831 <NA> JFK CLT NA
#> 336774: 1800 NA MQ 3301 N844MQ LGA RDU NA
#> 336775: 735 NA UA 219 <NA> EWR ORD NA
#> 336776: 1154 NA UA 443 <NA> JFK LAX NA
#> distance hour minute time_hour
#> 1: 1400 5 15 2013-01-01 05:00:00
#> 2: 1416 5 29 2013-01-01 05:00:00
#> 3: 1089 5 40 2013-01-01 05:00:00
#> 4: 1576 5 45 2013-01-01 05:00:00
#> 5: 762 6 0 2013-01-01 06:00:00
#> ---
#> 336772: 1605 7 5 2013-12-31 07:00:00
#> 336773: 541 8 25 2013-12-31 08:00:00
#> 336774: 431 16 15 2013-12-31 16:00:00
#> 336775: 719 6 0 2013-12-31 06:00:00
#> 336776: 2475 8 30 2013-12-31 08:00:00
Use `-` (the minus sign) to sort by a column in descending order:
arrange_dt(flights, -arr_delay)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 9 641 900 1301 1242
#> 2: 2013 6 15 1432 1935 1137 1607
#> 3: 2013 1 10 1121 1635 1126 1239
#> 4: 2013 9 20 1139 1845 1014 1457
#> 5: 2013 7 22 845 1600 1005 1044
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 1530 1272 HA 51 N384HA JFK HNL 640
#> 2: 2120 1127 MQ 3535 N504MQ JFK CMH 74
#> 3: 1810 1109 MQ 3695 N517MQ EWR ORD 111
#> 4: 2210 1007 AA 177 N338AA JFK SFO 354
#> 5: 1815 989 MQ 3075 N665MQ JFK CVG 96
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour
#> 1: 4983 9 0 2013-01-09 09:00:00
#> 2: 483 19 35 2013-06-15 19:00:00
#> 3: 719 16 35 2013-01-10 16:00:00
#> 4: 2586 18 45 2013-09-20 18:00:00
#> 5: 589 16 0 2013-07-22 16:00:00
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00
#> 336773: 198 22 0 2013-09-30 22:00:00
#> 336774: 764 12 10 2013-09-30 12:00:00
#> 336775: 419 11 59 2013-09-30 11:00:00
#> 336776: 431 8 40 2013-09-30 08:00:00
select_dt()
select_dt(flights, year, month, day)
#> year month day
#> 1: 2013 1 1
#> 2: 2013 1 1
#> 3: 2013 1 1
#> 4: 2013 1 1
#> 5: 2013 1 1
#> ---
#> 336772: 2013 9 30
#> 336773: 2013 9 30
#> 336774: 2013 9 30
#> 336775: 2013 9 30
#> 336776: 2013 9 30
`select_dt(flights, year:day)` and `select_dt(flights, -(year:day))` are not supported. However, `select_dt()` can select columns with a regular expression, which means you can write:
select_dt(flights, "^dep")
#> dep_time dep_delay
#> 1: 517 2
#> 2: 533 4
#> 3: 542 2
#> 4: 544 -1
#> 5: 554 -6
#> ---
#> 336772: NA NA
#> 336773: NA NA
#> 336774: NA NA
#> 336775: NA NA
#> 336776: NA NA
Renaming works almost the same way as in dplyr:
select_dt(flights, tail_num = tailnum)
#> tail_num
#> 1: N14228
#> 2: N24211
#> 3: N619AA
#> 4: N804JB
#> 5: N668DN
#> ---
#> 336772: <NA>
#> 336773: <NA>
#> 336774: N535MQ
#> 336775: N511MQ
#> 336776: N839MQ
rename_dt(flights, tail_num = tailnum)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tail_num origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour
#> 1: 1400 5 15 2013-01-01 05:00:00
#> 2: 1416 5 29 2013-01-01 05:00:00
#> 3: 1089 5 40 2013-01-01 05:00:00
#> 4: 1576 5 45 2013-01-01 05:00:00
#> 5: 762 6 0 2013-01-01 06:00:00
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00
#> 336773: 198 22 0 2013-09-30 22:00:00
#> 336774: 764 12 10 2013-09-30 12:00:00
#> 336775: 419 11 59 2013-09-30 11:00:00
#> 336776: 431 8 40 2013-09-30 08:00:00
mutate_dt()
mutate_dt(flights,
gain = arr_delay - dep_delay,
speed = distance / air_time * 60
)
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour gain speed
#> 1: 1400 5 15 2013-01-01 05:00:00 9 370.0441
#> 2: 1416 5 29 2013-01-01 05:00:00 16 374.2731
#> 3: 1089 5 40 2013-01-01 05:00:00 31 408.3750
#> 4: 1576 5 45 2013-01-01 05:00:00 -17 516.7213
#> 5: 762 6 0 2013-01-01 06:00:00 -19 394.1379
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00 NA NA
#> 336773: 198 22 0 2013-09-30 22:00:00 NA NA
#> 336774: 764 12 10 2013-09-30 12:00:00 NA NA
#> 336775: 419 11 59 2013-09-30 11:00:00 NA NA
#> 336776: 431 8 40 2013-09-30 08:00:00 NA NA
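However, a column that has just been created cannot be used by another expression in the same call, so split such steps into separate calls. The following code would not work: `mutate_dt(flights, gain = arr_delay - dep_delay, gain_per_hour = gain / (air_time / 60))`
Instead, use: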
mutate_dt(flights,gain = arr_delay - dep_delay) %>%
mutate_dt(gain_per_hour = gain / (air_time / 60))
#> year month day dep_time sched_dep_time dep_delay arr_time
#> 1: 2013 1 1 517 515 2 830
#> 2: 2013 1 1 533 529 4 850
#> 3: 2013 1 1 542 540 2 923
#> 4: 2013 1 1 544 545 -1 1004
#> 5: 2013 1 1 554 600 -6 812
#> ---
#> 336772: 2013 9 30 NA 1455 NA NA
#> 336773: 2013 9 30 NA 2200 NA NA
#> 336774: 2013 9 30 NA 1210 NA NA
#> 336775: 2013 9 30 NA 1159 NA NA
#> 336776: 2013 9 30 NA 840 NA NA
#> sched_arr_time arr_delay carrier flight tailnum origin dest air_time
#> 1: 819 11 UA 1545 N14228 EWR IAH 227
#> 2: 830 20 UA 1714 N24211 LGA IAH 227
#> 3: 850 33 AA 1141 N619AA JFK MIA 160
#> 4: 1022 -18 B6 725 N804JB JFK BQN 183
#> 5: 837 -25 DL 461 N668DN LGA ATL 116
#> ---
#> 336772: 1634 NA 9E 3393 <NA> JFK DCA NA
#> 336773: 2312 NA 9E 3525 <NA> LGA SYR NA
#> 336774: 1330 NA MQ 3461 N535MQ LGA BNA NA
#> 336775: 1344 NA MQ 3572 N511MQ LGA CLE NA
#> 336776: 1020 NA MQ 3531 N839MQ LGA RDU NA
#> distance hour minute time_hour gain gain_per_hour
#> 1: 1400 5 15 2013-01-01 05:00:00 9 2.378855
#> 2: 1416 5 29 2013-01-01 05:00:00 16 4.229075
#> 3: 1089 5 40 2013-01-01 05:00:00 31 11.625000
#> 4: 1576 5 45 2013-01-01 05:00:00 -17 -5.573770
#> 5: 762 6 0 2013-01-01 06:00:00 -19 -9.827586
#> ---
#> 336772: 213 14 55 2013-09-30 14:00:00 NA NA
#> 336773: 198 22 0 2013-09-30 22:00:00 NA NA
#> 336774: 764 12 10 2013-09-30 12:00:00 NA NA
#> 336775: 419 11 59 2013-09-30 11:00:00 NA NA
#> 336776: 431 8 40 2013-09-30 08:00:00 NA NA
If you only want to keep the new variables, use `transmute_dt()`, e.g. `transmute_dt(flights, gain = arr_delay - dep_delay)`.
summarise_dt()
sample_n_dt() and sample_frac_dt()
sample_n_dt(flights, 10)
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> 1: 2013 9 3 1154 1200 -6 1422 1435
#> 2: 2013 10 25 1959 1920 39 2158 2053
#> 3: 2013 9 23 1935 1903 32 2140 2109
#> 4: 2013 10 31 740 742 -2 1056 1052
#> 5: 2013 5 27 2004 1955 9 2224 2253
#> 6: 2013 6 10 1607 1415 112 1838 1619
#> 7: 2013 2 24 1837 1840 -3 2013 2020
#> 8: 2013 4 4 2240 2150 50 144 40
#> 9: 2013 4 17 NA 1815 NA NA 1955
#> 10: 2013 4 24 NA 1505 NA NA 1630
#> arr_delay carrier flight tailnum origin dest air_time distance hour minute
#> 1: -13 DL 1947 N683DA LGA ATL 109 762 12 0
#> 2: 65 9E 3555 N915XJ LGA BUF 50 292 19 20
#> 3: 31 EV 5795 N16981 EWR CLT 80 529 19 3
#> 4: 4 UA 1455 N33284 EWR LAX 341 2454 7 42
#> 5: -29 9E 3450 N908XJ JFK JAX 102 828 19 55
#> 6: 139 B6 1275 N216JB JFK CHS 101 636 14 15
#> 7: -7 MQ 3730 N8EGMQ EWR ORD 114 719 18 40
#> 8: 64 B6 515 N247JB EWR FLL 171 1065 21 50
#> 9: NA MQ 3730 N534MQ EWR ORD NA 719 18 15
#> 10: NA MQ 3823 N534MQ JFK DCA NA 213 15 5
#> time_hour
#> 1: 2013-09-03 12:00:00
#> 2: 2013-10-25 19:00:00
#> 3: 2013-09-23 19:00:00
#> 4: 2013-10-31 07:00:00
#> 5: 2013-05-27 19:00:00
#> 6: 2013-06-10 14:00:00
#> 7: 2013-02-24 18:00:00
#> 8: 2013-04-04 21:00:00
#> 9: 2013-04-17 18:00:00
#> 10: 2013-04-24 15:00:00
sample_frac_dt(flights, 0.01)
#> year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
#> 1: 2013 1 9 1446 1446 0 1819 1757
#> 2: 2013 4 11 1024 1030 -6 1243 1248
#> 3: 2013 2 16 1710 1716 -6 1815 1826
#> 4: 2013 12 5 1757 1135 382 1908 1250
#> 5: 2013 9 26 1351 1355 -4 1607 1639
#> ---
#> 3363: 2013 8 22 2000 1835 85 2203 2010
#> 3364: 2013 11 3 1645 1650 -5 1904 1920
#> 3365: 2013 10 6 853 900 -7 1045 1034
#> 3366: 2013 5 27 1819 1816 3 1942 2005
#> 3367: 2013 2 20 2326 2044 162 19 2150
#> arr_delay carrier flight tailnum origin dest air_time distance hour
#> 1: 22 UA 439 N446UA LGA IAH 247 1416 14
#> 2: -5 DL 2343 N301DQ EWR ATL 106 746 10
#> 3: -11 EV 4373 N13969 EWR DCA 46 199 17
#> 4: 378 EV 4511 N29906 EWR ROC 47 246 11
#> 5: -32 UA 431 N845UA EWR DFW 178 1372 13
#> ---
#> 3363: 113 MQ 3674 N530MQ LGA CLE 62 419 18
#> 3364: -16 WN 356 N965WN LGA DEN 235 1620 16
#> 3365: 11 UA 673 N808UA LGA ORD 108 733 9
#> 3366: -23 UA 1651 N18220 EWR CLE 65 404 18
#> 3367: 149 EV 4583 N11192 EWR MHT 37 209 20
#> minute time_hour
#> 1: 46 2013-01-09 14:00:00
#> 2: 30 2013-04-11 10:00:00
#> 3: 16 2013-02-16 17:00:00
#> 4: 35 2013-12-05 11:00:00
#> 5: 55 2013-09-26 13:00:00
#> ---
#> 3363: 35 2013-08-22 18:00:00
#> 3364: 50 2013-11-03 16:00:00
#> 3365: 0 2013-10-06 09:00:00
#> 3366: 16 2013-05-27 18:00:00
#> 3367: 44 2013-02-20 20:00:00
Consider the following dplyr code:
by_tailnum <- group_by(flights, tailnum)
delay <- summarise(by_tailnum,
count = n(),
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE))
delay <- filter(delay, count > 20, dist < 2000)
We can get the same grouped summary (before the final `filter` step) via:
flights %>%
summarise_dt( count = .N,
dist = mean(distance, na.rm = TRUE),
delay = mean(arr_delay, na.rm = TRUE),by = tailnum)
#> tailnum count dist delay
#> 1: N14228 111 1546.964 3.711712
#> 2: N24211 130 1330.262 7.700000
#> 3: N619AA 24 1339.208 7.652174
#> 4: N804JB 219 1424.621 -1.860465
#> 5: N668DN 49 1027.592 2.625000
#> ---
#> 4040: N766SK 1 419.000 -24.000000
#> 4041: N772SK 1 419.000 -8.000000
#> 4042: N776SK 1 419.000 -18.000000
#> 4043: N785SK 1 419.000 -16.000000
#> 4044: N557AS 1 2402.000 -30.000000
`summarise_dt()` (or `summarize_dt()`) has a `by` parameter that specifies the grouping variable(s). For example, we can find the number of planes and the number of flights that go to each possible destination:
# the dplyr syntax:
# destinations <- group_by(flights, dest)
# summarise(destinations,
# planes = n_distinct(tailnum),
# flights = n()
# )
summarise_dt(flights,planes = uniqueN(tailnum),flights = .N,by = dest) %>%
arrange_dt(dest)
#> dest planes flights
#> 1: ABQ 108 254
#> 2: ACK 58 265
#> 3: ALB 172 439
#> 4: ANC 6 8
#> 5: ATL 1180 17215
#> ---
#> 101: TPA 1126 7466
#> 102: TUL 105 315
#> 103: TVC 60 101
#> 104: TYS 273 631
#> 105: XNA 176 1036
If you need to group by several variables, pass them to `by` wrapped in `.()`:
# the dplyr syntax:
# daily <- group_by(flights, year, month, day)
# (per_day <- summarise(daily, flights = n()))
flights %>%
summarise_dt(by = .(year,month,day),flights = .N)
#> year month day flights
#> 1: 2013 1 1 842
#> 2: 2013 1 2 943
#> 3: 2013 1 3 914
#> 4: 2013 1 4 915
#> 5: 2013 1 5 720
#> ---
#> 361: 2013 9 26 996
#> 362: 2013 9 27 996
#> 363: 2013 9 28 682
#> 364: 2013 9 29 914
#> 365: 2013 9 30 993
# (per_month <- summarise(per_day, flights = sum(flights)))
flights %>%
summarise_dt(by = .(year,month,day),flights = .N) %>%
summarise_dt(by = .(year,month),flights = sum(flights))
#> year month flights
#> 1: 2013 1 27004
#> 2: 2013 10 28889
#> 3: 2013 11 27268
#> 4: 2013 12 28135
#> 5: 2013 2 24951
#> 6: 2013 3 28834
#> 7: 2013 4 28330
#> 8: 2013 5 28796
#> 9: 2013 6 28243
#> 10: 2013 7 29425
#> 11: 2013 8 29327
#> 12: 2013 9 27574
# (per_year <- summarise(per_month, flights = sum(flights)))
flights %>%
summarise_dt(by = .(year,month,day),flights = .N) %>%
summarise_dt(by = .(year,month),flights = sum(flights)) %>%
summarise_dt(by = .(year),flights = sum(flights))
#> year flights
#> 1: 2013 336776