summary

John Mount

2017-06-12

replyr_summary example.

replyr_summary works on various data sources (shown below: a local data.frame and a SQLite table), counts NA values per column, and returns its result as a data.frame (instead of printed text).


# Two-row example data: each column contains exactly one NA.
d <- data.frame(
  x = c(NA, "b"),
  y = c(1, NA),
  stringsAsFactors = FALSE
)

# Base R summary() prints a formatted text table: the character column x
# only gets Length/Class/Mode, and the NA in y shows up as an "NA's" row.
summary(d)
 #        x                   y    
 #   Length:2           Min.   :1  
 #   Class :character   1st Qu.:1  
 #   Mode  :character   Median :1  
 #                      Mean   :1  
 #                      3rd Qu.:1  
 #                      Max.   :1  
 #                      NA's   :1

# replyr_summary() reports one row per column of d; the nna column holds
# the per-column NA count (1 for both x and y in the output below).
replyr::replyr_summary(d)
 #    column index     class nrows nna nunique min max mean sd lexmin lexmax
 #  1      x     1 character     2   1      NA  NA  NA   NA NA      b      b
 #  2      y     2   numeric     2   1      NA   1   1    1 NA   <NA>   <NA>

# Copy d into an in-memory SQLite database so the same calls can be run
# against a database-backed (remote) table.
# NOTE(review): src_sqlite() appears to be deprecated in later dplyr
# releases in favour of tbl() on a DBI connection - confirm before
# re-running this example with a current dplyr.
my_db <- dplyr::src_sqlite(":memory:", create = TRUE)
dbData <- dplyr::copy_to(my_db, d)

# On the database-backed table, summary() only describes the structure of
# the handle object (its src and ops components), not the data itself.
summary(dbData)
 #      Length Class          Mode
 #  src 2      src_dbi        list
 #  ops 2      op_base_remote list

# replyr_summary() still yields the per-column table. Note that in this
# recorded output nrows and nna are NA for the remote table (they were 2
# and 1 for the local data.frame), while min/max/mean and lexmin/lexmax
# match the local run.
replyr::replyr_summary(dbData)
 #    column index     class nrows nna nunique min max mean sd lexmin lexmax
 #  1      x     1 character    NA  NA      NA  NA  NA   NA NA      b      b
 #  2      y     2   numeric    NA  NA      NA   1   1    1 NA   <NA>   <NA>

# glimpse works more like str or head: it lists each column with its type
# and leading values instead of computing summary statistics.
dplyr::glimpse(dbData)
 #  Observations: 2
 #  Variables: 2
 #  $ x <chr> NA, "b"
 #  $ y <dbl> 1, NA
# Remove every object returned by ls() and then force a garbage collection.
# NOTE(review): presumably this is here to release the in-memory SQLite
# connection held by my_db - confirm. Because rm(list=ls()) clears the
# whole workspace, do not paste these two lines into a session that holds
# other work.
rm(list=ls())
gc()
 #            used (Mb) gc trigger  (Mb) max used  (Mb)
 #  Ncells  958986 51.3    1770749  94.6  1770749  94.6
 #  Vcells 9954453 76.0   26948296 205.6 25950476 198.0