The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
library(dplyr)
library(explore)
data <- use_data_titanic(count = FALSE)
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ Age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
data <- data %>% clean_var(Age, name = "age")
glimpse(data)
#> Rows: 2,201
#> Columns: 4
#> $ Class <chr> "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd", "3rd"…
#> $ Sex <chr> "Male", "Male", "Male", "Male", "Male", "Male", "Male", "Male…
#> $ age <chr> "Child", "Child", "Child", "Child", "Child", "Child", "Child"…
#> $ Survived <chr> "No", "No", "No", "No", "No", "No", "No", "No", "No", "No", "…
data <- use_data_beer()
data %>% describe(energy_kcal_100ml)
#> variable = energy_kcal_100ml
#> type = double
#> na = 11 of 161 (6.8%)
#> unique = 34
#> min|max = 20 | 62
#> q05|q95 = 24 | 56.65
#> q25|q75 = 37 | 44
#> median = 42
#> mean = 39.89333
data <- data %>% clean_var(energy_kcal_100ml, na = 42)
data %>% describe(energy_kcal_100ml)
#> variable = energy_kcal_100ml
#> type = double
#> na = 0 of 161 (0%)
#> unique = 33
#> min|max = 20 | 62
#> q05|q95 = 24 | 55
#> q25|q75 = 38 | 44
#> median = 42
#> mean = 40.03727
data <- create_data_person()
data %>% describe(age)
#> variable = age
#> type = integer
#> na = 0 of 1 000 (0%)
#> unique = 80
#> min|max = 16 | 95
#> q05|q95 = 21 | 92
#> q25|q75 = 37 | 76
#> median = 55
#> mean = 55.845
data <- data %>% clean_var(age, min_val = 20, max_val = 80)
data %>% describe(age)
#> variable = age
#> type = integer
#> na = 0 of 1 000 (0%)
#> unique = 61
#> min|max = 20 | 80
#> q05|q95 = 21 | 80
#> q25|q75 = 37 | 76
#> median = 55
#> mean = 54.276
data %>% describe(income)
#> variable = income
#> type = double
#> na = 0 of 1 000 (0%)
#> unique = 228
#> min|max = 0 | 150
#> q05|q95 = 6 | 123.025
#> q25|q75 = 35 | 88.625
#> median = 62
#> mean = 61.5875
data <- data %>% clean_var(income, rescale01 = TRUE)
data %>% describe(income)
#> variable = income
#> type = double
#> na = 0 of 1 000 (0%)
#> unique = 228
#> min|max = 0 | 1
#> q05|q95 = 0.04 | 0.820167
#> q25|q75 = 0.233333 | 0.590833
#> median = 0.4
#> mean = 0.410583
data[1, "handset"] <- " android "
data[2, "handset"] <- "ANDROID"
data %>% describe(handset)
#> variable = handset
#> type = character
#> na = 0 of 1 000 (0%)
#> unique = 5
#> android = 1 (0.1%)
#> ANDROID = 1 (0.1%)
#> Android = 471 (47.1%)
#> Apple = 430 (43%)
#> Other = 97 (9.7%)
data <- data %>% clean_var(handset, simplify_text = TRUE)
data %>% describe(handset)
#> variable = handset
#> type = character
#> na = 0 of 1 000 (0%)
#> unique = 3
#> ANDROID = 473 (47.3%)
#> APPLE = 430 (43%)
#> OTHER = 97 (9.7%)
drop_var_no_variance(): Drop all variables with no variance
drop_var_not_numeric(): Drop all not numeric variables
drop_var_low_variance(): Drop all variables with low variance
drop_var_by_names(): Drop variables by name
drop_var_with_na(): Drop all variables with NA-values

data <- use_data_beer()
data %>% describe_tbl()
#> 161 observations with 11 variables
#> 19 observations containing missings (NA)
#> 5 variables containing missings (NA)
#> 1 variables with no variance
data %>%
  drop_var_no_variance() %>%
  describe_tbl()
#> 161 observations with 10 variables
#> 19 observations containing missings (NA)
#> 5 variables containing missings (NA)
#> 0 variables with no variance
data %>%
  drop_var_with_na() %>%
  describe_tbl()
#> 161 observations with 6 variables
#> 0 observations containing missings (NA)
#> 0 variables containing missings (NA)
#> 1 variables with no variance
drop_obs_with_na(): Drop all observations with NA-values

data %>%
  drop_obs_with_na() %>%
  describe_tbl()
#> 142 observations with 11 variables
#> 0 observations containing missings (NA)
#> 0 variables containing missings (NA)
#> 1 variables with no variance
drop_obs_if(): Drop all observations where expression is true

data %>%
  count_pct(type)
#> # A tibble: 3 × 4
#> type n total pct
#> <chr> <int> <int> <dbl>
#> 1 Alkoholfrei 27 161 16.8
#> 2 Bock 8 161 4.97
#> 3 Rest 126 161 78.3
data %>%
  drop_obs_if(type == "Alkoholfrei") %>%
  count_pct(type)
#> # A tibble: 2 × 4
#> type n total pct
#> <chr> <int> <int> <dbl>
#> 1 Bock 8 134 5.97
#> 2 Rest 126 134 94.0
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.