## ----include = FALSE----------------------------------------------------------
# Vignette-wide chunk defaults: collapse source and output into one block,
# prefix printed output with "#>", and fix the figure size.
knitr::opts_chunk$set(
  collapse   = TRUE,
  comment    = "#>",
  fig.width  = 7,
  fig.height = 4.5
)

## ----setup--------------------------------------------------------------------
library(dataProfilerR)

## ----data---------------------------------------------------------------------
# Build a reproducible synthetic "customers" table that exercises the
# profiler: numeric, date, categorical, constant and free-text columns,
# one extreme numeric outlier, and injected missing values.
set.seed(1)
n <- 200
df <- data.frame(
  age        = round(rnorm(n, 40, 12)),
  # Log-normal incomes plus one extreme outlier placed in the last row.
  income     = c(rlnorm(n - 1, log(50000), 0.4), 5e6),
  signup     = as.Date("2025-01-01") + sample(0:600, n, replace = TRUE),
  plan       = sample(c("free", "pro", "enterprise"), n, replace = TRUE),
  # NOTE(review): "NA" is a literal string label here, not a missing value;
  # confirm profile_data() does not treat the text "NA" as missing.
  region     = sample(c("NA", "EU", "APAC"), n, replace = TRUE),
  constant   = 1L,                                        # zero-variance column
  # One random 12-letter string per row; vapply() keeps the result type fixed.
  note       = vapply(
    seq_len(n),
    function(i) paste(sample(letters, 12), collapse = ""),
    character(1)
  ),
  stringsAsFactors = FALSE
)
# Inject missingness. Income NAs are drawn from rows 1..(n - 1) only, so the
# deliberate outlier in row n can never be overwritten with NA.
df$income[sample(seq_len(n - 1), 20)] <- NA
df$plan[sample(n, 8)]    <- NA

## ----profile------------------------------------------------------------------
# Profile the synthetic data frame. `dataset_name` presumably only labels the
# result in printed output and reports -- confirm in ?profile_data.
p <- profile_data(df, dataset_name = "customers")
# Auto-print the profile object at top level.
p

## ----summary------------------------------------------------------------------
# Call the summary() generic on the profile object and auto-print the result.
summary(p)

## ----structure----------------------------------------------------------------
# The profile is accessed as a nested, list-like object: pull out individual
# components with `$` and auto-print them.
p$metadata$column_types
p$diagnostics$quality$components
# First rows (head() defaults to 6) of four selected columns from the
# two-dimensional numeric statistics component.
head(p$statistics$numeric[, c("column", "mean", "sd", "skewness")])

## ----missing-plot-------------------------------------------------------------
# Plot the "missing" view of the profile (presumably a missingness overview).
plot(p, which = "missing")

## ----dist-plot----------------------------------------------------------------
# Plot the "distribution" view for a single column; `income` is the column
# that carries the injected outlier and missing values.
plot(p, which = "distribution", column = "income")

## ----corr-plot----------------------------------------------------------------
# Plot the "correlation" view of the profile.
plot(p, which = "correlation")

## ----tuning-------------------------------------------------------------------
# Re-profile with explicit options: plot building switched off, the "robust"
# outlier method, and Spearman correlation. The defaults are not visible
# here -- see ?profile_data.
p2 <- profile_data(df, build_plots = FALSE, outlier_method = "robust",
                   cor_method = "spearman")
# Inspect the per-column outlier diagnostics of the re-tuned profile.
p2$diagnostics$outliers$per_column

## ----association--------------------------------------------------------------
# Print the association statistics stored in the profile, then draw the
# matching "association" plot.
p$statistics$association
plot(p, which = "association")

## ----dates--------------------------------------------------------------------
# Print the date diagnostics; presumably derived from the Date column
# `signup` -- confirm against the package documentation.
p$diagnostics$dates

## ----groups-------------------------------------------------------------------
# Profile again, this time with `group_by = "plan"`.
# NOTE(review): `plan` contains injected NAs; how missing group labels are
# handled depends on profile_data() -- confirm.
pg <- profile_data(df, group_by = "plan")
# First 8 rows of the per-group numeric component.
head(pg$diagnostics$groups$numeric_by_group, 8)

## ----report, eval=FALSE-------------------------------------------------------
# report(p, "customers_report.html")

