The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.

statuser

Miscellaneous R functions for papers, blogposts, & teaching by Uri Simonsohn.

Installation

# Install from GitHub with `groundhog` (for version control)
groundhog::groundhog.library("urisohn/statuser", date)   #date used for version control 

# Or install from GitHub with `devtools`
devtools::install_github("urisohn/statuser")

Overview

Functions I often use and are not (sufficiently?) available in existing packages.

Functions

📄 Uri’s Papers

twolines(): Two-Lines Test of U-Shapes

Implements the two-lines test for U-shaped (or inverted U-shaped) relationships introduced by Simonsohn (2018).

Reference: Simonsohn, Uri (2018) “Two lines: A valid alternative to the invalid testing of U-shaped relationships with quadratic regressions.” AMPPS, 538-555. https://doi.org/10.1177/2515245918805755

# Simple example with simulated data
set.seed(123)
x <- rnorm(100)
y <- -x^2 + rnorm(100)
data <- data.frame(x = x, y = y)
result <- twolines(y ~ x, data = data)

# With covariates
z <- rnorm(100)
y <- -x^2 + 0.5*z + rnorm(100)
data <- data.frame(x = x, y = y, z = z)
result <- twolines(y ~ x + z, data = data)

# Without data argument (variables evaluated from environment)
x <- rnorm(100)
y <- -x^2 + rnorm(100)
result <- twolines(y ~ x)

# Suppress Robin Hood details
result <- twolines(y ~ x, data = data, quiet = TRUE)

# Save plot to PNG
result <- twolines(y ~ x, data = data, pngfile = "twolines_plot.png")

📊 Graphing

plot_cdf(): CDF for multiple groups in one plot

y <- rnorm(100)
x <- rep(c("A", "B"), 50)
plot_cdf(y, x)  # Uses default colors (red4, dodgerblue for 2 groups)
plot_cdf(y, x, col = c("red", "blue"))  # Custom colors

plot_density(): Density for multiple groups in one plot

y <- rnorm(100)
x <- rep(c("A", "B"), 50)
plot_density(y, x)  # Uses default colors (red4, dodgerblue for 2 groups)
plot_density(y, x, col = c("red", "blue"))  # Custom colors
plot_density(y, x, show.means = FALSE)  # Hide mean segments

plot_freq(): Frequency distribution without binning, with value labels

x <- c(1, 1, 2, 2, 2, 5, 5)
plot_freq(x)

# Grouped frequency plot
df <- data.frame(value = c(1, 1, 2, 2, 2, 5, 5), group = c("A", "A", "A", "B", "B", "A", "B"))
plot_freq(value, by = group, data = df)

# Show percentages instead of frequencies
plot_freq(value, by = group, data = df, freq = FALSE)

# Customize legend and labels
plot_freq(value, by = group, data = df, legend.title = "Group", col.text = "black")

plot_gam(): Plot GAM model predictions with optional distribution plot

library(mgcv)
# Fit a GAM model
data(mtcars)
model <- gam(mpg ~ s(hp) + s(wt) + factor(cyl), data = mtcars)

# Basic plot
plot_gam(model, "hp")

# With distribution plot below (auto-selects plot_freq or plot_density)
plot_gam(model, "hp", plot2 = "auto")

# Always use frequency plot
plot_gam(model, "hp", plot2 = "freq")

# Always use density plot
plot_gam(model, "hp", plot2 = "density")

# Customize colors for main plot and bottom plot
plot_gam(model, "hp", plot2 = "auto", 
         col = "blue4", bg = adjustcolor('dodgerblue', .2),  # Main plot colors
         col2 = "steelblue", bg2 = "gray95")  # Bottom plot colors

# Hold other variables at different quantile
plot_gam(model, "hp", quantile.others = 25)

scatter.gam(): Scatter plots with GAM smooth lines

x <- rnorm(100)
y <- 2*x + rnorm(100)
scatter.gam(x, y)

text2(): Adds to text(): align=‘center’ , bg=‘yellow’

plot(1:10, 1:10, type = "n")
   text2(2, 8, "Left", align = "left", bg = "lightblue")
   text2(5, 8, "Center", align = "center", bg = "lightgreen")
   text2(8, 8, "Right",    align = "right", bg = "lightyellow")
   text2(5, 5, "Red Text", col = "red", bg = "white")

resize_images(): Saves any image (or all in folder) as PNG with set width.

# Resize a single image file
   resize_images("path/to/image.svg", width = 800)

# Resize all images in a folder to 800px width
   resize_images("path/to/images", width = 800)

# Resize images to different widths
   resize_images("path/to/images", width = c(800, 1200, 600))

📈 Statistical Analyses

table2(): Enhances base table(): (1) variable names are shown, (2) proportions are an option

df <- data.frame(
  group = c("A", "A", "B", "B", "A"),
  status = c("X", "Y", "X", "Y", "X")
)

# table() does not show var names, table2() does
table (df$group, df$status)
table2(df$group, df$status)

# can report proportinos (building in prop.table() )
table2(df$group, df$status, prop = "all")    # Overall proportions
table2(df$group, df$status, prop = "row")    # Row proportions
table2(df$group, df$status, prop = "column") # Column proportions

desc_var(): Comprehensive variable summary stats, optionally by grouping variable(s)

# Why use desc_var() instead of psych::describeBy()?
# - Returns a single dataframe (not a list) - easier to export, filter, merge
# - Shows mode statistics (most frequent values) - useful for discrete data
# - Counts missing values automatically
# - Columns are labeled for easy interpretation
# - Supports multiple grouping variables with formula syntax
# - Results are sorted by grouping variables

# With grouping - compare groups side-by-side
df <- data.frame(score = rnorm(100), condition = rep(c("Control", "Treatment"), 50))
desc_var(score, condition, data = df)

# Formula syntax (single grouping variable)
desc_var(score ~ condition, data = df)

# Multiple grouping variables - results sorted by all grouping variables
df2 <- data.frame(
  score = rnorm(200),
  x1 = rep(c("A", "B"), 100),
  x2 = rep(c("men", "women"), each = 100),
  x3 = sample(1:3, replace = TRUE, size = 200)
)
desc_var(score ~ x1 + x2 + x3, data = df2)

# Without grouping - get stats for full dataset
desc_var(score, data = df)

# Direct vectors (no data frame needed)
scores <- rnorm(100)
groups <- rep(c("A", "B"), 50)
desc_var(scores, groups)

# Custom decimal places for cleaner output
desc_var(score, condition, data = df, decimals = 2)

lm2(): Enhanced linear regression with robust SEs, standardized coefficients, and diagnostics

# Basic usage - uses HC3 robust standard errors by default
lm2(mpg ~ wt + hp, data = mtcars)

# Works without data argument if variables exist in environment
y <- rnorm(100)
x1 <- rnorm(100)
x2 <- rnorm(100)
lm2(y ~ x1 + x2)

# Output includes:
# - Robust and classical standard errors side-by-side
# - Standardized coefficients (effect.size)
# - Missing value counts per variable
# - Red flags when robust and classical SEs differ substantially

# Get estimatr's native output instead
lm2(mpg ~ wt + hp, data = mtcars, output = "estimatr")

# Use different robust SE type (HC0, HC1, HC2, HC3)
lm2(mpg ~ wt + hp, data = mtcars, se_type = "HC2")

t.test2(): Enhances base t.test: (1) console shows mean diff & var names, (2) output is dataframe, not list

# Data for example
    men <- rnorm(100, mean = 5, sd = 1)
    women <- rnorm(100, mean = 4.8, sd = 1)

# t.test() is harder to interpret, does not show difference of means(!) or indicate which mean is subtraced from which

    t.test(men, women) 
    t.test2(men, women)

# Formula syntax
    data <- data.frame(y = rnorm(100), group = rep(c("A", "B"), 50))
    t2 = t.test2(y ~ group, data = data)  # Columns: A, B, A-B, SE_A-B, conf.intL, conf.intH, level, t, df, p.value, method, se_A, se_B
    t2

# Formula syntax without data argument
    x1=rnorm(100)
    x2=rnorm(100)
    y <- c(x1, x2)
    condition <- rep(c('A', 'B'), c(length(x1), length(x2)))
    t.test2(y ~ condition)

✨ Formatting

format_pvalue(): Format p-values for clean display in figures and tables (e.g., p<.0001)

format_pvalue(0.05)
format_pvalue(0.0001, include_p = TRUE)

message2(): Print colored messages to console

message2("This is a red message", col = "red", font = 2)
message2("This is a cyan message", col = "cyan")

🗂️ Miscellaneous

list2(): Create lists with objects without having to name them

    x <- 1:5
    y <- letters[1:3]
    z <- matrix(1:4, nrow = 2)
    list2(x, y, z)

convert_to_sql(): Convert CSV to SQL

    convert_to_sql("data.csv", "data.sql")

clear(): Clear environment, console, and all graphics devices

# Create some objects
    x <- 1:10
    y <- rnorm(10)
    plot(x, y)

# Clear everything
    clear()

Dependencies

mgcv (for scatter.gam(), plot_gam(), and twolines())
rsvg (for resize_images())
magick (for resize_images())
sandwich (for twolines())
lmtest (for twolines())
estimatr (for lm2())

Author

Uri Simonsohn
Email: urisohn@gmail.com

License

GPL-3

Version

0.1.4

These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.