## ----setup, include=FALSE-----------------------------------------------------
library(surveycore)

## ----nest-diagnostic----------------------------------------------------------
# NHANES: only two distinct PSU values, but 15 strata
# Each stratum has its own PSU 1 and PSU 2 → nest = TRUE
length(unique(nhanes_2017$sdmvpsu)) # 2
length(unique(nhanes_2017$sdmvstra)) # 15

## ----nhanes-------------------------------------------------------------------
# Subset to MEC exam participants (ridstatr == 2) before using wtmec2yr.
# The 550 interview-only participants have wtmec2yr = 0 and are not part
# of the exam sample.
nhanes_exam <- nhanes_2017[nhanes_2017$ridstatr == 2, ]

svy_nhanes <- as_survey(
  nhanes_exam,
  ids = sdmvpsu,
  strata = sdmvstra,
  weights = wtmec2yr,
  nest = TRUE # PSU IDs are locally unique within strata
)
svy_nhanes

## ----nhanes-interview---------------------------------------------------------
svy_nhanes_int <- as_survey(
  nhanes_2017,
  ids = sdmvpsu,
  strata = sdmvstra,
  weights = wtint2yr,
  nest = TRUE
)

## ----anes---------------------------------------------------------------------
# Pre-election analysis (party ID, ideology, candidate preference)
svy_anes_pre <- as_survey(
  anes_2024,
  ids = v240103c,
  strata = v240103d,
  weights = v240103a
)

# Post-election analysis (validated vote choice: v242066, v242067)
svy_anes_post <- as_survey(
  anes_2024,
  ids = v240103c,
  strata = v240103d,
  weights = v240103b
)

## ----gss----------------------------------------------------------------------
# Standard analysis weight
svy_gss <- as_survey(
  gss_2024,
  ids = vpsu,
  strata = vstrat,
  weights = wtssps
)

# Non-response adjusted weight (preferred when non-response bias is a concern)
svy_gss_nr <- as_survey(
  gss_2024,
  ids = vpsu,
  strata = vstrat,
  weights = wtssnrps
)

## ----npors--------------------------------------------------------------------
svy_npors <- as_survey(
  pew_npors_2025,
  strata = stratum,
  weights = weight
)

## ----acs----------------------------------------------------------------------
svy_acs <- as_survey_replicate(
  acs_pums_wy,
  weights = pwgtp,
  repweights = pwgtp1:pwgtp80,
  type = "successive-difference"
)
svy_acs

## ----pew-jewish---------------------------------------------------------------
svy_jewish <- as_survey_replicate(
  pew_jewish_2020,
  weights = extweight,
  repweights = extweight1:extweight100,
  type = "JK1"
)
svy_jewish

## ----nwtco, eval=requireNamespace("survival", quietly=TRUE)-------------------
nwtco <- survival::nwtco

# in.subcohort is stored as 0/1 — must be logical for as_survey_twophase()
nwtco$in.subcohort <- as.logical(nwtco$in.subcohort)

# Phase 1: all 4,028 enrolled patients (each patient is their own unit)
phase1 <- as_survey(nwtco, ids = seqno)

# Phase 2: subcohort, with Phase 2 sampling stratified by relapse status
svy_twophase <- as_survey_twophase(
  phase1,
  strata2 = rel, # Phase 2 strata: cases (rel=1) vs. non-cases (rel=0)
  subset = in.subcohort, # Logical column: TRUE = selected into Phase 2
  method = "full"
)
svy_twophase

## ----apisrs-------------------------------------------------------------------
set.seed(101)
N <- 400 # total schools in district
n <- 80 # schools sampled

school_survey <- data.frame(
  school_id = sample(seq_len(N), n),
  avg_score = round(rnorm(n, mean = 72, sd = 11), 1),
  pct_frpl = round(runif(n, 0.10, 0.85), 2), # % free/reduced price lunch
  enrollment = round(runif(n, 180, 850)),
  sw = N / n, # equal sampling weight = 400/80 = 5.0
  fpc = N # population size for FPC
)

svy_srs <- as_survey(
  school_survey,
  weights = sw, # each sampled school represents 5 schools in the population
  fpc = fpc # reduces SEs: we sampled 20% of the population
)
svy_srs

## ----nationscape--------------------------------------------------------------
svy_ns <- as_survey_nonprob(ns_wave1, weights = weight)
svy_ns

# Presidential approval rating (July 2019)
get_freqs(svy_ns, pres_approval)

