The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
The ksformat package provides SAS PROC FORMAT-like functionality for R. This vignette walks through the most common use cases.
Create a format for gender codes (auto-stored in library as “sex”):
fnew(
"M" = "Male",
"F" = "Female",
.missing = "Unknown",
.other = "Other Gender",
name = "sex"
)
gender_codes <- c("M", "F", "M", NA, "X", "F")
formatted_genders <- fput(gender_codes, "sex")
data.frame(
code = gender_codes,
label = formatted_genders
)
#> code label
#> 1 M Male
#> 2 F Female
#> 3 M Male
#> 4 <NA> Unknown
#> 5 X Other Gender
#> 6 F Female
fprint("sex")
#> KS Format:sex
#> Type: character
#> Mappings:
#> M => Male
#> F => Female
#> .missing => Unknown
#> .other => Other GenderDefine formats in SAS-like text (auto-registered):
fparse(text = '
VALUE age (numeric)
[0, 18) = "Child"
[18, 65) = "Adult"
[65, HIGH] = "Senior"
.missing = "Age Unknown"
;
')
ages <- c(5, 15.3, 17.9, 18, 45, 64.99, 65, 85, NA)
age_groups <- fputn(ages, "age")
data.frame(
age = ages,
group = age_groups
)
#> age group
#> 1 5.00 Child
#> 2 15.30 Child
#> 3 17.90 Child
#> 4 18.00 Adult
#> 5 45.00 Adult
#> 6 64.99 Adult
#> 7 65.00 Senior
#> 8 85.00 Senior
#> 9 NA Age Unknownfparse(text = '
VALUE bmi (numeric)
[0, 18.5) = "Underweight"
[18.5, 25) = "Normal"
[25, 30) = "Overweight"
[30, HIGH] = "Obese"
.missing = "No data"
;
')
bmi_values <- c(16.2, 18.5, 22.7, 25, 29.9, 35.1, NA)
bmi_labels <- fputn(bmi_values, "bmi")
data.frame(
bmi = bmi_values,
category = bmi_labels
)
#> bmi category
#> 1 16.2 Underweight
#> 2 18.5 Normal
#> 3 22.7 Normal
#> 4 25.0 Overweight
#> 5 29.9 Overweight
#> 6 35.1 Obese
#> 7 NA No datafparse(text = '
VALUE score (numeric)
(0, 50] = "Low"
(50, 100] = "High"
.other = "Out of range"
;
')
scores <- c(0, 1, 50, 51, 100, 101)
score_labels <- fputn(scores, "score")
data.frame(
score = scores,
label = score_labels
)
#> score label
#> 1 0 Out of range
#> 2 1 Low
#> 3 50 Low
#> 4 51 High
#> 5 100 High
#> 6 101 Out of rangeInvalues convert labels back to values. The default
target_type is "numeric":
finput(
"Male" = 1,
"Female" = 2,
name = "sex_inv"
)
#> KS Invalue: sex_inv
#> Target Type: numeric
#> Mappings:
#> Male => 1
#> Female => 2
labels <- c("Male", "Female", "Male", "Unknown", "Female")
codes <- finputn(labels, "sex_inv")
data.frame(
label = labels,
code = codes
)
#> label code
#> 1 Male 1
#> 2 Female 2
#> 3 Male 1
#> 4 Unknown NA
#> 5 Female 2fnew_bid() creates both a format and an invalue at
once:
status_bi <- fnew_bid(
"A" = "Active",
"I" = "Inactive",
"P" = "Pending",
name = "status"
)
# Forward: code -> label
status_codes <- c("A", "I", "P", "A")
status_labels <- fputc(status_codes, "status")
data.frame(code = status_codes, label = status_labels)
#> code label
#> 1 A Active
#> 2 I Inactive
#> 3 P Pending
#> 4 A Active
# Reverse: label -> code
test_labels <- c("Active", "Pending", "Inactive")
test_codes <- finputc(test_labels, "status_inv")
data.frame(label = test_labels, code = test_codes)
#> label code
#> 1 Active A
#> 2 Pending P
#> 3 Inactive Ifparse(text = '
// Study format definitions
VALUE race (character)
"W" = "White"
"B" = "Black"
"A" = "Asian"
.missing = "Unknown"
;
INVALUE race_inv
"White" = 1
"Black" = 2
"Asian" = 3
;
')
flist() # character vector of names
#> [1] "age" "bmi" "race" "race_inv" "score"
#> [6] "sex" "sex_inv" "status" "status_inv"
fprint()
#> Registered formats:
#> age - VALUE (numeric), 3 mapping(s)
#> bmi - VALUE (numeric), 4 mapping(s)
#> race - VALUE (character), 3 mapping(s)
#> race_inv - INVALUE (numeric), 3 mapping(s)
#> score - VALUE (numeric), 2 mapping(s)
#> sex - VALUE (character), 2 mapping(s)
#> sex_inv - INVALUE (numeric), 2 mapping(s)
#> status - VALUE (character), 3 mapping(s)
#> status_inv - INVALUE (character), 3 mapping(s)df <- data.frame(
id = 1:6,
sex = c("M", "F", "M", "F", NA, "X"),
age = c(15, 25, 45, 70, 35, NA),
stringsAsFactors = FALSE
)
sex_f <- format_get("sex")
age_f <- format_get("age")
df_formatted <- fput_df(
df,
sex = sex_f,
age = age_f,
suffix = "_label"
)
df_formatted
#> id sex age sex_label age_label
#> 1 1 M 15 Male Child
#> 2 2 F 25 Female Adult
#> 3 3 M 45 Male Adult
#> 4 4 F 70 Female Senior
#> 5 5 <NA> 35 Unknown Adult
#> 6 6 X NA Other Gender Age Unknown# With .missing label
fput(c("M", "F", NA), "sex")
#> [1] "Male" "Female" "Unknown"
# With keep_na = TRUE
fput(c("M", "F", NA), sex_f, keep_na = TRUE)
#> [1] "Male" "Female" NA
# is_missing() checks
is_missing(NA)
#> [1] TRUE
is_missing(NaN)
#> [1] TRUE
is_missing("") # TRUE — empty strings are treated as missing
#> [1] TRUESAS date format names are auto-resolved — no pre-creation needed:
today <- Sys.Date()
data.frame(
format = c("DATE9.", "MMDDYY10.", "DDMMYY10.", "YYMMDD10.",
"MONYY7.", "WORDDATE.", "YEAR4.", "QTR."),
result = c(
fputn(today, "DATE9."),
fputn(today, "MMDDYY10."),
fputn(today, "DDMMYY10."),
fputn(today, "YYMMDD10."),
fputn(today, "MONYY7."),
fputn(today, "WORDDATE."),
fputn(today, "YEAR4."),
fputn(today, "QTR.")
)
)
#> format result
#> 1 DATE9. 21MAY2026
#> 2 MMDDYY10. 05/21/2026
#> 3 DDMMYY10. 21/05/2026
#> 4 YYMMDD10. 2026-05-21
#> 5 MONYY7. MAY2026
#> 6 WORDDATE. May 21, 2026
#> 7 YEAR4. 2026
#> 8 QTR. 2
# Multiple dates
dates <- as.Date(c("2020-01-15", "2020-06-30", "2020-12-25"))
fputn(dates, "DATE9.")
#> [1] "15JAN2020" "30JUN2020" "25DEC2020"Time is represented as seconds since midnight:
seconds <- c(0, 3600, 45000, 86399)
data.frame(
seconds = seconds,
TIME8 = fputn(seconds, "TIME8."),
TIME5 = fputn(seconds, "TIME5."),
HHMM = fputn(seconds, "HHMM.")
)
#> seconds TIME8 TIME5 HHMM
#> 1 0 0:00:00 0:00 00:00
#> 2 3600 1:00:00 1:00 01:00
#> 3 45000 12:30:00 12:30 12:30
#> 4 86399 23:59:59 23:59 23:59now <- Sys.time()
data.frame(
format = c("DATETIME20.", "DATETIME13.", "DTDATE.", "DTYYMMDD."),
result = c(
fputn(now, "DATETIME20."),
fputn(now, "DATETIME13."),
fputn(now, "DTDATE."),
fputn(now, "DTYYMMDD.")
)
)
#> format result
#> 1 DATETIME20. 21MAY2026:10:59:14
#> 2 DATETIME13. 21MAY26:10:59
#> 3 DTDATE. 21MAY2026
#> 4 DTYYMMDD. 2026-05-21
# From numeric R-epoch seconds
r_secs <- as.numeric(as.POSIXct("2025-06-15 14:30:00", tz = "UTC"))
fputn(r_secs, "DATETIME20.")
#> [1] "15JUN2025:14:30:00"fnew_date()# SAS-named format
fnew_date("DATE9.", name = "bday_fmt")
#> KS Format:bday_fmt
#> Type: date
#> Pattern: %d%b%Y (DATE9.)
birthdays <- as.Date(c("1990-03-25", "1985-11-03", "2000-07-14"))
fput(birthdays, "bday_fmt")
#> [1] "25MAR1990" "03NOV1985" "14JUL2000"
# Custom strftime pattern (e.g. DD.MM.YYYY)
fnew_date("%d.%m.%Y", name = "ru_date", type = "date")
#> KS Format:ru_date
#> Type: date
#> Pattern: %d.%m.%Y
fput(birthdays, "ru_date")
#> [1] "25.03.1990" "03.11.1985" "14.07.2000"
# Custom pattern with missing label
fnew_date("MMDDYY10.", name = "us_date", .missing = "NO DATE")
#> KS Format:us_date
#> Type: date
#> Pattern: %m/%d/%Y (MMDDYY10.)
#> .missing => NO DATE
mixed <- c(as.Date("2025-01-01"), NA, as.Date("2025-12-31"))
fput(mixed, "us_date")
#> [1] "01/01/2025" "NO DATE" "12/31/2025"
fprint("bday_fmt")
#> KS Format:bday_fmt
#> Type: date
#> Pattern: %d%b%Y (DATE9.)patients <- data.frame(
id = 1:4,
visit_date = as.Date(c("2025-01-10", "2025-02-15", "2025-03-20", NA)),
stringsAsFactors = FALSE
)
visit_fmt <- fnew_date("DATE9.", name = "visit_fmt", .missing = "NOT RECORDED")
fput_df(patients, visit_date = visit_fmt)
#> id visit_date visit_date_fmt
#> 1 1 2025-01-10 10JAN2025
#> 2 2 2025-02-15 15FEB2025
#> 3 3 2025-03-20 20MAR2025
#> 4 4 <NA> NOT RECORDEDfparse(text = '
VALUE enrldt (date)
pattern = "DATE9."
.missing = "Not Enrolled"
;
VALUE visit_time (time)
pattern = "TIME8."
;
VALUE stamp (datetime)
pattern = "DATETIME20."
;
')
fput(as.Date("2025-03-01"), "enrldt")
#> [1] "01MAR2025"
fput(36000, "visit_time")
#> [1] "10:00:00"
fput(as.POSIXct("2025-03-01 10:00:00", tz = "UTC"), "stamp")
#> [1] "01MAR2025:10:00:00"
# Export back to text
enrl_obj <- format_get("enrldt")
cat(fexport(enrldt = enrl_obj))
#> VALUE enrldt (date)
#> pattern = "DATE9."
#> .missing = "Not Enrolled"
#> ;
fclear()
#> All formats cleared from library.With multilabel formats, a single value can match multiple labels:
fnew(
"0,5,TRUE,TRUE" = "Infant",
"6,11,TRUE,TRUE" = "Child",
"12,17,TRUE,TRUE" = "Adolescent",
"0,17,TRUE,TRUE" = "Pediatric",
"18,64,TRUE,TRUE" = "Adult",
"65,Inf,TRUE,TRUE" = "Elderly",
"18,Inf,TRUE,TRUE" = "Non-Pediatric",
name = "age_categories",
type = "numeric",
multilabel = TRUE
)
ages <- c(3, 14, 25, 70)
# fput returns first match only
fput(ages, "age_categories")
#> [1] "Infant" "Pediatric" "Adult" "Non-Pediatric"
# fput_all returns ALL matching labels
all_labels <- fput_all(ages, "age_categories")
for (i in seq_along(ages)) {
cat("Age", ages[i], "->", paste(all_labels[[i]], collapse = ", "), "\n")
}
#> Age 3 -> Infant, Pediatric
#> Age 14 -> Pediatric, Adolescent
#> Age 25 -> Adult, Non-Pediatric
#> Age 70 -> Non-Pediatric, Elderlyfnew(
"0,100,TRUE,TRUE" = "Valid Score",
"0,49,TRUE,TRUE" = "Below Average",
"50,100,TRUE,TRUE" = "Above Average",
"90,100,TRUE,TRUE" = "Excellent",
.missing = "No Score",
.other = "Out of Range",
name = "score_ml",
type = "numeric",
multilabel = TRUE
)
scores <- c(95, 45, NA, 150)
ml_result <- fput_all(scores, "score_ml")
for (i in seq_along(scores)) {
cat("Score", ifelse(is.na(scores[i]), "NA", scores[i]),
"->", paste(ml_result[[i]], collapse = ", "), "\n")
}
#> Score 95 -> Valid Score, Above Average, Excellent
#> Score 45 -> Below Average, Valid Score
#> Score NA -> No Score
#> Score 150 -> Out of Rangefparse(text = '
VALUE risk (numeric, multilabel)
[0, 3] = "Low Risk"
[0, 7] = "Monitored"
(3, 7] = "Medium Risk"
(7, 10] = "High Risk"
;
')
risk_scores <- c(2, 5, 9)
risk_labels <- fput_all(risk_scores, "risk")
for (i in seq_along(risk_scores)) {
cat("Score", risk_scores[i], "->",
paste(risk_labels[[i]], collapse = " | "), "\n")
}
#> Score 2 -> Low Risk | Monitored
#> Score 5 -> Monitored | Medium Risk
#> Score 9 -> High Riskrisk_obj <- format_get("risk")
cat(fexport(risk = risk_obj))
#> VALUE risk (numeric, multilabel)
#> [0, 3] = "Low Risk"
#> [0, 7] = "Monitored"
#> (3, 7] = "Medium Risk"
#> (7, 10] = "High Risk"
#> ;
fprint("risk")
#> KS Format:risk (multilabel)
#> Type: numeric
#> Mappings:
#> [0, 3] => Low Risk
#> [0, 7] => Monitored
#> (3, 7] => Medium Risk
#> (7, 10] => High Riskfnew(
"1,1,TRUE,TRUE" = "Mild",
"2,2,TRUE,TRUE" = "Moderate",
"3,3,TRUE,TRUE" = "Severe",
"4,4,TRUE,TRUE" = "Life-threatening",
"5,5,TRUE,TRUE" = "Fatal",
"3,5,TRUE,TRUE" = "Serious",
"1,2,TRUE,TRUE" = "Non-serious",
name = "ae_grade",
type = "numeric",
multilabel = TRUE
)
grades <- c(1, 2, 3, 4, 5)
ae_labels <- fput_all(grades, "ae_grade")
for (i in seq_along(grades)) {
cat("Grade", grades[i], ":",
paste(ae_labels[[i]], collapse = " + "), "\n")
}
#> Grade 1 : Mild + Non-serious
#> Grade 2 : Non-serious + Moderate
#> Grade 3 : Severe + Serious
#> Grade 4 : Serious + Life-threatening
#> Grade 5 : Serious + Fatal
fclear()
#> All formats cleared from library.sex_nc <- fnew(
"M" = "Male",
"F" = "Female",
.missing = "Unknown",
name = "sex_nc",
type = "character",
ignore_case = TRUE
)
input <- c("m", "F", "M", "f", NA)
fput(input, sex_nc)
#> [1] "Male" "Female" "Male" "Female" "Unknown"
# Note the [nocase] flag
fprint("sex_nc")
#> KS Format:sex_nc (nocase)
#> Type: character
#> Mappings:
#> M => Male
#> F => Female
#> .missing => Unknown
# Also works with fputc
fputc("m", "sex_nc")
#> [1] "Male"
fclear()
#> All formats cleared from library.Expression labels contain .x1, .x2, etc.,
which reference extra arguments passed to fput(). This lets
you compute labels dynamically.
sprintf Expression.x1, .x2)ifelse Expressionmixed_fmt <- fnew(
"header" = "HEADER",
"n" = "sprintf('N=%s', .x1)",
"pct" = "sprintf('%.1f%%', .x1 * 100)",
name = "mixed",
type = "character"
)
keys <- c("header", "n", "pct", "header", "n")
vals <- c(0, 42, 0.15, 0, 100)
fput(keys, mixed_fmt, vals)
#> [1] "HEADER" "N=42" "15.0%" "HEADER" "N=100".other FallbackA realistic clinical-trial example: e() marks labels as
expressions evaluated at apply-time, .x1 references the
extra argument, and multiline dplyr::case_when shows
complex conditional formatting.
# Population counts used as denominators
n.trt <- data.frame(pop = c("fas","pps","saf"), ntot = c(34, 30, 36))
get_n <- function(pop) {
n.trt$ntot[n.trt$pop == pop]
}
fnew(
"n_fas" = e("get_n('fas')"),
"n_pps" = e("get_n('pps')"),
"n_saf" = e("get_n('saf')"),
"n" = "sprintf('%d', .x1)",
"n_pct_fas" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('fas'))",
"n_pct_pps" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('pps'))",
"n_pct_saf" = "sprintf('%d (%5.1f%%)', .x1, .x1 * 100 / get_n('saf'))",
"pct" = "dplyr::case_when(
.x1>0 & .x1<0.1 ~ sprintf('%5s', ' <0.1%'),
.x1>=0.1 | .x1==0 ~ sprintf(paste0('%5.', 1 ,'f%%'), .x1)
)",
"pval" = "dplyr::case_when(
.x1>=0 & .x1<0.001 ~ sprintf('%s', '<0.001'),
.x1>=0.001 & .x1<=0.999 ~ sprintf(paste0('%.', 3 ,'f'), .x1),
.x1>0.999 ~ sprintf('%s', '>0.999'), .default = '--'
)",
name = "stat",
type = "character"
)The same format can be created via fparse(). Note that
multiline expressions must be collapsed to single lines in the text
block, and (eval) marks evaluated labels:
fmt <- '
VALUE stat_01 (character)
"n_fas" = "get_n(\'fas\')" (eval)
"n_pps" = "get_n(\'pps\')" (eval)
"n_saf" = "get_n(\'saf\')" (eval)
"n" = "sprintf(\'%d\', .x1)"
"pct" = "dplyr::case_when(.x1>0 & .x1<0.1 ~ sprintf(\'%5s\', \' <0.1%\'), .x1>=0.1 | .x1==0 ~ sprintf(paste0(\'%5.\', 1 ,\'f%%\'), .x1))"
"n_pct_fas" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'fas\'))"
"n_pct_pps" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'pps\'))"
"n_pct_saf" = "sprintf(\'%d (%5.1f%%)\', .x1, .x1 * 100 / get_n(\'saf\'))"
"pval" = "dplyr::case_when(.x1>=0 & .x1<0.001 ~ sprintf(\'%s\', \'<0.001\'), .x1>=0.001 & .x1<=0.999 ~ sprintf(paste0(\'%.\', 3 ,\'f\'), .x1), .x1>0.999 ~ sprintf(\'%s\', \'>0.999\'), .default = \'--\')"
;'
fparse(fmt)Both stat (via fnew) and
stat_01 (via fparse) produce identical
results:
df <- data.frame(
types = c("n_fas", "n_pps", "n_saf", "n", "pct", "pct", "n", "pval", "pval",
"n_pct_fas", "n_pct_pps", "n_pct_saf"),
values = c(NA, NA, NA, 42, 0.053, 0.0008, 100, 0.255, 0.0003, 22, 22, 22)
)
df$fmt <- fput(df$types, "stat", df$values)
df$fmt_01 <- fput(df$types, "stat_01", df$values)
print(df)
#> types values fmt fmt_01
#> 1 n_fas NA 34 34
#> 2 n_pps NA 30 30
#> 3 n_saf NA 36 36
#> 4 n 42.0000 42 42
#> 5 pct 0.0530 <0.1% <0.1%
#> 6 pct 0.0008 <0.1% <0.1%
#> 7 n 100.0000 100 100
#> 8 pval 0.2550 0.255 0.255
#> 9 pval 0.0003 <0.001 <0.001
#> 10 n_pct_fas 22.0000 22 ( 64.7%) 22 ( 64.7%)
#> 11 n_pct_pps 22.0000 22 ( 73.3%) 22 ( 73.3%)
#> 12 n_pct_saf 22.0000 22 ( 61.1%) 22 ( 61.1%)
fclear()
#> All formats cleared from library.Each element can use a different format, determined by a vector of format names:
# Dispatch format: maps type code to format name
fnew("1" = "groupx", "2" = "groupy", "3" = "groupz",
name = "typefmt", type = "numeric")
# Per-group character formats
fnew("positive" = "agree", "negative" = "disagree", "neutral" = "notsure",
name = "groupx", type = "character")
fnew("positive" = "accept", "negative" = "reject", "neutral" = "possible",
name = "groupy", type = "character")
fnew("positive" = "pass", "negative" = "fail", "neutral" = "retest",
name = "groupz", type = "character")
type <- c(1, 1, 1, 2, 2, 2, 3, 3, 3)
response <- c("positive", "negative", "neutral",
"positive", "negative", "neutral",
"positive", "negative", "neutral")
# Step 1: map type -> format name
respfmt <- fput(type, "typefmt")
# Step 2: apply per-element format
word <- fputc(response, respfmt)
data.frame(type = type, response = response, respfmt = respfmt, word = word)
#> type response respfmt word
#> 1 1 positive groupx agree
#> 2 1 negative groupx disagree
#> 3 1 neutral groupx notsure
#> 4 2 positive groupy accept
#> 5 2 negative groupy reject
#> 6 2 neutral groupy possible
#> 7 3 positive groupz pass
#> 8 3 negative groupz fail
#> 9 3 neutral groupz retest
fclear()
#> All formats cleared from library.A SAS-style workflow where format names are looked up dynamically per observation:
# Format that maps key codes to date format names
fnew("1" = "date9.", "2" = "mmddyy10.",
name = "writfmt", type = "numeric")
fnew_date("date9.")
#> KS Format:DATE9.
#> Type: date
#> Pattern: %d%b%Y (DATE9.)
fnew_date("mmddyy10.")
#> KS Format:MMDDYY10.
#> Type: date
#> Pattern: %m/%d/%Y (MMDDYY10.)
# Input data (R date numbers = days since 1970-01-01)
number <- c(12103, 10899)
key <- c(1, 2)
# Look up format name per observation
datefmt <- fputn(key, "writfmt")
# Apply per-element date format
date <- fputn(number, datefmt)
data.frame(number = number, key = key, datefmt = datefmt, date = date)
#> number key datefmt date
#> 1 12103 1 date9. 20FEB2003
#> 2 10899 2 mmddyy10. 11/04/1999
fclear()
#> All formats cleared from library.The fimport() function reads a CSV file exported from a
SAS format catalogue (PROC FORMAT ... CNTLOUT=):
imported <- fimport(csv_path)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#> '/private/var/folders/rn/3s0h46m118j426j_fmjr1z8m0000gn/T/RtmpcG4IyO/Rinst1310a1404be79/ksformat/extdata/test_cntlout.csv'.
names(imported)
#> [1] "AGEGRP" "BMICAT" "GENDER" "RACEIN" "SMISSING"
flist()
#> [1] "AGEGRP" "BMICAT" "GENDER" "RACEIN" "SMISSING"
fprint()
#> Registered formats:
#> AGEGRP - VALUE (numeric), 3 mapping(s)
#> BMICAT - VALUE (numeric), 4 mapping(s)
#> GENDER - VALUE (character), 2 mapping(s)
#> RACEIN - INVALUE (numeric), 3 mapping(s)
#> SMISSING - VALUE (numeric), 1 mapping(s)# Character format (GENDER)
gender_codes <- c("M", "F", NA, "X")
data.frame(
code = gender_codes,
label = fputc(gender_codes, "GENDER")
)
#> code label
#> 1 M Male
#> 2 F Female
#> 3 <NA> Unknown
#> 4 X X
# Numeric format (AGEGRP)
ages <- c(5, 17, 18, 45, 65, 100, NA, -1)
data.frame(
age = ages,
group = fputn(ages, "AGEGRP")
)
#> age group
#> 1 5 Child
#> 2 17 Child
#> 3 18 Adult
#> 4 45 Adult
#> 5 65 Senior
#> 6 100 Senior
#> 7 NA Missing Age
#> 8 -1 Other
# Numeric format (BMICAT)
bmi_values <- c(15.0, 18.5, 22.3, 25.0, 28.7, 30.0, 35.5)
data.frame(
bmi = bmi_values,
category = fputn(bmi_values, "BMICAT")
)
#> bmi category
#> 1 15.0 Underweight
#> 2 18.5 Normal
#> 3 22.3 Normal
#> 4 25.0 Overweight
#> 5 28.7 Overweight
#> 6 30.0 Obese
#> 7 35.5 Obese
# Invalue (RACEIN)
race_labels <- c("White", "Black", "Asian", "Other")
data.frame(
label = race_labels,
code = finputn(race_labels, "RACEIN")
)
#> label code
#> 1 White 1
#> 2 Black 2
#> 3 Asian 3
#> 4 Other NAdf <- data.frame(
id = 1:5,
sex = c("M", "F", "M", NA, "F"),
age = c(10, 30, 70, NA, 50),
stringsAsFactors = FALSE
)
gender_fmt <- imported[["GENDER"]]
age_fmt <- imported[["AGEGRP"]]
fput_df(df, sex = gender_fmt, age = age_fmt, suffix = "_label")
#> id sex age sex_label age_label
#> 1 1 M 10 Male Child
#> 2 2 F 30 Female Adult
#> 3 3 M 70 Male Senior
#> 4 4 <NA> NA Unknown Missing Age
#> 5 5 F 50 Female Adultcat(fexport(AGEGRP = age_fmt))
#> VALUE AGEGRP (numeric)
#> [0, 17] = "Child"
#> [18, 64] = "Adult"
#> [65, HIGH] = "Senior"
#> .missing = "Missing Age"
#> .other = "Other"
#> ;
cat(fexport(GENDER = gender_fmt))
#> VALUE GENDER (character)
#> "M" = "Male"
#> "F" = "Female"
#> .missing = "Unknown"
#> ;fclear()
#> All formats cleared from library.
manual <- fimport(csv_path, register = FALSE)
#> Warning: Skipping PICTURE format: "PICFMT"
#> ℹ TYPE="P" is not supported by ksformat.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.A' (HLO='S') has no R equivalent.
#> Warning: Skipped incompatible entry in format "SMISSING":
#> ✖ SAS special missing value '.B' (HLO='S') has no R equivalent.
#> ✔ Imported 4 formats and 1 invalue from
#> '/private/var/folders/rn/3s0h46m118j426j_fmjr1z8m0000gn/T/RtmpcG4IyO/Rinst1310a1404be79/ksformat/extdata/test_cntlout.csv'.
# Library should be empty
flist()
#> character(0)
fprint()
#> Format library is empty
# Use directly from returned list
fput(c("M", "F"), manual[["GENDER"]])
#> [1] "Male" "Female"
fclear()
#> All formats cleared from library.Expression labels can select between languages at apply-time using an extra argument:
# Single format, language selected via .x1 extra argument
sex_bi <- fnew(
"M" = "ifelse(.x1 == 'en', 'Male', 'Homme')",
"F" = "ifelse(.x1 == 'en', 'Female', 'Femme')",
.missing = "Unknown",
name = "sex_bi"
)
# .x1 = language code per observation
fput(c("M", "F", "M"), sex_bi, c("en", "fr", "en"))
#> [1] "Male" "Femme" "Male"
# -> "Male" "Femme" "Male"
# Alternative: one format per language, selected at apply-time
fnew("M" = "Male", "F" = "Female", .missing = "Unknown", name = "sex_en")
fnew("M" = "Homme", "F" = "Femme", .missing = "Inconnu", name = "sex_fr")
lang <- "fr"
fput(c("M", "F", NA), paste0("sex_", lang))
#> [1] "Homme" "Femme" "Inconnu"
# -> "Homme" "Femme" "Inconnu"
fclear()
#> All formats cleared from library.fputk()fputk() pastes multiple vectors into a composite key
before format lookup. This is useful when a format is keyed on the
combination of several columns, a common pattern in clinical data (e.g.,
looking up a visit date by subject + visit number).
# Simulate a Subject Visits (SV) domain
SV <- data.frame(
USUBJID = c("SUBJ-001", "SUBJ-001", "SUBJ-001", "SUBJ-002", "SUBJ-002"),
VISITNUM = c(1, 2, 3, 1, 2),
SVSTDTC = c("2025-01-15", "2025-02-20", "2025-03-10",
"2025-01-18", "2025-02-25"),
stringsAsFactors = FALSE
)
# Simulate a Questionnaires (QS) domain
QS <- data.frame(
USUBJID = c("SUBJ-001", "SUBJ-001", "SUBJ-002", "SUBJ-002", "SUBJ-002"),
VISITNUM = c(1, 2, 1, 2, 3),
QSTESTCD = c("SCORE1", "SCORE1", "SCORE1", "SCORE1", "SCORE1"),
QSSTRESN = c(85, 90, 72, 78, NA),
stringsAsFactors = FALSE
)
SV
#> USUBJID VISITNUM SVSTDTC
#> 1 SUBJ-001 1 2025-01-15
#> 2 SUBJ-001 2 2025-02-20
#> 3 SUBJ-001 3 2025-03-10
#> 4 SUBJ-002 1 2025-01-18
#> 5 SUBJ-002 2 2025-02-25
QS
#> USUBJID VISITNUM QSTESTCD QSSTRESN
#> 1 SUBJ-001 1 SCORE1 85
#> 2 SUBJ-001 2 SCORE1 90
#> 3 SUBJ-002 1 SCORE1 72
#> 4 SUBJ-002 2 SCORE1 78
#> 5 SUBJ-002 3 SCORE1 NARegister a format keyed on USUBJID|VISITNUM with values
being the visit start date (SVSTDTC) as character
strings:
# Create composite key -> date string mapping from SV
fnew(
fmap(paste(SV$USUBJID, SV$VISITNUM, sep = "|"), SV$SVSTDTC),
.other = "NOT FOUND",
name = "svdtc",
type = "character",
ignore_case = TRUE
)
fprint("svdtc")
#> KS Format:svdtc (nocase)
#> Type: character
#> Mappings:
#> SUBJ-001|1 => 2025-01-15
#> SUBJ-001|2 => 2025-02-20
#> SUBJ-001|3 => 2025-03-10
#> SUBJ-002|1 => 2025-01-18
#> SUBJ-002|2 => 2025-02-25
#> .other => NOT FOUNDNow look up visit dates in the QS domain using
fputk():
QS$SVSTDTC <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtc")
QS
#> USUBJID VISITNUM QSTESTCD QSSTRESN SVSTDTC
#> 1 SUBJ-001 1 SCORE1 85 2025-01-15
#> 2 SUBJ-001 2 SCORE1 90 2025-02-20
#> 3 SUBJ-002 1 SCORE1 72 2025-01-18
#> 4 SUBJ-002 2 SCORE1 78 2025-02-25
#> 5 SUBJ-002 3 SCORE1 NA NOT FOUND
class(QS$SVSTDTC) # character
#> [1] "character"
fclear()
#> All formats cleared from library.Using type = "Date", values are stored as native R
Date objects and fput()/fputk()
return them directly — no string conversion needed:
# Create composite key -> Date mapping from SV
fnew(
fmap(
paste(SV$USUBJID, SV$VISITNUM, sep = "|"),
as.Date(SV$SVSTDTC, format = "%Y-%m-%d")
),
.other = NA,
name = "svdtn",
type = "Date",
ignore_case = TRUE
)
fprint("svdtn")
#> KS Format:svdtn (nocase)
#> Type: Date
#> Mappings:
#> SUBJ-001|1 => 2025-01-15
#> SUBJ-001|2 => 2025-02-20
#> SUBJ-001|3 => 2025-03-10
#> SUBJ-002|1 => 2025-01-18
#> SUBJ-002|2 => 2025-02-25QS$SVSTDTC_DT <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtn")
QS
#> USUBJID VISITNUM QSTESTCD QSSTRESN SVSTDTC SVSTDTC_DT
#> 1 SUBJ-001 1 SCORE1 85 2025-01-15 2025-01-15
#> 2 SUBJ-001 2 SCORE1 90 2025-02-20 2025-02-20
#> 3 SUBJ-002 1 SCORE1 72 2025-01-18 2025-01-18
#> 4 SUBJ-002 2 SCORE1 78 2025-02-25 2025-02-25
#> 5 SUBJ-002 3 SCORE1 NA NOT FOUND <NA>
class(QS$SVSTDTC_DT) # Date
#> [1] "Date"
# Typed NA for unmatched keys (SUBJ-002 Visit 3 not in SV)
is.na(QS$SVSTDTC_DT[5])
#> [1] TRUE
# Date arithmetic works directly
QS$SVSTDTC_DT + 7 # add 7 days
#> [1] "2025-01-22" "2025-02-27" "2025-01-25" "2025-03-04" NA
fclear()
#> All formats cleared from library.fmap()When building formats from data (e.g., a data frame with 1000+ rows),
you need a named vector mapping keys to values. By default,
fnew() treats named vectors differently depending on the
output type:
Date,
POSIXct, logical): c(key = value)
— natural direction, no reversal.c(Label = "Code")
— R convention, names and values are swapped
internally.This inconsistency is confusing for data-driven formats. The
fmap() helper solves it: fmap(keys, values)
works identically for all types.
Suppose we have a demographics dataset and need two lookup formats from the same data — one returning Date objects, one returning character strings:
fmap(keys, values) pattern for both typesBoth formats use the identical calling style —
fmap(keys, values) where keys are input lookup values and
values are output objects:
# Date lookup
fnew(
fmap(keys, as.Date(dm$RFICDTC, format = "%Y-%m-%d")),
.other = NA,
type = "Date",
ignore_case = TRUE,
name = "icdtn"
)
# Character lookup — same fmap(keys, values) pattern!
fnew(
fmap(keys, dm$RFICDTC),
.other = "NOT FOUND",
type = "character",
ignore_case = TRUE,
name = "icdtc"
)
fprint("icdtn")
#> KS Format:icdtn (nocase)
#> Type: Date
#> Mappings:
#> SUBJ-001|001 => 2023-03-09
#> SUBJ-002|002 => 2024-08-13
#> SUBJ-003|003 => 2025-06-17
fprint("icdtc")
#> KS Format:icdtc (nocase)
#> Type: character
#> Mappings:
#> SUBJ-001|001 => 2023-03-09T08:45
#> SUBJ-002|002 => 2024-08-13T09:53
#> SUBJ-003|003 => 2025-06-17T09:03
#> .other => NOT FOUND# Both return the expected results
fputk("SUBJ-001", "001", format = "icdtn")
#> [1] "2023-03-09"
class(fputk("SUBJ-001", "001", format = "icdtn"))
#> [1] "Date"
fputk("SUBJ-001", "001", format = "icdtc")
#> [1] "2023-03-09T08:45"
class(fputk("SUBJ-001", "001", format = "icdtc"))
#> [1] "character"
fclear()
#> All formats cleared from library.No extra parameters needed — fmap() tells
fnew() to use the natural direction for all types.
The default auto-reversal preserves the standard R convention where
c(Label = "Code") maps Code -> Label. This
is natural for hand-written formats:
| Use case | Style | Reversal |
|---|---|---|
| Data-driven (any type) | fmap(keys, values) |
Suppressed |
| Hand-written (char/num) | c(Label = "Code") or
"Code" = "Label" |
Auto (default) |
Value types (Date, etc.) |
fmap(keys, values) or
setNames(values, keys) |
No reversal (default) |
fparse() and
fputk()Examples 20–21 built composite-key formats programmatically with
fnew() and fmap(). When the mapping is
small and known in advance (e.g., a study-specific
visit schedule), you can define the same lookup entirely in text with
fparse().
The simplest approach: store dates as character strings using a
regular character format.
fparse(text = '
VALUE svdtc (character, nocase)
"SUBJ-001|1" = "2025-01-15"
"SUBJ-001|2" = "2025-02-20"
"SUBJ-001|3" = "2025-03-10"
"SUBJ-002|1" = "2025-01-18"
"SUBJ-002|2" = "2025-02-25"
.other = "NOT FOUND"
;
')
fprint("svdtc")
#> KS Format:svdtc (nocase)
#> Type: character
#> Mappings:
#> SUBJ-001|1 => 2025-01-15
#> SUBJ-001|2 => 2025-02-20
#> SUBJ-001|3 => 2025-03-10
#> SUBJ-002|1 => 2025-01-18
#> SUBJ-002|2 => 2025-02-25
#> .other => NOT FOUNDApply with fputk() to look up visit dates from a
questionnaire domain:
QS <- data.frame(
USUBJID = c("SUBJ-001", "SUBJ-001", "SUBJ-002", "SUBJ-002", "SUBJ-002"),
VISITNUM = c(1, 2, 1, 2, 3),
QSSTRESN = c(85, 90, 72, 78, NA),
stringsAsFactors = FALSE
)
QS$SVSTDTC <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtc")
QS
#> USUBJID VISITNUM QSSTRESN SVSTDTC
#> 1 SUBJ-001 1 85 2025-01-15
#> 2 SUBJ-001 2 90 2025-02-20
#> 3 SUBJ-002 1 72 2025-01-18
#> 4 SUBJ-002 2 78 2025-02-25
#> 5 SUBJ-002 3 NA NOT FOUND
fclear()
#> All formats cleared from library.Use the Date value type with format: to
store dates as native R Date objects. The
format: parameter tells fparse() how to parse
the date strings in the text block:
fparse(text = '
VALUE svdtn (Date, format: %Y-%m-%d, nocase)
"SUBJ-001|1" = "2025-01-15"
"SUBJ-001|2" = "2025-02-20"
"SUBJ-001|3" = "2025-03-10"
"SUBJ-002|1" = "2025-01-18"
"SUBJ-002|2" = "2025-02-25"
;
')
fprint("svdtn")
#> KS Format:svdtn (nocase)
#> Type: Date
#> Mappings:
#> SUBJ-001|1 => 2025-01-15
#> SUBJ-001|2 => 2025-02-20
#> SUBJ-001|3 => 2025-03-10
#> SUBJ-002|1 => 2025-01-18
#> SUBJ-002|2 => 2025-02-25Now fputk() returns real Date objects —
arithmetic and comparison work directly:
QS$SVSTDTC_DT <- fputk(QS$USUBJID, QS$VISITNUM, format = "svdtn")
QS
#> USUBJID VISITNUM QSSTRESN SVSTDTC SVSTDTC_DT
#> 1 SUBJ-001 1 85 2025-01-15 2025-01-15
#> 2 SUBJ-001 2 90 2025-02-20 2025-02-20
#> 3 SUBJ-002 1 72 2025-01-18 2025-01-18
#> 4 SUBJ-002 2 78 2025-02-25 2025-02-25
#> 5 SUBJ-002 3 NA NOT FOUND <NA>
class(QS$SVSTDTC_DT) # Date
#> [1] "Date"
is.na(QS$SVSTDTC_DT[5]) # TRUE — no match for SUBJ-002 Visit 3
#> [1] TRUE
# Date arithmetic works directly
QS$SVSTDTC_DT + 7
#> [1] "2025-01-22" "2025-02-27" "2025-01-25" "2025-03-04" NAFormats created with fparse() can be exported back to
text with fexport() and re-parsed — useful for
version-controlled format definitions:
franges()franges() extracts all range-based mappings from a
format and returns them as a tidy data.frame — useful for
auditing, documentation, or downstream processing.
fparse(text = '
VALUE age (numeric)
[0, 18) = "Child"
[18, 65) = "Adult"
[65, HIGH] = "Senior"
.missing = "Unknown"
;
')
franges("age")
#> low high inc_low inc_high label
#> 1 0 18 TRUE FALSE Child
#> 2 18 65 TRUE FALSE Adult
#> 3 65 Inf TRUE TRUE SeniorYou can use the result like any data frame — filter, display, or feed into further calculations:
df <- franges("age")
# Which ranges have a finite upper bound?
df[is.finite(df$high), ]
#> low high inc_low inc_high label
#> 1 0 18 TRUE FALSE Child
#> 2 18 65 TRUE FALSE Adultfranges() silently excludes discrete entries
(.missing, .other, plain string keys) — only
range rows appear. It returns an empty data.frame with the
same columns when the format contains no ranges.
fmap_to_ranges()When a range format stores numeric codes as its
labels (e.g. visit windows coded as weeks),
fmap_to_ranges() turns a vector of those codes back into
the original [low, high] bounds — one row per input
value.
fparse(text = '
VALUE visit_ther (numeric)
[LOW, 1] = 0
[ 8, 22] = 2
[22, 36] = 4
[37, 50] = 6
[51, 63] = 8
[64, 78] = 10
[79, 91] = 12
;
')
coded_weeks <- c(0, 2, 4, 6, 8, 10, 12)
fmap_to_ranges(coded_weeks, "visit_ther")
#> low high inc_low inc_high
#> 1 -Inf 1 TRUE TRUE
#> 2 8 22 TRUE TRUE
#> 3 22 36 TRUE TRUE
#> 4 37 50 TRUE TRUE
#> 5 51 63 TRUE TRUE
#> 6 64 78 TRUE TRUE
#> 7 79 91 TRUE TRUEUnmatched values produce NA rows, making it safe to pass
arbitrary vectors:
date_range and datetime_range formats
bucket Date or POSIXct input into character
labels using ISO date/datetime interval bounds. They reuse the same
range-table engine as numeric ranges, so the findInterval()
fast path is active for sorted, disjoint buckets.
fnew(
"2023-01-01,2024-01-01,TRUE,FALSE" = "FY23",
"2024-01-01,2025-01-01,TRUE,FALSE" = "FY24",
"2025-01-01,2026-01-01,TRUE,FALSE" = "FY25",
type = "date_range",
name = "fiscal_year"
)
dates <- as.Date(c("2023-06-15", "2024-03-01", "2024-12-31",
"2025-07-04", "2022-01-01", NA))
data.frame(
date = dates,
fy = fput(dates, "fiscal_year")
)
#> date fy
#> 1 2023-06-15 FY23
#> 2 2024-03-01 FY24
#> 3 2024-12-31 FY24
#> 4 2025-07-04 FY25
#> 5 2022-01-01 2022-01-01
#> 6 <NA> <NA>fparse()fparse(text = '
VALUE quarter (date_range)
[2024-01-01, 2024-04-01) = "Q1-2024"
[2024-04-01, 2024-07-01) = "Q2-2024"
[2024-07-01, 2024-10-01) = "Q3-2024"
[2024-10-01, 2025-01-01) = "Q4-2024"
.other = "Outside 2024"
;
')
sample_dates <- as.Date(c("2024-02-14", "2024-05-20", "2024-08-08",
"2024-11-30", "2025-03-01"))
data.frame(
date = sample_dates,
quarter = fput(sample_dates, "quarter")
)
#> date quarter
#> 1 2024-02-14 Q1-2024
#> 2 2024-05-20 Q2-2024
#> 3 2024-08-08 Q3-2024
#> 4 2024-11-30 Q4-2024
#> 5 2025-03-01 Outside 2024LOW / HIGH open-ended boundsLOW and HIGH represent \(-\infty\) and \(+\infty\) — any date before or after a
cutpoint falls in the open arm.
fparse(text = '
VALUE era (date_range)
[LOW, 2000-01-01) = "Pre-2000"
[2000-01-01, 2010-01-01) = "2000s"
[2010-01-01, 2020-01-01) = "2010s"
[2020-01-01, HIGH] = "2020+"
;
')
event_dates <- as.Date(c("1985-07-04", "2005-12-25",
"2015-06-01", "2023-11-11"))
data.frame(
date = event_dates,
era = fput(event_dates, "era")
)
#> date era
#> 1 1985-07-04 Pre-2000
#> 2 2005-12-25 2000s
#> 3 2015-06-01 2010s
#> 4 2023-11-11 2020+Formats export with ISO date bounds and re-parse without loss:
multilabel and
fput_all()fparse(text = '
VALUE study_window (date_range, multilabel)
[2024-01-01, 2024-07-01) = "First Half"
[2024-04-01, 2024-10-01) = "Mid-Year"
[2024-07-01, 2025-01-01) = "Second Half"
;
')
checkup_dates <- as.Date(c("2024-02-15", "2024-05-20", "2024-09-01"))
all_windows <- fput_all(checkup_dates, "study_window")
for (i in seq_along(checkup_dates)) {
cat(format(checkup_dates[i]), "->",
paste(all_windows[[i]], collapse = " | "), "\n")
}
#> 2024-02-15 -> First Half
#> 2024-05-20 -> First Half | Mid-Year
#> 2024-09-01 -> Mid-Year | Second HalfWhen no explicit type is given, fparse() infers
date_range from ISO date bounds and
datetime_range when bounds include a time component:
fparse(text = '
VALUE auto_fy
[2024-01-01, 2025-01-01) = "2024"
;
VALUE auto_shift
[2024-01-15 08:00, 2024-01-15 16:00) = "Day shift"
;
')
cat("auto_fy type :", format_get("auto_fy")$type, "\n")
#> auto_fy type : date_range
cat("auto_shift type:", format_get("auto_shift")$type, "\n")
#> auto_shift type: datetime_rangedatetime_range works identically to
date_range but matches against POSIXct values. Bounds are
expressed as YYYY-MM-DD HH:MM[:SS] strings.
fparse(text = '
VALUE shift (datetime_range)
[2024-01-15 00:00, 2024-01-15 08:00) = "Night"
[2024-01-15 08:00, 2024-01-15 16:00) = "Day"
[2024-01-15 16:00, 2024-01-16 00:00) = "Evening"
;
')
timestamps <- as.POSIXct(
c("2024-01-15 03:22:00", "2024-01-15 11:45:00",
"2024-01-15 19:00:00"),
tz = "UTC"
)
data.frame(
ts = format(timestamps, tz = "UTC"),
shift = fput(timestamps, "shift")
)
#> ts shift
#> 1 2024-01-15 03:22:00 Night
#> 2 2024-01-15 11:45:00 Day
#> 3 2024-01-15 19:00:00 Eveningfputk()The stratified_range type combines a discrete stratum
(such as a study arm, subject id, or any composite key) with a numeric /
Date / POSIXct range. Each stratum has its own bucket boundaries, and
fputk() dispatches to the right bucket for each row.
fmap_strata()visits <- fmap_strata(
stratum = c("ARM_A", "ARM_A", "ARM_A", "ARM_B", "ARM_B"),
low = c(0, 7, 28, 0, 14),
high = c(7, 28, Inf, 14, Inf),
label = c("Baseline", "Wk1-3", "Wk4+", "Baseline", "Wk2+"),
inc_high = c(FALSE, FALSE, TRUE, FALSE, TRUE)
)
fnew(visits, type = "stratified_range",
".other|ARM_A" = "A_outside",
.other = "outside_window",
name = "vw")
df <- data.frame(
arm = c("ARM_A", "ARM_A", "ARM_B", "ARM_B", "ARM_C"),
day = c(3, 35, 5, 40, 10)
)
df$visit <- fputk(df$arm, df$day, format = "vw")
df
#> arm day visit
#> 1 ARM_A 3 Baseline
#> 2 ARM_A 35 Wk4+
#> 3 ARM_B 5 Baseline
#> 4 ARM_B 40 Wk2+
#> 5 ARM_C 10 outside_windowfparse()fparse(text = '
VALUE vw_text (stratified_range, range_subtype: numeric)
"ARM_A"|[0, 7) = "Baseline"
"ARM_A"|[7, 28) = "Wk1-3"
"ARM_A"|[28, HIGH]= "Wk4+"
"ARM_B"|[0, 14) = "Baseline"
"ARM_B"|[14, HIGH]= "Wk2+"
".other|ARM_A" = "A_outside"
.other = "outside_window"
;
')
fputk(df$arm, df$day, format = "vw_text")
#> [1] "Baseline" "Wk4+" "Baseline" "Wk2+"
#> [5] "outside_window"windows <- fmap_strata(
stratum = c("S001", "S001", "S002", "S002"),
low = as.Date(c("2024-01-01", "2024-01-15",
"2024-02-01", "2024-02-20")),
high = as.Date(c("2024-01-15", "2024-02-01",
"2024-02-20", "2024-03-10")),
label = c("Screen", "Treat", "Screen", "Treat")
)
fnew(windows, type = "stratified_range", range_subtype = "date",
.other = "off-window", name = "win")
subj <- c("S001", "S001", "S002", "S002", "S003")
visits <- as.Date(c("2024-01-05", "2024-01-20",
"2024-02-10", "2024-03-01", "2024-01-01"))
data.frame(
subj = subj,
date = visits,
phase = fputk(subj, visits, format = "win")
)
#> subj date phase
#> 1 S001 2024-01-05 Screen
#> 2 S001 2024-01-20 Treat
#> 3 S002 2024-02-10 Screen
#> 4 S002 2024-03-01 Treat
#> 5 S003 2024-01-01 off-windowfexport() / fparse()txt <- fexport(format_get("vw"))
cat(txt, "\n")
#> VALUE vw (stratified_range, range_subtype: numeric, strata_sep: |)
#> "ARM_A"|[0, 7) = "Baseline"
#> "ARM_A"|[7, 28) = "Wk1-3"
#> "ARM_A"|[28, HIGH] = "Wk4+"
#> ".other|ARM_A" = "A_outside"
#> "ARM_B"|[0, 14) = "Baseline"
#> "ARM_B"|[14, HIGH] = "Wk2+"
#> .other = "outside_window"
#> ;
fclear()
#> All formats cleared from library.
fparse(text = txt)
fputk(df$arm, df$day, format = "vw")
#> [1] "Baseline" "Wk4+" "Baseline" "Wk2+"
#> [5] "outside_window"fmap_ranges()For non-stratified numeric / Date ranges, fmap_ranges()
saves you from hand-crafting canonical keys.
na_as_string)When building a format from data using
fmap(paste(..., sep = "|"), values), base R’s
paste() converts any NA component to the
literal string "NA". The resulting
composite key is therefore "CAT|TEST|NA", not a missing
value.
By default, fputk() restores NA_character_
wherever any component is NA before the lookup — so the key
"CAT|TEST|NA" is never reached and the row falls through to
.other / .missing.
Setting na_as_string = TRUE keeps paste()’s
literal "NA", making the round-trip consistent.
A common ADaM task: derive PARAMCD from a combination of
LBCAT, LBSPEC, LBTESTCD, and
LBSTRESU, where some rows have LBSTRESU = NA
(dimensionless tests such as INR).
# Source lab mapping (as received from a specification)
lb_map <- data.frame(
LBCAT = c("BLOOD CHEMISTRY", "COAGULOGRAM", "COAGULATION PANEL", "COAGULOGRAM"),
LBSPEC = c("BLOOD", "BLOOD", "BLOOD", "BLOOD"),
LBTESTCD = c("ALB", "FIBRINO", "INR", "INR"),
LBSTRESU = c("g/L", "g/L", NA, NA),
PARAMCD = c("ALB", "FIBRINO", "INR", "INR"),
stringsAsFactors = FALSE
)
lb_map
#> LBCAT LBSPEC LBTESTCD LBSTRESU PARAMCD
#> 1 BLOOD CHEMISTRY BLOOD ALB g/L ALB
#> 2 COAGULOGRAM BLOOD FIBRINO g/L FIBRINO
#> 3 COAGULATION PANEL BLOOD INR <NA> INR
#> 4 COAGULOGRAM BLOOD INR <NA> INRBuild the format with fmap(paste(...), PARAMCD).
paste() converts NA in LBSTRESU
to "NA", so the stored keys for INR rows are
"COAGULATION PANEL|BLOOD|INR|NA" and
"COAGULOGRAM|BLOOD|INR|NA".
with(lb_map,
fmap(paste(LBCAT, LBSPEC, LBTESTCD, LBSTRESU, sep = "|"), PARAMCD)
) |>
fnew(ignore_case = TRUE, .other = NA,
type = "character", name = "lb_param")
fprint("lb_param")
#> KS Format:lb_param (nocase)
#> Type: character
#> Mappings:
#> BLOOD CHEMISTRY|BLOOD|ALB|g/L => ALB
#> COAGULOGRAM|BLOOD|FIBRINO|g/L => FIBRINO
#> COAGULATION PANEL|BLOOD|INR|NA => INR
#> COAGULOGRAM|BLOOD|INR|NA => INR
#> .other => NANow apply the format.
With the default na_as_string = FALSE, the INR rows get
NA (no match):
lb_map$PARAMCD_default <- with(lb_map,
fputk(LBCAT, LBSPEC, LBTESTCD, LBSTRESU, format = "lb_param")
)
lb_map[, c("LBTESTCD", "LBSTRESU", "PARAMCD", "PARAMCD_default")]
#> LBTESTCD LBSTRESU PARAMCD PARAMCD_default
#> 1 ALB g/L ALB ALB
#> 2 FIBRINO g/L FIBRINO FIBRINO
#> 3 INR <NA> INR <NA>
#> 4 INR <NA> INR <NA>With na_as_string = TRUE, paste() also
converts the lookup-side NA to "NA", so the
keys match:
lb_map$PARAMCD_back <- with(lb_map,
fputk(LBCAT, LBSPEC, LBTESTCD, LBSTRESU,
format = "lb_param", na_as_string = TRUE)
)
lb_map[, c("LBTESTCD", "LBSTRESU", "PARAMCD", "PARAMCD_back")]
#> LBTESTCD LBSTRESU PARAMCD PARAMCD_back
#> 1 ALB g/L ALB ALB
#> 2 FIBRINO g/L FIBRINO FIBRINO
#> 3 INR <NA> INR INR
#> 4 INR <NA> INR INRUse
na_as_string = TRUEwhenever the format was built with
fmap(paste(...), values)and any key column can containNA.
If the format keys were set by hand
(fnew("CAT|TEST|g/L" = "ALB", ...)), NA
components should still go through .missing — keep the
default na_as_string = FALSE.
finputk()finputk() is the invalue-side mirror of
fputk(): it pastes multiple columns into a composite label
and reverse-looks it up in a ks_invalue format. The same
na_as_string argument applies.
# Build an INVALUE from two-column composite labels
finput(
fmap(paste(c("BLOOD CHEMISTRY", "COAGULOGRAM", "COAGULATION PANEL"),
c("ALB", "FIBRINO", "INR"),
sep = "|"),
c(1L, 2L, 3L)),
target_type = "integer",
name = "lb_code_inv"
)
#> KS Invalue: lb_code_inv
#> Target Type: integer
#> Mappings:
#> BLOOD CHEMISTRY|ALB => 1
#> COAGULOGRAM|FIBRINO => 2
#> COAGULATION PANEL|INR => 3
# Reverse lookup: two separate columns → integer code
cat_vec <- c("BLOOD CHEMISTRY", "COAGULOGRAM", "COAGULATION PANEL", "OTHER")
test_vec <- c("ALB", "FIBRINO", "INR", "X")
finputk(cat_vec, test_vec, invalue_name = "lb_code_inv")
#> [1] 1 2 3 NA
# BLOOD CHEMISTRY|ALB → 1, COAGULOGRAM|FIBRINO → 2,
# COAGULATION PANEL|INR → 3, OTHER|X → NA (no match → missing_value)
fclear()
#> All formats cleared from library.na_as_string = TRUE)When the INVALUE was built from data containing NA
columns, use na_as_string = TRUE on both the build side
(fmap(paste(...), ...)) and the lookup side
(finputk(..., na_as_string = TRUE)).
# INVALUE where LBSTRESU can be NA (like INR)
finput(
fmap(
paste(lb_map$LBCAT, lb_map$LBTESTCD, lb_map$LBSTRESU, sep = "|"),
seq_len(nrow(lb_map))
),
target_type = "integer",
name = "lb_row_inv"
)
#> KS Invalue: lb_row_inv
#> Target Type: integer
#> Mappings:
#> BLOOD CHEMISTRY|ALB|g/L => 1
#> COAGULOGRAM|FIBRINO|g/L => 2
#> COAGULATION PANEL|INR|NA => 3
#> COAGULOGRAM|INR|NA => 4
# Reconstruct lb_map row indices — works even when LBSTRESU is NA
finputk(lb_map$LBCAT, lb_map$LBTESTCD, lb_map$LBSTRESU,
invalue_name = "lb_row_inv", na_as_string = TRUE)
#> [1] 1 2 3 4
fclear()
#> All formats cleared from library.The output type is always determined by the stored invalue’s
target_type (here integer). For character
output create the invalue with target_type = "character"
and finputk() returns a character vector.
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.