The hardware and bandwidth for this mirror is donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our Wordpress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
In this vignette, we demonstrate how to use the OSAT score (Yan et al. (2012)).
library(designit)
library(tidyverse)
if (!requireNamespace("OSAT")) {
print("This vignette can only be rendered if `OSAT` package is installed.")
knitr::knit_exit()
}
#> Loading required namespace: OSAT
Loading samples. We add two dummy columns to demonstrate how to choose batch columns of interest.
osat_data_path <- system.file("extdata", package = "OSAT")
samples <- read_tsv(file.path(osat_data_path, "samples.txt"),
col_types = cols(SampleType = col_factor(), Race = col_factor(), AgeGrp = col_factor())
) |>
mutate(dummy_var1 = rnorm(n()), dummy_var2 = str_c(SampleType, Race, sep = " "))
Here we use OSAT to optimize setup.
gs <- OSAT::setup.sample(samples, optimal = c("SampleType", "Race", "AgeGrp"))
gc <- OSAT::setup.container(OSAT::IlluminaBeadChip96Plate, 7, batch = "plates")
set.seed(1234)
bench::system_time(
g_setup <- OSAT::create.optimized.setup(sample = gs, container = gc, nSim = params$iterations)
)
#> Warning in OSAT::create.optimized.setup(sample = gs, container = gc, nSim =
#> params$iterations): Using default optimization method: optimal.shuffle
#> process real
#> 480ms 476ms
OSAT::QC(g_setup)
#>
#> Test independence between "plates" and sample variables
#>
#> Pearson's Chi-squared test
#> Var X-squared df p.value
#> 1 SampleType 0.1126719 6 0.9999714
#> 2 Race 0.3062402 6 0.9994663
#> 3 AgeGrp 1.8220606 24 1.0000000
#>
Saving starting point of optimization
set.seed(1234)
g_setup_start <- OSAT::create.optimized.setup(sample = gs, container = gc, nSim = 1) |>
OSAT::get.experiment.setup()
#> Warning in OSAT::create.optimized.setup(sample = gs, container = gc, nSim = 1):
#> Using default optimization method: optimal.shuffle
Visualize various batch factors. OSAT score is optimized only for
plates
in this case.
OSAT::get.experiment.setup(g_setup) |>
select(AgeGrp, plates, chipRows, chipColumns, chips, rows, columns, wells) |>
pivot_longer(-AgeGrp) |>
count(AgeGrp, value, name) |>
ggplot(aes(AgeGrp, n, fill = factor(value))) +
geom_col(position = "dodge") +
facet_wrap(~name, scales = "free_y")
plot_batch <- function(df) {
df |>
select(plates, SampleType, Race, AgeGrp) |>
pivot_longer(c(SampleType, Race, AgeGrp), names_to = "variable", values_to = "level") |>
count(plates, variable, level) |>
ggplot(aes(level, n, fill = factor(plates))) +
geom_col(position = "dodge") +
facet_wrap(~variable, scales = "free", ncol = 1)
}
Before the optimization.
After the optimization.
Compare OSAT score generated using designit.
OSAT::getLayout(gc) |>
left_join(OSAT::get.experiment.setup(g_setup)) |>
data.table::data.table() |>
osat_score("plates", c("SampleType", "Race", "AgeGrp")) |>
with(score)
#> Joining with `by = join_by(plates, chipRows, chipColumns, chips, rows, columns,
#> wells)`
#> Warning in osat_score(data.table::data.table(left_join(OSAT::getLayout(gc), :
#> NAs in features / batch columns; they will be excluded from scoring
#> [1] 26.85714
# score using OSAT
g_setup@metadata$optValue |> tail(1)
#> [1] 28.85714
First let’s create a BatchContainer with same dimensions.
bc <- BatchContainer$new(
dimensions = c(plates = 7, chips = 8, rows = 6, columns = 2)
)
bc
#> Batch container with 672 locations.
#> Dimensions: plates, chips, rows, columns
bc$n_locations
#> [1] 672
Assign samples and get initial setup.
bc <- assign_in_order(bc, samples)
starting_assignment <- bc$get_locations() |>
left_join(g_setup_start) |>
pull(ID) |>
as.integer()
#> Joining with `by = join_by(plates, chips, rows, columns)`
bc$move_samples(location_assignment = starting_assignment)
bc$get_samples(remove_empty_locations = TRUE) |>
plot_batch()
scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
bc$score(scoring_f)
#> Warning in osat_score(bc, batch_vars = batch_vars, feature_vars = feature_vars,
#> : NAs in features / batch columns; they will be excluded from scoring
#> score_1
#> 360.8571
g_setup@metadata$optValue |> head(1)
#> [1] 360.8571
# should be identical
bench::system_time({
set.seed(123)
bc_reference <- optimize_design(bc, scoring = scoring_f, max_iter = params$iterations)
})
#> Checking variances of 1-dim. score vector.
#> ... (6749.04) - OK
#> Initial score: 360.857
#> Achieved score: 348.857 at iteration 7
#> Achieved score: 344.857 at iteration 8
#> Achieved score: 338.857 at iteration 10
#> Achieved score: 326.857 at iteration 19
#> Achieved score: 322.857 at iteration 22
#> Achieved score: 320.857 at iteration 24
#> Achieved score: 318.857 at iteration 26
#> Achieved score: 314.857 at iteration 27
#> Achieved score: 308.857 at iteration 28
#> Achieved score: 302.857 at iteration 32
#> Achieved score: 300.857 at iteration 33
#> Achieved score: 292.857 at iteration 36
#> Achieved score: 290.857 at iteration 38
#> Achieved score: 290.857 at iteration 39
#> Achieved score: 284.857 at iteration 40
#> Achieved score: 274.857 at iteration 45
#> Achieved score: 270.857 at iteration 50
#> Achieved score: 264.857 at iteration 54
#> Achieved score: 256.857 at iteration 55
#> Achieved score: 246.857 at iteration 60
#> Achieved score: 240.857 at iteration 62
#> Achieved score: 222.857 at iteration 71
#> Achieved score: 220.857 at iteration 72
#> Achieved score: 214.857 at iteration 73
#> Achieved score: 202.857 at iteration 78
#> Achieved score: 200.857 at iteration 79
#> Achieved score: 192.857 at iteration 80
#> Achieved score: 184.857 at iteration 84
#> Achieved score: 178.857 at iteration 85
#> Achieved score: 174.857 at iteration 86
#> Achieved score: 172.857 at iteration 95
#> Achieved score: 166.857 at iteration 96
#> Achieved score: 156.857 at iteration 98
#> Achieved score: 150.857 at iteration 99
#> Achieved score: 146.857 at iteration 103
#> Achieved score: 144.857 at iteration 108
#> Achieved score: 142.857 at iteration 111
#> Achieved score: 140.857 at iteration 113
#> Achieved score: 136.857 at iteration 114
#> Achieved score: 134.857 at iteration 128
#> Achieved score: 130.857 at iteration 133
#> Achieved score: 120.857 at iteration 138
#> Achieved score: 116.857 at iteration 140
#> Achieved score: 108.857 at iteration 149
#> Achieved score: 106.857 at iteration 152
#> Achieved score: 106.857 at iteration 164
#> Achieved score: 104.857 at iteration 171
#> Achieved score: 102.857 at iteration 175
#> Achieved score: 100.857 at iteration 176
#> Achieved score: 96.857 at iteration 179
#> Achieved score: 92.857 at iteration 190
#> Achieved score: 90.857 at iteration 191
#> Achieved score: 88.857 at iteration 200
#> Achieved score: 84.857 at iteration 205
#> Achieved score: 78.857 at iteration 209
#> Achieved score: 76.857 at iteration 223
#> Achieved score: 74.857 at iteration 250
#> Achieved score: 70.857 at iteration 265
#> Achieved score: 68.857 at iteration 268
#> Achieved score: 68.857 at iteration 283
#> Achieved score: 64.857 at iteration 295
#> Achieved score: 62.857 at iteration 336
#> Achieved score: 60.857 at iteration 350
#> Achieved score: 58.857 at iteration 368
#> Achieved score: 56.857 at iteration 389
#> Achieved score: 54.857 at iteration 406
#> Achieved score: 52.857 at iteration 418
#> Achieved score: 50.857 at iteration 428
#> Achieved score: 50.857 at iteration 431
#> Achieved score: 48.857 at iteration 433
#> Achieved score: 46.857 at iteration 452
#> Achieved score: 44.857 at iteration 496
#> Achieved score: 42.857 at iteration 553
#> Achieved score: 40.857 at iteration 562
#> Achieved score: 38.857 at iteration 612
#> Achieved score: 36.857 at iteration 717
#> Achieved score: 34.857 at iteration 727
#> Achieved score: 32.857 at iteration 828
#> Achieved score: 30.857 at iteration 853
#> Achieved score: 30.857 at iteration 859
#> Achieved score: 28.857 at iteration 904
#> Achieved score: 26.857 at iteration 970
#> process real
#> 3.56s 3.54s
# final score
bc_reference$score(scoring_f)
#> score_1
#> 26.85714
bc_reference$plot_trace() +
ggtitle(str_glue("Final score={bc$score(scoring_f)}"))
data.table
Instead of relying on BatchContainer
, here we have a
manual optimization process using data.table
.
fast_osat_optimize <- function(bc, batch_vars, feature_vars, iterations) {
bc <- bc$copy()
ldf <- data.table::data.table(bc$get_locations())[, c("plates")][, ".sample_id" := bc$assignment]
fcols <- c(".sample_id", feature_vars)
smp <- data.table::data.table(bc$samples)[, ..fcols]
df <- smp[ldf, on = ".sample_id"]
v <- osat_score(df, batch_vars, feature_vars)
edf <- v$expected_dt
current_score <- v$score
scores <- numeric(length = iterations)
n_avail <- nrow(df)
for (i in 1:iterations) {
repeat {
pos <- sample(n_avail, 2)
# does not make sense to shuffle NAs
if (any(!is.na(df[pos, feature_vars[1]]))) {
break
}
}
val <- df[c(pos[2], pos[1]), fcols, with = FALSE]
df[c(pos[1], pos[2]), (fcols) := val]
new_score <- osat_score(df, batch_vars, feature_vars, edf)$score
if (new_score <= current_score) {
current_score <- new_score
} else {
df[c(pos[2], pos[1]), (fcols) := val]
}
scores[i] <- current_score
}
bc$assignment <- df$.sample_id
list(bc=bc, scores=scores)
}
bench::system_time({
set.seed(123)
opt_res <- fast_osat_optimize(bc, "plates", c("SampleType", "Race", "AgeGrp"), iterations = params$iterations)
})
#> Warning in osat_score(df, batch_vars, feature_vars): NAs in features / batch
#> columns; they will be excluded from scoring
#> Warning in (function (assignment) : this field might become read-only in the
#> future, please use $move_samples() instead
#> process real
#> 2.17s 2.15s
scoring_f <- osat_score_generator("plates", c("SampleType", "Race", "AgeGrp"))
burn_in_it <- floor(params$iterations * 0.1)
burn_in_it
#> [1] 100
bench::system_time({
set.seed(123)
bc_burn_in <- optimize_design(
bc,
scoring = scoring_f,
n_shuffle = c(
rep(20, burn_in_it),
rep(
2,
params$iterations - burn_in_it
)
),
max_iter = params$iterations
)
})
#> Warning in osat_score(bc, batch_vars = batch_vars, feature_vars = feature_vars,
#> : NAs in features / batch columns; they will be excluded from scoring
#> Checking variances of 1-dim. score vector.
#> ... (5018.697) - OK
#> Initial score: 360.857
#> Achieved score: 322.857 at iteration 1
#> Achieved score: 290.857 at iteration 5
#> Achieved score: 278.857 at iteration 7
#> Achieved score: 254.857 at iteration 8
#> Achieved score: 238.857 at iteration 15
#> Achieved score: 224.857 at iteration 28
#> Achieved score: 216.857 at iteration 32
#> Achieved score: 198.857 at iteration 34
#> Achieved score: 196.857 at iteration 50
#> Achieved score: 184.857 at iteration 54
#> Achieved score: 172.857 at iteration 59
#> Achieved score: 162.857 at iteration 75
#> Achieved score: 158.857 at iteration 80
#> Achieved score: 158.857 at iteration 93
#> Achieved score: 156.857 at iteration 101
#> Achieved score: 148.857 at iteration 102
#> Achieved score: 144.857 at iteration 119
#> Achieved score: 136.857 at iteration 125
#> Achieved score: 130.857 at iteration 136
#> Achieved score: 126.857 at iteration 139
#> Achieved score: 124.857 at iteration 140
#> Achieved score: 124.857 at iteration 141
#> Achieved score: 118.857 at iteration 145
#> Achieved score: 114.857 at iteration 147
#> Achieved score: 112.857 at iteration 150
#> Achieved score: 108.857 at iteration 161
#> Achieved score: 100.857 at iteration 166
#> Achieved score: 94.857 at iteration 168
#> Achieved score: 92.857 at iteration 169
#> Achieved score: 90.857 at iteration 176
#> Achieved score: 88.857 at iteration 186
#> Achieved score: 86.857 at iteration 188
#> Achieved score: 82.857 at iteration 191
#> Achieved score: 80.857 at iteration 194
#> Achieved score: 78.857 at iteration 195
#> Achieved score: 78.857 at iteration 199
#> Achieved score: 76.857 at iteration 206
#> Achieved score: 76.857 at iteration 213
#> Achieved score: 72.857 at iteration 226
#> Achieved score: 66.857 at iteration 231
#> Achieved score: 62.857 at iteration 233
#> Achieved score: 60.857 at iteration 237
#> Achieved score: 58.857 at iteration 245
#> Achieved score: 58.857 at iteration 249
#> Achieved score: 56.857 at iteration 252
#> Achieved score: 56.857 at iteration 256
#> Achieved score: 54.857 at iteration 267
#> Achieved score: 52.857 at iteration 290
#> Achieved score: 50.857 at iteration 296
#> Achieved score: 48.857 at iteration 300
#> Achieved score: 42.857 at iteration 318
#> Achieved score: 40.857 at iteration 348
#> Achieved score: 40.857 at iteration 357
#> Achieved score: 38.857 at iteration 384
#> Achieved score: 36.857 at iteration 420
#> Achieved score: 34.857 at iteration 485
#> Achieved score: 32.857 at iteration 636
#> Achieved score: 30.857 at iteration 783
#> process real
#> 3.37s 3.35s
tibble(
i = bc_burn_in$trace$scores[[1]]$step,
normal = bc_reference$trace$scores[[1]]$score_1,
burnin = bc_burn_in$trace$scores[[1]]$score_1
) |>
pivot_longer(-i, names_to = "method", values_to = "score") |>
ggplot(aes(i, score, col = method)) +
geom_line()
assign_random(bc)
#> Batch container with 672 locations and 576 samples (assigned).
#> Dimensions: plates, chips, rows, columns
bc$get_samples()
#> # A tibble: 672 × 10
#> plates chips rows columns ID SampleType Race AgeGrp dummy_var1
#> <int> <int> <int> <int> <dbl> <fct> <fct> <fct> <dbl>
#> 1 1 1 1 1 563 Control Hispanic (30,40] 0.342
#> 2 1 1 1 2 488 Control European (30,40] 2.14
#> 3 1 1 2 1 215 Case European (50,60] -0.152
#> 4 1 1 2 2 268 Case Hispanic (40,50] 0.693
#> 5 1 1 3 1 165 Case European (60,100] 0.582
#> 6 1 1 3 2 86 Case Hispanic (60,100] 1.34
#> 7 1 1 4 1 264 Case European (60,100] 0.437
#> 8 1 1 4 2 451 Control European (30,40] -0.203
#> 9 1 1 5 1 250 Case Hispanic (60,100] -1.63
#> 10 1 1 5 2 NA <NA> <NA> <NA> NA
#> # ℹ 662 more rows
#> # ℹ 1 more variable: dummy_var2 <chr>
bc$get_samples(remove_empty_locations = TRUE)
#> # A tibble: 576 × 10
#> plates chips rows columns ID SampleType Race AgeGrp dummy_var1
#> <int> <int> <int> <int> <dbl> <fct> <fct> <fct> <dbl>
#> 1 1 1 1 1 563 Control Hispanic (30,40] 0.342
#> 2 1 1 1 2 488 Control European (30,40] 2.14
#> 3 1 1 2 1 215 Case European (50,60] -0.152
#> 4 1 1 2 2 268 Case Hispanic (40,50] 0.693
#> 5 1 1 3 1 165 Case European (60,100] 0.582
#> 6 1 1 3 2 86 Case Hispanic (60,100] 1.34
#> 7 1 1 4 1 264 Case European (60,100] 0.437
#> 8 1 1 4 2 451 Control European (30,40] -0.203
#> 9 1 1 5 1 250 Case Hispanic (60,100] -1.63
#> 10 1 1 6 1 191 Case Hispanic (60,100] 0.696
#> # ℹ 566 more rows
#> # ℹ 1 more variable: dummy_var2 <chr>
scoring_f <- list(
fc0 = function(samples) rnorm(1) + 2 * rexp(1),
fc1 = function(samples) rnorm(1, 100),
fc2 = function(samples) -7
)
bc$score(scoring_f)
#> fc0 fc1 fc2
#> 3.64428 100.78084 -7.00000
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.