## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(collapse = TRUE, comment = "#>",
                      fig.width = 7, fig.height = 4.5)

## ----setup--------------------------------------------------------------------
library(GSbench)

## ----sim----------------------------------------------------------------------
sim <- simulate_population(n = 300, m = 2000, n_qtl = 50, h2 = 0.5, seed = 1)
dim(sim$geno)
sim$h2          # realised heritability

## ----qc-----------------------------------------------------------------------
qc <- qc_markers(sim$geno, maf = 0.05, max_missing = 0.1)
qc$removed
geno <- qc$geno

## ----gblup--------------------------------------------------------------------
fit <- gblup(sim$pheno, geno)
fit
cor(fit$gebv, sim$bv)     # accuracy against the true breeding values

## ----predict------------------------------------------------------------------
train <- 1:240; test <- 241:300
fit_tr <- gs_fit(sim$pheno[train], geno[train, ], model = "gblup")
pred   <- predict(fit_tr, geno[test, ])
cor(pred, sim$bv[test])

## ----available----------------------------------------------------------------
available_models()

## ----models, eval = requireNamespace("glmnet", quietly = TRUE)----------------
en <- gs_fit(sim$pheno[train], geno[train, ], model = "elastic_net")
cor(predict(en, geno[test, ]), sim$bv[test])

## ----cv-----------------------------------------------------------------------
cv <- gs_cv(sim$pheno, geno, models = "gblup", k = 5, reps = 1, seed = 1)
cv

# leave-one-family-out, with a toy family structure
fam <- rep(1:6, length.out = nrow(geno))
gs_cv(sim$pheno, geno, models = "gblup",
      scheme = "leave_group_out", groups = fam)

## ----ensemble-----------------------------------------------------------------
ens <- gs_ensemble(sim$pheno, geno, base_models = "gblup", seed = 1)
ens$weights

## ----benchmark----------------------------------------------------------------
bench <- gs_benchmark(sim$pheno, geno, models = c("gblup", "ensemble"),
                      k = 5, seed = 1)
bench$accuracy

## ----benchplot----------------------------------------------------------------
plot(bench)

