The hardware and bandwidth for this mirror are donated by dogado GmbH, the Webhosting and Full Service-Cloud Provider. Check out our WordPress Tutorial.
If you wish to report a bug, or if you are interested in having us mirror your free-software or open-source project, please feel free to contact us at mirror[@]dogado.de.
library(DataFusionGDM)
library(ggplot2)
library(vegan)
# Sweep configuration: base seed, number of repeated runs per setting, and
# the numbers of shared populations (k) to evaluate.
seed_base <- 42
num_tests <- 3
k_values <- c(4, 8, 12)

# Simulate the full distance matrix entirely in memory (no file I/O) and keep
# only the `distance_matrix` element of the simulation result.
full_matrix <- simulate_genetic_distances(
  n_pops = 40,
  verbose = FALSE,
  seed = 42
)$distance_matrix
# Build two perturbed, relabelled copies (A and B) of a full distance matrix.
#
# Both copies contain every population of `full_matrix`, in the same order.
# `k` populations are marked as shared and carry the same "S_" label in both
# copies; all remaining populations are labelled "A_<name>" in A and
# "B_<name>" in B, so only the shared labels match across the two matrices.
#
# The perturbation is deterministic: it is derived from sin()/cos() of a
# linear cell index shifted by `seed`, scaled by `noise_sd`. B additionally
# receives the constant offset `bias_mu`. The perturbation is mirrored across
# the diagonal so that a symmetric `full_matrix` yields symmetric A and B.
#
# Arguments:
#   full_matrix      Square numeric matrix with row/column names.
#   k                Number of shared populations (1 <= k <= nrow).
#   bias_mu          Constant added to every off-diagonal entry of B.
#   noise_sd         Amplitude of the sin/cos perturbation.
#   randomize_shared If TRUE, the shared populations are picked by a
#                    seed-dependent ordering; otherwise the first k are used.
#   seed             Numeric shift applied to the ordering and noise indices.
#
# Returns: list(A, B, np, k), where `np` is the number of populations.
prepare_matrices_from_matrix <- function(full_matrix, k, bias_mu = 0.1,
                                         noise_sd = 0.05,
                                         randomize_shared = TRUE, seed = 42) {
  if (!is.matrix(full_matrix) || nrow(full_matrix) != ncol(full_matrix)) {
    stop("`full_matrix` must be a square matrix.", call. = FALSE)
  }
  all_pop_names <- rownames(full_matrix)
  if (is.null(all_pop_names)) {
    stop("`full_matrix` must have row names identifying the populations.",
         call. = FALSE)
  }
  np <- length(all_pop_names)
  stopifnot(k >= 1, k <= np)

  if (randomize_shared) {
    # Deterministic pseudo-shuffle: rank populations by sin(index + seed).
    ordering <- order(sin(seq_len(np) + seed), decreasing = TRUE)
    shared_pop_names <- all_pop_names[ordering[seq_len(k)]]
  } else {
    shared_pop_names <- all_pop_names[seq_len(k)]
  }
  unique_pop_names <- setdiff(all_pop_names, shared_pop_names)

  # Copy the lower triangle onto the upper one. Without this, cell (i, j) and
  # cell (j, i) would receive different perturbations and the result would no
  # longer be a symmetric distance matrix.
  mirror_lower <- function(m) {
    upper <- upper.tri(m)
    m[upper] <- t(m)[upper]
    m
  }

  base_index <- seq_len(length(full_matrix)) + seed
  noise_A <- mirror_lower(matrix(sin(base_index) * noise_sd, np))
  noise_B <- mirror_lower(matrix(cos(base_index) * noise_sd + bias_mu, np))

  # abs() keeps the perturbed values non-negative; the diagonal is reset to 0.
  A <- abs(full_matrix + noise_A)
  B <- abs(full_matrix + noise_B)
  diag(A) <- 0
  diag(B) <- 0

  # Lookup tables from original population name to the per-matrix label.
  label_A <- label_B <- rep("", np)
  names(label_A) <- names(label_B) <- all_pop_names
  label_A[shared_pop_names] <- paste0("S_", shared_pop_names)
  label_B[shared_pop_names] <- paste0("S_", shared_pop_names)
  label_A[unique_pop_names] <- paste0("A_", unique_pop_names)
  label_B[unique_pop_names] <- paste0("B_", unique_pop_names)

  rownames(A) <- label_A[rownames(A)]
  colnames(A) <- label_A[colnames(A)]
  rownames(B) <- label_B[rownames(B)]
  colnames(B) <- label_B[colnames(B)]

  list(A = A, B = B, np = np, k = k)
}
# Sensitivity sweep: for every k in `k_values`, run `num_tests` repetitions
# with distinct seeds, calibrate B onto A through the populations whose labels
# appear in both matrices, and record the mean squared difference to A before
# (`the_prior`) and after (`the_post`) calibration.
#
# One data frame per run is stored in a preallocated list and bound once at
# the end, instead of growing `results` with rbind() on every iteration.
n_runs <- length(k_values) * num_tests
result_rows <- vector("list", n_runs)
run_idx <- 0L
for (k in k_values) {
  for (test_id in seq_len(num_tests)) {
    # Distinct, reproducible seed for each (k, test_id) combination.
    test_seed <- seed_base + test_id * 100 + k
    matrices <- prepare_matrices_from_matrix(full_matrix, k = k, seed = test_seed)
    A <- matrices$A
    B <- matrices$B

    mds <- perform_mds(A, B)
    X <- mds$X
    Y <- mds$Y
    d_opt <- mds$d_opt # presumably the selected embedding dimension -- confirm

    # Labels present in both matrices (the "S_"-prefixed shared populations).
    pop_common <- intersect(rownames(A), rownames(B))

    # seq_len() is safe when d_opt is 0, and drop = FALSE keeps the subsets as
    # matrices even when d_opt == 1 or only one population is shared.
    dims <- seq_len(d_opt)
    X_sub <- X[pop_common, dims, drop = FALSE]
    Y_sub <- Y[pop_common, dims, drop = FALSE]

    Yt <- apply_procrustes(X_sub, Y_sub, Y)
    B_cal <- coords_to_distances(Yt)

    the_prior <- mean((A - B)^2)
    the_post <- mean((A - B_cal)^2)
    # Relative reduction of the mean squared difference, in percent.
    improvement <- (the_prior - the_post) / the_prior * 100

    run_idx <- run_idx + 1L
    result_rows[[run_idx]] <- data.frame(
      k = k, test_id = test_id, the_prior = the_prior,
      the_post = the_post, improvement = improvement
    )
  }
}
# With no runs at all, fall back to the empty data frame the script started
# from; do.call(rbind, list()) would return NULL instead.
results <- if (n_runs > 0) do.call(rbind, result_rows) else data.frame()
# Summarise the runs per k: mean and standard deviation of each metric.
# aggregate() returns one matrix-valued column per metric; passing the result
# through do.call(data.frame, ...) flattens these into separate columns such
# as `improvement.mean` and `improvement.sd`.
summarise_runs <- function(x) {
  c(mean = mean(x), sd = sd(x))
}
agg <- do.call(
  data.frame,
  aggregate(
    cbind(the_prior, the_post, improvement) ~ k,
    data = results,
    FUN = summarise_runs
  )
)
# Mean improvement per k (line and points) with a +/- 1 SD band around it.
# Layers are added in the order line, points, ribbon.
mean_aes <- aes(y = improvement.mean)
band_aes <- aes(
  ymin = improvement.mean - improvement.sd,
  ymax = improvement.mean + improvement.sd
)
p <- ggplot(data = agg, mapping = aes(x = k)) +
  geom_line(mapping = mean_aes, color = "blue") +
  geom_point(mapping = mean_aes, color = "blue") +
  geom_ribbon(mapping = band_aes, fill = "blue", alpha = 0.2) +
  labs(
    title = "Calibration improvement vs shared k",
    x = "Shared populations (k)",
    y = "% Improvement"
  ) +
  theme_minimal()
print(p)
These binaries (installable software) and packages are in development.
They may not be fully stable and should be used with caution. We make no claims about them.
Health stats visible at Monitor.