library(tip)
#> Registered S3 method overwritten by 'GGally':
#>   method from
#>   +.gg   ggplot2
# A function to generate random matrices from a matrix normal distribution
random_mat_normal <- function(mu, num_rows, num_cols){
  LaplacesDemon::rmatrixnorm(M = matrix(mu,
                                        nrow = num_rows,
                                        ncol = num_cols),
                             U = diag(num_rows),
                             V = diag(num_cols))
}
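# A quick smoke test of the generator (illustrative; not part of the original
# example): draw one 3 x 5 matrix centered at 0 and confirm its dimensions.
example_draw <- random_mat_normal(mu = 0, num_rows = 3, num_cols = 5)
dim(example_draw)
#> [1] 3 5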
# Generate 3 clusters of matrices
p <- 5
m <- 3
c1 <- lapply(1:10, function(x) random_mat_normal(mu = 0, num_rows = m, num_cols = p))
c2 <- lapply(1:10, function(x) random_mat_normal(mu = 5, num_rows = m, num_cols = p))
c3 <- lapply(1:10, function(x) random_mat_normal(mu = -5, num_rows = m, num_cols = p))
# Put all the data into a list
data_list <- c(c1, c2, c3)
# Create a vector of true labels. True labels are only necessary
# for constructing network graphs that incorporate the true labels;
# this is often useful for research.
true_labels <- c(rep("Cluster 1", length(c1)),
                 rep("Cluster 2", length(c2)),
                 rep("Cluster 3", length(c3)))
distance_matrix <- matrix(NA,
                          nrow = length(true_labels),
                          ncol = length(true_labels))
# Fill the distance matrix with the pairwise Frobenius distances between subjects
for(i in 1:length(true_labels)){
  for(j in i:length(true_labels)){
    distance_matrix[i,j] <- SMFilter::FDist2(mX = data_list[[i]],
                                             mY = data_list[[j]])
    distance_matrix[j,i] <- distance_matrix[i,j]
  }
}
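# Sanity check (illustrative; not part of the original example): a valid
# distance matrix is symmetric with a (numerically) zero diagonal.
stopifnot(isSymmetric(distance_matrix),
          all(abs(diag(distance_matrix)) < 1e-12))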
# Compute the temperature parameter estimate
temperature <- 1/median(distance_matrix[upper.tri(distance_matrix)])
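# Note (illustrative; not part of the original example): with this choice of
# temperature, the similarity transform used below, exp(-temperature * distance),
# maps the median pairwise distance to exp(-1) (about 0.37), and all
# similarities lie in (0, 1].
similarity_check <- exp(-1.0 * temperature * distance_matrix)
range(similarity_check)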
# For each subject, compute the point estimate for the number of similar
# subjects using univariate multiple change point detection (i.e., binary
# segmentation)
init_num_neighbors <- get_cpt_neighbors(.distance_matrix = distance_matrix)
#> Warning in BINSEG(sumstat, pen = pen.value, cost_func = costfunc, minseglen
#> = minseglen, : The number of changepoints identified is Q, it is advised to
#> increase Q to make sure changepoints have not been missed.
# (The same BINSEG warning is emitted seven more times; the repeats are
# omitted here.)
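# (Optional, illustrative) Inspect the neighbor estimates: one count per
# subject in data_list.
summary(init_num_neighbors)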
# Set the number of burn-in iterations in the Gibbs sampler
# RECOMMENDATION: burn >= 1000
burn <- 10
# Set the number of sampling iterations in the Gibbs sampler
# RECOMMENDATION: samples >= 1000
samples <- 10
# Set the subject names
names_subjects <- paste(1:dim(distance_matrix)[1])
# Run TIP clustering using only the prior
# --> That is, the likelihood function is constant
tip1 <- tip(.data = data_list,
            .burn = burn,
            .samples = samples,
            .similarity_matrix = exp(-1.0*temperature*distance_matrix),
            .init_num_neighbors = init_num_neighbors,
            .likelihood_model = "CONSTANT",
            .subject_names = names_subjects,
            .num_cores = 1)
#> Bayesian Clustering: Table Invitation Prior Gibbs Sampler
#> burn-in: 10
#> samples: 10
#> Likelihood Model: CONSTANT
#>
|======================================================================| 100%
# Produce plots for the Bayesian Clustering Model
tip_plots <- plot(tip1)
# View the posterior distribution of the number of clusters
tip_plots$histogram_posterior_number_of_clusters
# View the trace plot with respect to the posterior number of clusters
tip_plots$trace_plot_posterior_number_of_clusters
# Extract posterior cluster assignments using the Posterior Expected Adjusted Rand (PEAR) index
cluster_assignments <- mcclust::maxpear(psm = tip1@posterior_similarity_matrix)$cl
# If the true labels are available, then show the cluster result via a contingency table
table(data.frame(true_label = true_labels,
cluster_assignment = cluster_assignments))
#>            cluster_assignment
#> true_label  1 2 3 4
#>   Cluster 1 4 5 1 0
#>   Cluster 2 7 1 1 1
#>   Cluster 3 0 9 0 1
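# Agreement summary (illustrative; not part of the original example): the
# adjusted Rand index between the true labels and the PEAR point estimate,
# computed with mcclust::arandi(); values near 1 indicate close agreement.
mcclust::arandi(cluster_assignments, as.integer(factor(true_labels)))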
# Create the one-component graph with minimum entropy
partition_list <- partition_undirected_graph(.graph_matrix = tip1@posterior_similarity_matrix,
                                             .num_components = 1,
                                             .step_size = 0.001)
# Associate class labels and colors for the plot
<- c("Cluster 1" = "blue",
class_palette_colors "Cluster 2" = 'green',
"Cluster 3" = "red")
# Associate class labels and shapes for the plot
<- c("Cluster 1" = 19,
class_palette_shapes "Cluster 2" = 18,
"Cluster 3" = 17)
# Visualize the posterior similarity matrix by constructing a graph plot of
# the one-cluster graph. The true labels are used here (below they are not).
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = NA,
.subject_class_names = true_labels,
.class_colors = class_palette_colors,
.class_shapes = class_palette_shapes,
.node_size = 2,
.add_node_labels = FALSE)
#> Warning: Duplicated override.aes is ignored.
# If true labels are not available, then construct a network plot
# of the one-cluster graph without any class labels.
# Note: Subject labels may be suppressed using .add_node_labels = FALSE.
ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
.subject_names = names_subjects,
.node_size = 2,
.add_node_labels = TRUE)
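# Saving the figure (illustrative; not part of the original example). This
# assumes ggnet2_network_plot() returns a ggplot object, as its ggnet2/GGally
# backend suggests; the filename is arbitrary.
network_plot <- ggnet2_network_plot(.matrix_graph = partition_list$partitioned_graph_matrix,
                                    .subject_names = names_subjects,
                                    .node_size = 2,
                                    .add_node_labels = TRUE)
ggplot2::ggsave(filename = "network_plot.png", plot = network_plot,
                width = 6, height = 6, dpi = 300)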