| Type: | Package | 
| Title: | Network-Based Clustering | 
| Version: | 1.2.0 | 
| Description: | Network-based clustering using a Bayesian network mixture model with optional covariate adjustment. | 
| Depends: | R (≥ 3.5.0) | 
| Encoding: | UTF-8 | 
| License: | GPL-3 | 
| RoxygenNote: | 7.2.3 | 
| Suggests: | knitr, rmarkdown, ggraph, ggpubr, ggplot2, grDevices, reshape2, car, ks, testthat (≥ 3.0.0) | 
| VignetteBuilder: | knitr | 
| Imports: | BiDAG (≥ 2.0.2), pcalg, RBGL, parallel, clue, methods, graph, igraph | 
| Config/testthat/edition: | 3 | 
| NeedsCompilation: | no | 
| Packaged: | 2024-02-14 12:23:00 UTC; frbayer | 
| Author: | Fritz Bayer [aut, cre, cph], Jack Kuipers [ctb] | 
| Maintainer: | Fritz Bayer <frbayer@ethz.ch> | 
| Repository: | CRAN | 
| Date/Publication: | 2024-02-14 15:20:02 UTC | 
bestAICsearch
Description
Search for the best AIC across numbers of clusters (minK to maxK) and chi values (chiVec)
Usage
bestAICsearch(
  binaryMatrix,
  minK = 2,
  maxK = 5,
  chiVec = c(0.001, 0.5, 1, 2, 3),
  startseed = 100,
  nIterations = 50,
  AICrange = 100,
  plot_heatmap = TRUE
)
Arguments
| binaryMatrix | Data to be clustered | 
| minK | Min number of clusters | 
| maxK | Max number of clusters | 
| chiVec | Vector of chi values | 
| startseed | Seed | 
| nIterations | Number of iterations | 
| AICrange | AIC range | 
| plot_heatmap | TRUE if plotting directly | 
Value
list of AIC scores
density_plot
Description
Create 2d dimensionality reduction of sample fit to Bayesian network clusters
Usage
density_plot(cluster_results, var_selection = NULL, colourys = NULL)
Arguments
| cluster_results | Cluster results from function get_clusters | 
| var_selection | Selected variables to consider, e.g. c(1:5) for first five only | 
| colourys | A vector specifying the colors of each cluster (optional) | 
Value
A density plot of class recordedplot.
Examples
# Simulate data
sampled_data <- sampleData(n_vars = 15, n_samples = c(200,200,200))$sampled_data
# Learn clusters
cluster_results <- get_clusters(sampled_data)
# Load additional packages to create a 2d dimensionality reduction
library(car)
library(ks)
library(ggplot2)
library(graphics)
library(stats)
# Plot a 2d dimensionality reduction
density_plot(cluster_results)
get_classification
Description
Classification based on clustering
Usage
get_classification(cluster_results, data_classify)
Arguments
| cluster_results | Output from get_clusters() | 
| data_classify | Data that should be classified; colnames need to match the ones of cluster_results$data; missing cols are allowed | 
Value
a list containing the classification as "clustermembership" and the probabilities of belonging to the clusters as "allrelativeprobabs"
Examples
# choose data
sampled_data <- sampleData(n_vars = 15, n_samples = c(300,300,300))$sampled_data
# learn clusters
cluster_results <- get_clusters(sampled_data)
# classify the data based on the learned clusters
classification_results <- get_classification(cluster_results, sampled_data)
get_clusters
Description
Network-based clustering
Usage
get_clusters(
  myData,
  k_clust = 3,
  n_bg = 0,
  quick = TRUE,
  EMseeds = 1,
  edgepmat = NULL,
  blacklist = NULL,
  bdepar = list(chi = 0.5, edgepf = 8),
  newallrelativeprobabs = NULL
)
Arguments
| myData | Data to be clustered, must be either binary (with levels "0"/"1") or categorical (with levels "0"/"1"/"2"/...) | 
| k_clust | Number of clusters | 
| n_bg | Number of covariates to be adjusted for; the covariates must be placed in the last columns of the myData matrix | 
| quick | if TRUE, then the runtime is quick but accuracy is lower | 
| EMseeds | Seeds | 
| edgepmat | Matrix of penalized edges in the search space | 
| blacklist | Matrix of forbidden edges in the search space | 
| bdepar | Hyperparameters for structure learning (BDE score) | 
| newallrelativeprobabs | relative probability of cluster assignment of each sample | 
Value
a list containing the clusterMemberships and "assignprogress"
Examples
# choose data
sampled_data <- sampleData(n_vars = 15, n_samples = c(300,300,300))$sampled_data
# learn clusters
cluster_results <- get_clusters(sampled_data)
# visualize the networks
library(ggplot2)
library(ggraph)
library(igraph)
library(ggpubr)
plot_clusters(cluster_results)
get_clusters_bernoulli
Description
Categorical version of Bernoulli mixture model (binary clustering function BBMMclusterEM)
Usage
get_clusters_bernoulli(
  binaryMatrix,
  chi = 0.5,
  k_clust = 5,
  startseed = 100,
  nIterations = 10,
  verbose = FALSE
)
Arguments
| binaryMatrix | Data to be clustered | 
| chi | hyperparameter chi | 
| k_clust | Number of clusters | 
| startseed | Start seed | 
| nIterations | number of iterations | 
| verbose | set TRUE to display progress | 
Value
a list containing the clusterMemberships
nice_DAG_plot
Description
DAG visualization
Usage
nice_DAG_plot(
  my_DAG,
  print_direct = TRUE,
  node_size = NULL,
  CPDAG = TRUE,
  node_colours = "#fdae61",
  directed = TRUE
)
Arguments
| my_DAG | DAG | 
| print_direct | print DAG if TRUE | 
| node_size | node size vector | 
| CPDAG | if TRUE, then plot CPDAG instead of DAG | 
| node_colours | node colours | 
| directed | TRUE if edges should be drawn as directed | 
Value
A plot of the DAG of class c("gg", "ggplot").
plot_clusters
Description
Plot clusters
Usage
plot_clusters(
  cluster_results,
  node_colours = "#fdae61",
  scale_entropy = FALSE,
  directed = TRUE
)
Arguments
| cluster_results | Cluster results | 
| node_colours | node colours | 
| scale_entropy | if true, entropy measure will be used to determine size of the nodes | 
| directed | TRUE if edges should be drawn as directed | 
Value
A summary plot of all cluster networks of class c("gg", "ggplot", "ggarrange").
Examples
# Simulate data
sampled_data <- sampleData(n_vars = 15, n_bg = 0)$sampled_data
# learn clusters
cluster_results <- get_clusters(sampled_data)
# Load additional packages to visualize the networks
library(ggplot2)
library(ggraph)
library(igraph)
library(ggpubr)
# Visualize networks
plot_clusters(cluster_results)
sampleData
Description
Sample binary data from different Bayes nets
Usage
sampleData(
  k_clust = 3,
  n_vars = 20,
  n_bg = 0,
  n_samples = NULL,
  bgedges = "different",
  equal_cpt_bg = TRUE
)
Arguments
| k_clust | Number of clusters | 
| n_vars | Number of variables | 
| n_bg | number of conditioned covariates | 
| n_samples | number of samples | 
| bgedges | type of background edges | 
| equal_cpt_bg | specify if conditional probability table of the background edges is constant across clusters | 
Value
sampled binary data
Examples
# sample data
simulation_data <- sampleData(k_clust = 3, n_vars = 15, n_samples = c(200,200,200))
sampled_data <- simulation_data$sampled_data
head(sampled_data)