## ----include = FALSE---------------------------------------------------------- library(knitr) opts_chunk$set( collapse = TRUE, comment = "#>" ) # define a method for objects of the class data.frame knit_print.matrix = function(x, ...) { res = paste(c("", "", kable(x, row.names = TRUE)), collapse = "\n") asis_output(res) } # register the method registerS3method("knit_print", "matrix", knit_print.matrix) ## ----------------------------------------------------------------------------- library(anticlust) ## ----------------------------------------------------------------------------- data("schaper2019") # look at the data head(schaper2019) ## ----echo = FALSE------------------------------------------------------------- cols <- toString(paste0("\`", names(schaper2019)[3:6], "\`")) ## ----------------------------------------------------------------------------- schaper2019 <- within(schaper2019, { freq <- ifelse(frequency < 18, "high", NA) freq <- ifelse(frequency > 19, "low", freq) }) ## ----------------------------------------------------------------------------- schaper2019$freq ## ----------------------------------------------------------------------------- selected <- subset(schaper2019, !is.na(freq)) # see how many cases were selected: table(selected$freq) ## ----------------------------------------------------------------------------- # Match the conditions based on covariates covariates <- scale(selected[, 3:5]) selected$matches <- matching( covariates, match_between = selected$freq, match_within = selected$room, match_extreme_first = FALSE ) ## ----------------------------------------------------------------------------- subset(selected, matches == 1) subset(selected, matches == 2) ## ----------------------------------------------------------------------------- # Select the 8 best matches: final_selection <- subset(selected, matches <= 8) ## ----------------------------------------------------------------------------- # Check quality of the selection: mean_sd_tab( final_selection[, 3:6], final_selection$freq ) ## ----similarity-plot---------------------------------------------------------- plot_similarity( covariates, groups = selected$matches ) ## ----------------------------------------------------------------------------- # Reload the data for next example data("schaper2019") ## ----------------------------------------------------------------------------- schaper2019 <- within(schaper2019, { incon <- ifelse(rating_inconsistent < median(rating_inconsistent), "low incon", NA) incon <- ifelse(rating_inconsistent >= median(rating_inconsistent), "high incon", incon) con <- ifelse(rating_consistent <= median(rating_consistent), "low con", NA) con <- ifelse(rating_consistent > median(rating_consistent), "high con", con) }) ## ----------------------------------------------------------------------------- table(schaper2019$con, schaper2019$incon) ## ----------------------------------------------------------------------------- # Match the conditions based on covariates covariates <- scale(schaper2019[, c("frequency", "syllables")]) schaper2019$matches <- matching( covariates, match_between = schaper2019[, c("con", "incon")], match_extreme_first = FALSE ) ## ----------------------------------------------------------------------------- subset(schaper2019, matches == 1) ## ----------------------------------------------------------------------------- # Plot covariate similarity by match: plot_similarity(covariates, schaper2019$matches) ## ----------------------------------------------------------------------------- # Select the 5 best matches: final_selection <- subset(schaper2019, matches <= 10) ## ----------------------------------------------------------------------------- mean_sd_tab( subset(final_selection, select = 3:6), paste(final_selection$con, final_selection$incon) ) ## ----------------------------------------------------------------------------- # Reload the data for next example data("schaper2019") ## ----------------------------------------------------------------------------- ## Conduct anticlustering (assign all items to three similar groups) schaper2019$anticluster <- anticlustering( schaper2019[, 3:6], K = 3, objective = "variance" ) ## check out quality of the solution mean_sd_tab( subset(schaper2019, select = 3:6), schaper2019$anticluster ) ## ----------------------------------------------------------------------------- # Reload the data for next example data("schaper2019") ## ----------------------------------------------------------------------------- # First, identify triplets of similar word, within room covariates <- scale(schaper2019[, 3:6]) schaper2019$triplet <- matching( covariates, p = 3, match_within = schaper2019$room ) # check out the two most similar triplets: subset(schaper2019, triplet == 1) subset(schaper2019, triplet == 2) # Select the 10 best triplets best <- subset(schaper2019, triplet <= 10) ## ----------------------------------------------------------------------------- best$anticluster <- anticlustering( best[, 3:6], K = 3, categories = best$triplet, objective = "variance" ) ## ----------------------------------------------------------------------------- table(best$triplet, best$anticluster) ## ----------------------------------------------------------------------------- table(best$room, best$anticluster) ## ----------------------------------------------------------------------------- ## check out quality of the solution mean_sd_tab( subset(best, select = 3:6), best$anticluster )