[R] define number of clusters in kmeans/apcluster analysis
Luigi Marongiu
marongiu.luigi at gmail.com
Sun Dec 13 17:17:31 CET 2015
Dear all,
I am trying to do some cluster analysis, both with base R (kmeans) and
with the apcluster package. Both methods give 2 clusters, which is what
I am looking for, since I am interested in identifying positive and
negative results. However, I could not find a way to fine-tune the analysis
in order to properly allocate the points; essentially the negative
points should be all those in the lower left portion of the plot (see
example), but some in the top centre are also assigned to the negative
cluster.
So how can I change the parameters to get better results?
Thank you
L
>>>
# First coordinate of each observation; it is bound column-wise with `y`
# further down to form the matrix that is clustered and plotted.
# The values listed here run from roughly 2 up to 45.
# NOTE(review): the post does not say what x measures or its units.
x <- c(3.15, 3.07, 2, 3, 2.97, 45, 3.21, 45,
40.55, 2, 22.09, 2.47, 2.97, 2.77, 2.6, 7.35,
4.11, 37.12, 2.73, 36.36, 45, 2.33, 2.49, 45,
2.4, 2.74, 2.64, 45, 2.47, 38.1, 2.47, 37.4,
2.77, 2.37, 45, 2.69, 2.97, 2.7, 2, 2, 2.55,
11.86, 2.51, 2.68, 2.31, 2.6, 2.45, 2, 2.72,
2.57, 2.09, 3.04, 45, 45, 2.13, 43.82, 2.92,
4.94, 24.82, 2.64, 4.96, 3.65, 2.67, 2.64, 8.04,
4.56, 44.87, 37.42, 45, 6.2, 2.84, 4.08, 2,
5.03, 2.27, 44.89, 2.41, 2.47, 2.78, 37.47, 45,
2.76, 45, 2.51, 2.8, 44.8, 6.2, 2.87, 2.23,
18.32, 3.14, 2.1, 2.38, 2.72, 2, 2, 44.41,
3.15, 3.06, 4.8, 2.77, 2.8, 2.71, 44.77, 2.25,
2.69, 28.38, 2, 2.95, 45, 2.79, 2.46, 2.61,
2.78, 2.94, 38.47, 3.29, 2.89, 2.4, 2.23, 2.62,
4.21, 2.61, 2.81, 2.41, 41.98, 2.39, 36.41,
44.84, 4.73, 2, 2.66, 4.57, 3.01, 42.64, 2.04,
5.49, 15.48, 3.08, 2.7, 2, 2, 2.09, 2, 2.29,
2.92, 3.39, 3.1, 2, 6.14, 7.03, 4.77, 2.55,
32.36, 20.61, 3.09, 4.46, 44.75, 2, 2.73, 2,
36.05, 3.61, 34.84, 2.69, 5.28, 3.04, 45, 2.47,
2.58, 2.16, 2.59, 45, 44.08, 2, 37.05, 2.48,
2.46, 38.71, 7.32, 2.95, 2.8, 44.58, 42.24,
36.99, 13.84, 45, 2, 2, 2.38, 45, 45, 43.59,
2.69, 2.81, 3.05, 2.8, 4.65, 45, 41.46, 2.33,
7.12, 19.18, 4.82, 4.76, 2.51, 3.1, 2.74, 4.99,
38.06, 2.53, 2.94, 2.93, 6.59, 2.72, 2.94, 2.56,
2.91, 44.79, 2.98, 42.95, 45, 2.63, 38.44,
2.71, 2, 37.92, 2.69, 2.91, 2.65, 44.48, 6.35,
2.56, 21.94, 3.08, 2.6, 45, 2, 2.62, 2.47,
2.62, 2.73, 2.87, 2.83, 4.56, 44.22, 5.15, 5.13,
2.76, 7.02, 28.61, 4.87, 5.02, 44.35, 2.26,
2.89, 5.26, 38.01, 44.79, 39.26, 2.91, 4.59,
2.69, 2.61, 34.97, 3, 45, 2.81, 2, 2.65, 2,
37.33, 4.69, 3.26, 38.24, 4.97, 4.62, 2.47, 45,
4.52, 2.73, 15.66, 6.06, 2.79, 2.87, 45, 45,
45, 4.84, 3.05, 4.89, 4.64, 4.92, 2.74, 7.83,
42.31, 2.88, 6.89, 23.06, 2.94, 4.72, 4.55, 5.52,
4.48, 4.86, 3.12, 7.68, 43.89, 2.82, 2.64,
3.05, 42.95, 2.33, 3.55, 45, 2.79, 2.47, 45,
2.56, 38.33, 2.73, 2.87, 2.61, 3.01, 2.86, 2.74,
44.46, 44.54, 2.62, 16.94, 2.53, 2.24, 2.72, 2,
3.1, 2.88, 7.4, 4.64, 8.25, 3.01, 2.86, 2.46,
5.67, 44.52, 2.47, 2, 29.01, 2.61, 3.23, 12.3,
3.9, 2.91, 43.99, 36.99, 43.72, 42.29, 2.63,
3.03, 2.85, 2.58, 2.63, 2.73, 2.57, 2.37, 2.57,
2.75, 44.14, 39.4, 40.02, 3.08, 45, 4.96, 3,
2.83, 2.74, 2.8, 2.8, 18.88, 4.69, 2.51, 4.32,
2, 2.56, 2.81
)
# Second coordinate of each observation; it is bound column-wise with `x`
# further down to form the matrix that is clustered and plotted.
# The values listed here are small (thousandths to about a tenth), with a
# few slightly negative entries, i.e. a far narrower numeric range than x.
# NOTE(review): the post does not say what y measures or its units.
y <- c(0.014, 0.04, 0.001, 0.023, 0.008, 0, 0.008,
0.001, -0.001, 0.002, 0.103, 0, 0.013, 0.005,
0.008, 0.001, 0.011, 0.076, 0.005, 0.045, -0.001,
0, 0.008, -0.002, 0.002, 0.016, 0.006, 0.001,
0.002, 0.001, 0.004, 0.086, 0.009, 0.011, 0.002,
0.013, 0.019, 0.007, 0, 0.002, 0.024, 0.119,
0.015, 0.009, 0.013, 0.017, 0.009, 0.009, 0.006,
0.012, 0.002, 0.015, 0, 0.001, 0.002, 0.001,
0.007, 0.004, 0.113, 0.016, 0.013, 0.004, 0.015,
0.005, 0.004, 0.007, 0, 0.081, 0.001, 0.002,
0.014, 0.002, 0, 0.01, 0.003, 0.002, 0.004,
0.004, 0.006, 0.064, 0, 0.014, 0, 0.01, 0.019,
0.002, 0.006, 0.005, 0.003, 0.103, 0.007, 0.008,
0.002, 0.013, 0.007, 0.004, 0.001, 0.04, 0.017,
0.018, 0.002, 0.006, 0.011, 0.003, 0.004, 0.008,
0.115, 0, 0.02, 0, 0.012, 0.009, 0.011, 0.013,
0.004, 0.058, 0.019, 0.006, 0.005, 0.004, 0.012,
0.003, 0.003, 0.004, 0.002, 0.001, 0.002, 0.102,
-0.001, 0.008, 0.002, 0.016, 0.023, 0.014, 0.053,
0.009, 0.001, 0.124, 0.009, 0.008, 0.002, 0.002,
0.013, 0.002, 0.001, 0.042, 0.011, 0.009, 0,
0.004, 0.003, 0.002, 0.005, 0, 0.101, 0.013,
0.009, 0.005, 0.002, 0.007, 0.008, 0.067, 0.002,
0.064, 0.028, 0.007, 0.006, 0, 0.007, 0.006, 0,
0.001, 0.001, 0.001, 0, 0.088, 0.005, 0.008,
0.098, 0.005, 0.019, 0.007, 0.05, -0.002, 0.002,
0.129, 0.001, 0.004, -0.001, 0.002, -0.001, 0,
0.043, 0.018, 0.019, 0.015, 0.003, 0.006, 0.002,
0.001, 0.002, 0.004, 0.097, 0.025, 0.022, 0.007,
0.011, 0.007, 0.013, 0.061, 0.008, 0.013, 0.028,
0.004, 0.013, 0.005, 0.01, 0.004, 0, 0.006,
-0.001, 0.001, 0.01, 0.061, 0.002, 0.004, 0,
0.011, 0.029, 0.018, 0, 0.003, 0.012, 0.085,
0.015, 0.007, 0.002, 0.003, 0.008, 0.002, 0.007,
0.02, 0.011, 0.02, 0.008, 0.001, 0.003, 0.01,
0.014, 0.001, 0.096, 0.027, 0.024, 0, 0.005,
0.006, 0.024, 0.087, 0.001, 0.083, 0.02, 0.009,
0.009, 0.001, 0, 0.019, 0, 0.003, -0.001, 0.002,
0, 0.089, 0.016, 0.01, 0.103, 0.003, 0.01,
0.002, 0.008, 0.005, 0.014, 0.1, 0.007, 0.009,
0.011, -0.001, 0, 0.002, 0.015, 0.036, 0.018,
0.026, 0.009, 0.008, 0.004, 0.001, 0.014, 0.009,
0.1, 0.026, 0.032, 0.008, 0.011, 0.004, 0.013,
0.019, 0.004, 0.02, 0.015, 0.005, 0.013, -0.001,
0.013, 0.012, 0, 0.01, 0.002, 0.001, 0.013,
0.066, 0.009, 0.005, 0.002, 0.013, 0.025, 0.006,
0, 0, 0.015, 0.121, 0.006, 0.003, 0.008, 0,
0.012, 0.011, 0.003, 0.022, 0.008, 0.032, 0.007,
0.002, 0.006, 0.007, 0, 0.003, 0.11, 0.01, 0.008,
0, 0.018, 0.008, 0.001, 0.087, 0, 0.028,
0.011, 0.014, 0.007, 0.001, 0.018, 0.033, 0.021,
0.003, 0.003, 0.007, -0.001, 0.07, 0.022, 0.009,
0.001, 0.007, 0.031, 0.008, 0.013, 0.01, 0.018,
0.125, 0.01, 0.015, 0.006, 0, 0.015, 0.019
)
# Cluster the (x, y) observations into two groups, first with k-means and
# then with affinity propagation, and plot each result on the original axes.

# Raw coordinates, one row per observation; kept unscaled for plotting.
z <- cbind(x, y)

# x and y live on very different numeric scales, so Euclidean distances on
# the raw matrix are driven almost entirely by x and y is effectively
# ignored. Standardise each column (mean 0, sd 1) so that both variables
# contribute to the distance. This removes the scale artefact; whether the
# resulting two-cluster split matches the intended positive/negative
# grouping still has to be checked on the plot.
z_scaled <- scale(z)

# k-means starts from randomly chosen centres: fix the seed so the result is
# reproducible, and keep the best of several random starts instead of
# relying on a single one.
set.seed(1)
k <- kmeans(z_scaled, centers = 2, nstart = 25)
plot(z, col = k$cluster)

library(apcluster)
# Affinity propagation constrained to K = 2 clusters on the same
# standardised data, with negative squared Euclidean distance as similarity.
# Row order is unchanged by scaling, so the result can be drawn over the raw
# coordinates.
m <- apclusterK(negDistMat(r = 2), z_scaled, K = 2, verbose = TRUE)
plot(m, z)
More information about the R-help
mailing list