[R] define number of clusters in kmeans/apcluster analysis

Luigi Marongiu marongiu.luigi at gmail.com
Sun Dec 13 17:17:31 CET 2015


Dear all,
I am trying to do some cluster analysis, both with base R (kmeans) and
with the apcluster package. Both methods give 2 clusters, which is what I
am looking for since I am interested in identifying positive and negative
results. However, I could not find a way to fine-tune the analysis
in order to properly allocate the points; essentially the negative
points should be all those in the lower left portion of the plot (see
example) but some in the top centre are also assigned to the negative
cluster.
So how can I change the parameters to get better results?
Thank you
L

>>>
# x: numeric vector holding the first measurement of each observation.
# It becomes the first column of the matrix built with cbind(x, y) further
# down, i.e. the horizontal axis of the scatter plots.
# The literals below appear to lie between 2 and 45 -- much wider than y.
# NOTE(review): presumably paired position-by-position with y; confirm that
# both vectors have the same length, since cbind() would recycle otherwise.
x <- c(3.15,    3.07,    2,    3,    2.97,    45,    3.21,    45,
40.55,    2,    22.09,    2.47,    2.97,    2.77,    2.6,    7.35,
4.11,    37.12,    2.73,    36.36,    45,    2.33,    2.49,    45,
2.4,    2.74,    2.64,    45,    2.47,    38.1,    2.47,    37.4,
2.77,    2.37,    45,    2.69,    2.97,    2.7,    2,    2,    2.55,
 11.86,    2.51,    2.68,    2.31,    2.6,    2.45,    2,    2.72,
2.57,    2.09,    3.04,    45,    45,    2.13,    43.82,    2.92,
4.94,    24.82,    2.64,    4.96,    3.65,    2.67,    2.64,    8.04,
  4.56,    44.87,    37.42,    45,    6.2,    2.84,    4.08,    2,
5.03,    2.27,    44.89,    2.41,    2.47,    2.78,    37.47,    45,
 2.76,    45,    2.51,    2.8,    44.8,    6.2,    2.87,    2.23,
18.32,    3.14,    2.1,    2.38,    2.72,    2,    2,    44.41,
3.15,    3.06,    4.8,    2.77,    2.8,    2.71,    44.77,    2.25,
2.69,    28.38,    2,    2.95,    45,    2.79,    2.46,    2.61,
2.78,    2.94,    38.47,    3.29,    2.89,    2.4,    2.23,    2.62,
 4.21,    2.61,    2.81,    2.41,    41.98,    2.39,    36.41,
44.84,    4.73,    2,    2.66,    4.57,    3.01,    42.64,    2.04,
5.49,    15.48,    3.08,    2.7,    2,    2,    2.09,    2,    2.29,
 2.92,    3.39,    3.1,    2,    6.14,    7.03,    4.77,    2.55,
32.36,    20.61,    3.09,    4.46,    44.75,    2,    2.73,    2,
36.05,    3.61,    34.84,    2.69,    5.28,    3.04,    45,    2.47,
 2.58,    2.16,    2.59,    45,    44.08,    2,    37.05,    2.48,
2.46,    38.71,    7.32,    2.95,    2.8,    44.58,    42.24,
36.99,    13.84,    45,    2,    2,    2.38,    45,    45,    43.59,
 2.69,    2.81,    3.05,    2.8,    4.65,    45,    41.46,    2.33,
7.12,    19.18,    4.82,    4.76,    2.51,    3.1,    2.74,    4.99,
 38.06,    2.53,    2.94,    2.93,    6.59,    2.72,    2.94,    2.56,
   2.91,    44.79,    2.98,    42.95,    45,    2.63,    38.44,
2.71,    2,    37.92,    2.69,    2.91,    2.65,    44.48,    6.35,
2.56,    21.94,    3.08,    2.6,    45,    2,    2.62,    2.47,
2.62,    2.73,    2.87,    2.83,    4.56,    44.22,    5.15,    5.13,
  2.76,    7.02,    28.61,    4.87,    5.02,    44.35,    2.26,
2.89,    5.26,    38.01,    44.79,    39.26,    2.91,    4.59,
2.69,    2.61,    34.97,    3,    45,    2.81,    2,    2.65,    2,
37.33,    4.69,    3.26,    38.24,    4.97,    4.62,    2.47,    45,
 4.52,    2.73,    15.66,    6.06,    2.79,    2.87,    45,    45,
45,    4.84,    3.05,    4.89,    4.64,    4.92,    2.74,    7.83,
42.31,    2.88,    6.89,    23.06,    2.94,    4.72,    4.55,    5.52,
   4.48,    4.86,    3.12,    7.68,    43.89,    2.82,    2.64,
3.05,    42.95,    2.33,    3.55,    45,    2.79,    2.47,    45,
2.56,    38.33,    2.73,    2.87,    2.61,    3.01,    2.86,    2.74,
  44.46,    44.54,    2.62,    16.94,    2.53,    2.24,    2.72,    2,
   3.1,    2.88,    7.4,    4.64,    8.25,    3.01,    2.86,    2.46,
  5.67,    44.52,    2.47,    2,    29.01,    2.61,    3.23,    12.3,
  3.9,    2.91,    43.99,    36.99,    43.72,    42.29,    2.63,
3.03,    2.85,    2.58,    2.63,    2.73,    2.57,    2.37,    2.57,
 2.75,    44.14,    39.4,    40.02,    3.08,    45,    4.96,    3,
2.83,    2.74,    2.8,    2.8,    18.88,    4.69,    2.51,    4.32,
2,    2.56,    2.81
)
# y: numeric vector holding the second measurement of each observation.
# It becomes the second column of the matrix built with cbind(x, y) below,
# i.e. the vertical axis of the scatter plots.
# The literals below appear to lie between about -0.002 and 0.129, so y is
# orders of magnitude smaller than x; any distance-based clustering on the
# raw columns will therefore weight y far less than x.
# NOTE(review): presumably paired position-by-position with x; confirm that
# both vectors have the same length.
y <- c(0.014,    0.04,    0.001,    0.023,    0.008,    0,    0.008,
 0.001,    -0.001,    0.002,    0.103,    0,    0.013,    0.005,
0.008,    0.001,    0.011,    0.076,    0.005,    0.045,    -0.001,
0,    0.008,    -0.002,    0.002,    0.016,    0.006,    0.001,
0.002,    0.001,    0.004,    0.086,    0.009,    0.011,    0.002,
0.013,    0.019,    0.007,    0,    0.002,    0.024,    0.119,
0.015,    0.009,    0.013,    0.017,    0.009,    0.009,    0.006,
0.012,    0.002,    0.015,    0,    0.001,    0.002,    0.001,
0.007,    0.004,    0.113,    0.016,    0.013,    0.004,    0.015,
0.005,    0.004,    0.007,    0,    0.081,    0.001,    0.002,
0.014,    0.002,    0,    0.01,    0.003,    0.002,    0.004,
0.004,    0.006,    0.064,    0,    0.014,    0,    0.01,    0.019,
0.002,    0.006,    0.005,    0.003,    0.103,    0.007,    0.008,
0.002,    0.013,    0.007,    0.004,    0.001,    0.04,    0.017,
0.018,    0.002,    0.006,    0.011,    0.003,    0.004,    0.008,
0.115,    0,    0.02,    0,    0.012,    0.009,    0.011,    0.013,
0.004,    0.058,    0.019,    0.006,    0.005,    0.004,    0.012,
0.003,    0.003,    0.004,    0.002,    0.001,    0.002,    0.102,
-0.001,    0.008,    0.002,    0.016,    0.023,    0.014,    0.053,
0.009,    0.001,    0.124,    0.009,    0.008,    0.002,    0.002,
0.013,    0.002,    0.001,    0.042,    0.011,    0.009,    0,
0.004,    0.003,    0.002,    0.005,    0,    0.101,    0.013,
0.009,    0.005,    0.002,    0.007,    0.008,    0.067,    0.002,
0.064,    0.028,    0.007,    0.006,    0,    0.007,    0.006,    0,
 0.001,    0.001,    0.001,    0,    0.088,    0.005,    0.008,
0.098,    0.005,    0.019,    0.007,    0.05,    -0.002,    0.002,
0.129,    0.001,    0.004,    -0.001,    0.002,    -0.001,    0,
0.043,    0.018,    0.019,    0.015,    0.003,    0.006,    0.002,
0.001,    0.002,    0.004,    0.097,    0.025,    0.022,    0.007,
0.011,    0.007,    0.013,    0.061,    0.008,    0.013,    0.028,
0.004,    0.013,    0.005,    0.01,    0.004,    0,    0.006,
-0.001,    0.001,    0.01,    0.061,    0.002,    0.004,    0,
0.011,    0.029,    0.018,    0,    0.003,    0.012,    0.085,
0.015,    0.007,    0.002,    0.003,    0.008,    0.002,    0.007,
0.02,    0.011,    0.02,    0.008,    0.001,    0.003,    0.01,
0.014,    0.001,    0.096,    0.027,    0.024,    0,    0.005,
0.006,    0.024,    0.087,    0.001,    0.083,    0.02,    0.009,
0.009,    0.001,    0,    0.019,    0,    0.003,    -0.001,    0.002,
  0,    0.089,    0.016,    0.01,    0.103,    0.003,    0.01,
0.002,    0.008,    0.005,    0.014,    0.1,    0.007,    0.009,
0.011,    -0.001,    0,    0.002,    0.015,    0.036,    0.018,
0.026,    0.009,    0.008,    0.004,    0.001,    0.014,    0.009,
0.1,    0.026,    0.032,    0.008,    0.011,    0.004,    0.013,
0.019,    0.004,    0.02,    0.015,    0.005,    0.013,    -0.001,
0.013,    0.012,    0,    0.01,    0.002,    0.001,    0.013,
0.066,    0.009,    0.005,    0.002,    0.013,    0.025,    0.006,
0,    0,    0.015,    0.121,    0.006,    0.003,    0.008,    0,
0.012,    0.011,    0.003,    0.022,    0.008,    0.032,    0.007,
0.002,    0.006,    0.007,    0,    0.003,    0.11,    0.01,    0.008,
   0,    0.018,    0.008,    0.001,    0.087,    0,    0.028,
0.011,    0.014,    0.007,    0.001,    0.018,    0.033,    0.021,
0.003,    0.003,    0.007,    -0.001,    0.07,    0.022,    0.009,
0.001,    0.007,    0.031,    0.008,    0.013,    0.01,    0.018,
0.125,    0.01,    0.015,    0.006,    0,    0.015,    0.019
)
# Combine the two measurements into a two-column matrix (column "x" and
# column "y"). z is kept on the original scale because it is what gets
# plotted.
z <- cbind(x, y)

# k-means partitions by Euclidean distance. On the raw columns x spans
# roughly 2-45 while y spans roughly 0-0.13, so the split is decided almost
# entirely by x and points with a high y but a moderate x end up in the
# "low" cluster. Standardising each column (mean 0, sd 1) with scale() lets
# both measurements contribute equally.
#
# nstart = 25 runs several random initialisations and keeps the best
# solution; set.seed() makes the cluster labelling reproducible between
# runs.
# Note: k$centers is therefore expressed in standardised units, not in the
# original units of x and y.
set.seed(1)
k <- kmeans(scale(z), centers = 2, nstart = 25)

# Scatter plot on the original scale, coloured by cluster membership.
plot(z, col = k$cluster)

library(apcluster)
# Affinity propagation constrained to K = 2 clusters.
# negDistMat(r = 2) defines similarity as the negative squared Euclidean
# distance, so on the raw columns (x about 2-45, y about 0-0.13) the
# similarities are dominated by x. Cluster on the standardised columns so
# that both measurements carry comparable weight.
m <- apclusterK(negDistMat(r = 2), scale(z), K = 2, verbose = TRUE)

# The result stores clusters as row indices, so it can be drawn on top of
# the original, unscaled data.
plot(m, z)



More information about the R-help mailing list