[R] define number of clusters in kmeans/apcluster analysis

William Dunlap wdunlap at tibco.com
Sun Dec 13 19:28:07 CET 2015


In addition to the other fine replies, you should also know that
kmeans's results depend on the relative scales of the data columns
(since it is based on distances between points).  Your x and y have
quite different scales (x ranges from 2 to 45, y only from about -0.002
to 0.129), so the distance is essentially determined only by the
differences in x, the variable with the larger scale.

E.g., look at the difference in clustering when x is divided by
different amounts (5, 10, 15 and 20) while y is always divided by 0.05:

par(mfrow=c(2,2), mar=c(3,3,0,0))
for(i in 1:4){k <- kmeans(zs<-cbind(x/(i*5),y/.05), 2); plot(zs,
col=k$cluster, asp=1)}

Bill Dunlap
TIBCO Software
wdunlap tibco.com


On Sun, Dec 13, 2015 at 8:17 AM, Luigi Marongiu
<marongiu.luigi at gmail.com> wrote:
> Dear all,
> I am trying to do some cluster analysis, both with base R (kmeans) and
> with the apcluster package. Both methods give 2 clusters, which is what
> I am looking for since I am interested in identifying positive and
> negative results. However, I could not find a way to fine-tune the
> analysis in order to properly allocate the points; essentially the
> negative points should be all those in the lower left portion of the
> plot (see example) but some in the top centre are also assigned to the
> negative cluster.
> So how can I change the parameters to get better results?
> Thank you
> L
>
>>>>
> x <- c(3.15,    3.07,    2,    3,    2.97,    45,    3.21,    45,
> 40.55,    2,    22.09,    2.47,    2.97,    2.77,    2.6,    7.35,
> 4.11,    37.12,    2.73,    36.36,    45,    2.33,    2.49,    45,
> 2.4,    2.74,    2.64,    45,    2.47,    38.1,    2.47,    37.4,
> 2.77,    2.37,    45,    2.69,    2.97,    2.7,    2,    2,    2.55,
>  11.86,    2.51,    2.68,    2.31,    2.6,    2.45,    2,    2.72,
> 2.57,    2.09,    3.04,    45,    45,    2.13,    43.82,    2.92,
> 4.94,    24.82,    2.64,    4.96,    3.65,    2.67,    2.64,    8.04,
>   4.56,    44.87,    37.42,    45,    6.2,    2.84,    4.08,    2,
> 5.03,    2.27,    44.89,    2.41,    2.47,    2.78,    37.47,    45,
>  2.76,    45,    2.51,    2.8,    44.8,    6.2,    2.87,    2.23,
> 18.32,    3.14,    2.1,    2.38,    2.72,    2,    2,    44.41,
> 3.15,    3.06,    4.8,    2.77,    2.8,    2.71,    44.77,    2.25,
> 2.69,    28.38,    2,    2.95,    45,    2.79,    2.46,    2.61,
> 2.78,    2.94,    38.47,    3.29,    2.89,    2.4,    2.23,    2.62,
>  4.21,    2.61,    2.81,    2.41,    41.98,    2.39,    36.41,
> 44.84,    4.73,    2,    2.66,    4.57,    3.01,    42.64,    2.04,
> 5.49,    15.48,    3.08,    2.7,    2,    2,    2.09,    2,    2.29,
>  2.92,    3.39,    3.1,    2,    6.14,    7.03,    4.77,    2.55,
> 32.36,    20.61,    3.09,    4.46,    44.75,    2,    2.73,    2,
> 36.05,    3.61,    34.84,    2.69,    5.28,    3.04,    45,    2.47,
>  2.58,    2.16,    2.59,    45,    44.08,    2,    37.05,    2.48,
> 2.46,    38.71,    7.32,    2.95,    2.8,    44.58,    42.24,
> 36.99,    13.84,    45,    2,    2,    2.38,    45,    45,    43.59,
>  2.69,    2.81,    3.05,    2.8,    4.65,    45,    41.46,    2.33,
> 7.12,    19.18,    4.82,    4.76,    2.51,    3.1,    2.74,    4.99,
>  38.06,    2.53,    2.94,    2.93,    6.59,    2.72,    2.94,    2.56,
>    2.91,    44.79,    2.98,    42.95,    45,    2.63,    38.44,
> 2.71,    2,    37.92,    2.69,    2.91,    2.65,    44.48,    6.35,
> 2.56,    21.94,    3.08,    2.6,    45,    2,    2.62,    2.47,
> 2.62,    2.73,    2.87,    2.83,    4.56,    44.22,    5.15,    5.13,
>   2.76,    7.02,    28.61,    4.87,    5.02,    44.35,    2.26,
> 2.89,    5.26,    38.01,    44.79,    39.26,    2.91,    4.59,
> 2.69,    2.61,    34.97,    3,    45,    2.81,    2,    2.65,    2,
> 37.33,    4.69,    3.26,    38.24,    4.97,    4.62,    2.47,    45,
>  4.52,    2.73,    15.66,    6.06,    2.79,    2.87,    45,    45,
> 45,    4.84,    3.05,    4.89,    4.64,    4.92,    2.74,    7.83,
> 42.31,    2.88,    6.89,    23.06,    2.94,    4.72,    4.55,    5.52,
>    4.48,    4.86,    3.12,    7.68,    43.89,    2.82,    2.64,
> 3.05,    42.95,    2.33,    3.55,    45,    2.79,    2.47,    45,
> 2.56,    38.33,    2.73,    2.87,    2.61,    3.01,    2.86,    2.74,
>   44.46,    44.54,    2.62,    16.94,    2.53,    2.24,    2.72,    2,
>    3.1,    2.88,    7.4,    4.64,    8.25,    3.01,    2.86,    2.46,
>   5.67,    44.52,    2.47,    2,    29.01,    2.61,    3.23,    12.3,
>   3.9,    2.91,    43.99,    36.99,    43.72,    42.29,    2.63,
> 3.03,    2.85,    2.58,    2.63,    2.73,    2.57,    2.37,    2.57,
>  2.75,    44.14,    39.4,    40.02,    3.08,    45,    4.96,    3,
> 2.83,    2.74,    2.8,    2.8,    18.88,    4.69,    2.51,    4.32,
> 2,    2.56,    2.81
> )
> y <- c(0.014,    0.04,    0.001,    0.023,    0.008,    0,    0.008,
>  0.001,    -0.001,    0.002,    0.103,    0,    0.013,    0.005,
> 0.008,    0.001,    0.011,    0.076,    0.005,    0.045,    -0.001,
> 0,    0.008,    -0.002,    0.002,    0.016,    0.006,    0.001,
> 0.002,    0.001,    0.004,    0.086,    0.009,    0.011,    0.002,
> 0.013,    0.019,    0.007,    0,    0.002,    0.024,    0.119,
> 0.015,    0.009,    0.013,    0.017,    0.009,    0.009,    0.006,
> 0.012,    0.002,    0.015,    0,    0.001,    0.002,    0.001,
> 0.007,    0.004,    0.113,    0.016,    0.013,    0.004,    0.015,
> 0.005,    0.004,    0.007,    0,    0.081,    0.001,    0.002,
> 0.014,    0.002,    0,    0.01,    0.003,    0.002,    0.004,
> 0.004,    0.006,    0.064,    0,    0.014,    0,    0.01,    0.019,
> 0.002,    0.006,    0.005,    0.003,    0.103,    0.007,    0.008,
> 0.002,    0.013,    0.007,    0.004,    0.001,    0.04,    0.017,
> 0.018,    0.002,    0.006,    0.011,    0.003,    0.004,    0.008,
> 0.115,    0,    0.02,    0,    0.012,    0.009,    0.011,    0.013,
> 0.004,    0.058,    0.019,    0.006,    0.005,    0.004,    0.012,
> 0.003,    0.003,    0.004,    0.002,    0.001,    0.002,    0.102,
> -0.001,    0.008,    0.002,    0.016,    0.023,    0.014,    0.053,
> 0.009,    0.001,    0.124,    0.009,    0.008,    0.002,    0.002,
> 0.013,    0.002,    0.001,    0.042,    0.011,    0.009,    0,
> 0.004,    0.003,    0.002,    0.005,    0,    0.101,    0.013,
> 0.009,    0.005,    0.002,    0.007,    0.008,    0.067,    0.002,
> 0.064,    0.028,    0.007,    0.006,    0,    0.007,    0.006,    0,
>  0.001,    0.001,    0.001,    0,    0.088,    0.005,    0.008,
> 0.098,    0.005,    0.019,    0.007,    0.05,    -0.002,    0.002,
> 0.129,    0.001,    0.004,    -0.001,    0.002,    -0.001,    0,
> 0.043,    0.018,    0.019,    0.015,    0.003,    0.006,    0.002,
> 0.001,    0.002,    0.004,    0.097,    0.025,    0.022,    0.007,
> 0.011,    0.007,    0.013,    0.061,    0.008,    0.013,    0.028,
> 0.004,    0.013,    0.005,    0.01,    0.004,    0,    0.006,
> -0.001,    0.001,    0.01,    0.061,    0.002,    0.004,    0,
> 0.011,    0.029,    0.018,    0,    0.003,    0.012,    0.085,
> 0.015,    0.007,    0.002,    0.003,    0.008,    0.002,    0.007,
> 0.02,    0.011,    0.02,    0.008,    0.001,    0.003,    0.01,
> 0.014,    0.001,    0.096,    0.027,    0.024,    0,    0.005,
> 0.006,    0.024,    0.087,    0.001,    0.083,    0.02,    0.009,
> 0.009,    0.001,    0,    0.019,    0,    0.003,    -0.001,    0.002,
>   0,    0.089,    0.016,    0.01,    0.103,    0.003,    0.01,
> 0.002,    0.008,    0.005,    0.014,    0.1,    0.007,    0.009,
> 0.011,    -0.001,    0,    0.002,    0.015,    0.036,    0.018,
> 0.026,    0.009,    0.008,    0.004,    0.001,    0.014,    0.009,
> 0.1,    0.026,    0.032,    0.008,    0.011,    0.004,    0.013,
> 0.019,    0.004,    0.02,    0.015,    0.005,    0.013,    -0.001,
> 0.013,    0.012,    0,    0.01,    0.002,    0.001,    0.013,
> 0.066,    0.009,    0.005,    0.002,    0.013,    0.025,    0.006,
> 0,    0,    0.015,    0.121,    0.006,    0.003,    0.008,    0,
> 0.012,    0.011,    0.003,    0.022,    0.008,    0.032,    0.007,
> 0.002,    0.006,    0.007,    0,    0.003,    0.11,    0.01,    0.008,
>    0,    0.018,    0.008,    0.001,    0.087,    0,    0.028,
> 0.011,    0.014,    0.007,    0.001,    0.018,    0.033,    0.021,
> 0.003,    0.003,    0.007,    -0.001,    0.07,    0.022,    0.009,
> 0.001,    0.007,    0.031,    0.008,    0.013,    0.01,    0.018,
> 0.125,    0.01,    0.015,    0.006,    0,    0.015,    0.019
> )
> z <- cbind(x, y)
> k <- kmeans(z, 2)
> plot(z, col=k$cluster)
>
> library(apcluster)
> m <- apclusterK(negDistMat(r=2), z, K=2, verbose=TRUE)
> plot(m, z)
>
> ______________________________________________
> R-help at r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.



More information about the R-help mailing list