[R] Plots with k-means
eduardo san miguel
eduardosanmi at gmail.com
Mon Nov 2 21:43:29 CET 2009
I am sending the R code in an attached file.
2009/11/2 Iuri Gavronski <iuri at proxima.adm.br>:
> Eduardo,
>
> Would you mind sending me the R code in an attached file? Your code didn't
> work here and I am not sure whether that is because of line breaks from the
> email program.
>
> Iuri.
>
> On Mon, Nov 2, 2009 at 10:53 AM, eduardo san miguel <
eduardosanmi at gmail.com>
> wrote:
>>
>> Hello all,
>>
>> I have almost finished the development of a new package where ideas
>> from Tamara Munzner, George Furnas and Costa and Venturini are
>> implemented.
>>
>> 1.- Da Costa, David & Venturini, Gilles (2006). An Interactive
>> Visualization Environment for Data Exploration Using Points of
>> Interest. adma 2006: 416-423
>>
>> 2.- Furnas, George (1986). Generalized Fisheye Views. Human Factors in
>> computing systems, CHI '86 conference proceedings, ACM, New York, pp.
>> 16-23.
>>
>> 3.- Heidi Lam, Ronald A. Rensink, and Tamara Munzner (2006). Effects
>> of 2D Geometric Transformations on Visual Memory. Proc. Applied
>> Perception in Graphics and Visualization (APGV 2006), 119-126, 2006.
>>
>> 4.- Keith Lau, Ron Rensink, and Tamara Munzner (2004). Perceptual
>> Invariance of Nonlinear Focus+Context Transformations. Proc. First
>> Symposium on Applied Perception in Graphics and Visualization (APGV
>> 04) 2004, pp 65-72.
>>
>> This is a sample with some basic functionality and a VERY BASIC
>> example with kmeans plotting.
>>
>> Comments will be greatly appreciated.
>>
>> Regards
>>
>> -- R CODE
>> require(methods)
>>
>> setClass(Class = 'POI',  # S4 class holding the similarity data, computed coordinates and display settings read by POICalc() and POIPlot()
>> representation(matrizSim = 'matrix',cos.query.docs = 'vector',  # slot declarations: name = class
>> wordsInQuery = 'ANY',docs = 'matrix', objeto = 'matrix', objetoC
>> = 'matrix',
>> Pcoords = 'matrix', PcoordsFI = 'matrix', newPcoords = 'matrix',
>> newcoords = 'numeric' ,  # newcoords / newcoords_1: x and y offsets that POIPlot() adds to every coordinate
>> newcoords_1 = 'numeric', M = 'numeric', poisTextCol =
>> 'character' , colores = 'vector' ,
>> poisCircleCol = 'character' , linesCol = 'character', itemsCol =
>> 'character',
>> LABELS = 'logical', vscale = 'numeric', hscale = 'numeric',
>> circleCol = 'character',
>> plotCol = 'character', itemsFamily = 'character', lenteDefault
>> = 'numeric',
>> zoomDefault = 'numeric' , rateDefault = 'numeric' ,
>> topKDefault = 'numeric' ,
>> pal = 'character', selected = 'numeric' , circRadio =
>> 'numeric' , IncVscale = 'numeric',
>> cgnsphrFont = 'numeric', xClick_old = 'numeric', yClick_old =
>> 'numeric',
>> wordsInQueryFull = 'character' ),  # several slots (e.g. vscale, zoomDefault, xClick_old) are not referenced by the code in this message
>> prototype(cos.query.docs = 0, colores = 0, newcoords = 0,  # default slot values applied by new('POI')
>> newcoords_1 = 0, M = 3,  # M: the value POIPlot() hands to toHiperbolico(), which passes it to fishIin()
>> vscale = 0.5 , hscale = 1.5 , circleCol = 'black' ,
>> itemsCol = 'white',
>> poisTextCol = '#fff5ee', poisCircleCol = '#fff5ee',
>> linesCol = 'white',
>> plotCol = 'black', itemsFamily = 'sans', lenteDefault =
>> 1, zoomDefault = 15 ,
>> rateDefault = 0.1 , topKDefault = 25, pal = 'topo' ,
>> selected = 1 ,
>> circRadio = 0.25 , IncVscale = 0.05 , cgnsphrFont =
>> 1.01, LABELS = T)  # NOTE(review): T can be reassigned; TRUE is the safe spelling
>> )
>>
>> setGeneric("puntosMedios" ,  # generic: insert midpoints between consecutive rows of a point matrix
>> function(Pcoords, detalle = 5){standardGeneric
>> ("puntosMedios")})
>> # The method below treats the rows as a closed ring: the last row is paired with the first one.
>> setMethod("puntosMedios" ,
>> signature = "matrix",
>> function(Pcoords, detalle = 5){
>> # Pcoords: points, one per row (2 columns expected, since new_pcoords is allocated with 2 columns). detalle: number of subdivision passes. Returns the subdivided matrix; each pass doubles the row count.
>> for (i in 1:detalle){  # one subdivision pass per iteration
>> new_pcoords = matrix(rep(0,4*nrow(Pcoords)), nrow = 2*
>> nrow(Pcoords), byrow = T )
>> cont = 0  # write position in new_pcoords
>> for (i in 1:nrow(Pcoords)){  # reuses the name i; the outer loop still runs over its own precomputed sequence
>> if (i == nrow(Pcoords)) {  # last row: its neighbour is row 1
>> cont = cont + 1
>> new_pcoords[cont,] = Pcoords[i,]  # keep the original point
>> cont = cont + 1
>> new_pcoords[cont,] = Pcoords[i,] -  # midpoint between row i and row 1
>> ((Pcoords[i,]-Pcoords[1,])/2)
>> }else{
>> cont = cont + 1
>> new_pcoords[cont,] = Pcoords[i,]  # keep the original point
>> cont = cont + 1
>> new_pcoords[cont,] = Pcoords[i,] -  # midpoint between row i and row i+1
>> ((Pcoords[i,]-Pcoords[i+1,])/2)}}
>> Pcoords = new_pcoords}  # the next pass subdivides the refined ring
>> return(Pcoords)
>>
>> }
>> )
>>
>> setGeneric("fishIout" ,  # generic: algebraic inverse of the fishIin() transform
>> function(x, value){standardGeneric ("fishIout")})
>>
>> setMethod("fishIout" ,
>> signature = "numeric",
>> function(x, value){
>> # x: a single numeric value; value: distortion parameter d (d = 0 gives the identity). Returns s * |x| / (d + 1 - d * |x|), with s = 1 when x > 0 and -1 otherwise.
>> d = value
>> if (x > 0){  # if() needs a length-1 condition, so x must be one value (callers use sapply for vectors)
>> signo = 1
>> }else{
>> signo = -1
>> }
>> x = abs(x)  # work on the magnitude; the sign is re-applied below
>> return(signo*(-(x/((d*x)-d-1))))  # same as signo * x / (d + 1 - d*x)
>> }
>> )
>>
>> setGeneric("fishIin" ,  # generic: fisheye transform of a single value
>> function(x, value){standardGeneric ("fishIin")})
>>
>> setMethod("fishIin" ,
>> signature = "numeric",
>> function(x, value){
>> # x: a single numeric value; value: distortion parameter d (d = 0 gives the identity). Returns s * (d + 1) * |x| / (d * |x| + 1), with s = 1 when x > 0 and -1 otherwise.
>> d = value
>> if (x > 0){  # if() needs a length-1 condition, so x must be one value (toHiperbolico() applies this via sapply)
>> signo = 1
>> }else{
>> signo = -1
>> }
>> x = abs(x)  # work on the magnitude; the sign is re-applied below
>> # For d > 0 this maps |x| in [0, 1] onto [0, 1] (0 -> 0, 1 -> 1).
>> return(signo*(((d+1)*x)/(d*x+1)))
>> }
>> )
>>
>> setGeneric("toPolar" ,  # generic: Cartesian (x, y) to polar (angle, radius)
>> function(x, y){standardGeneric ("toPolar")})
>>
>> setMethod("toPolar" ,
>> signature = "numeric",  # dispatches on the class of x only
>> function(x, y){
>> # x, y: numeric coordinates of equal length. Returns c(all angles, all radii); callers reshape it with matrix(..., nc = 2) into columns angle | radius.
>> t1 = atan2(y,x)  # angle in radians
>> rP = sqrt(x^2+y^2)  # distance from the origin
>> return(c(t1 = t1,rP = rP))
>>
>> }
>> )
>>
>> setGeneric("toCartesian" ,  # generic: polar (angle, radius) to Cartesian (x, y)
>> function(t1, rP){standardGeneric ("toCartesian")})
>>
>> setMethod("toCartesian" ,
>> signature = "numeric",  # dispatches on the class of t1 only
>> function(t1, rP){
>> # t1: angle(s) in radians; rP: radius/radii. Returns c(all x values, all y values); callers reshape it with matrix(..., nc = 2) into columns x | y.
>> x1 = rP*cos(t1)
>> y1 = rP*sin(t1)
>> return(c(x = x1,y = y1))
>>
>> }
>> )
>>
>> setGeneric("circulo" ,  # generic: compute (and optionally plot) the outline of a circle
>> function(cx, cy, r, circleCol, PLOT =
>> TRUE){standardGeneric ("circulo")})
>>
>> setMethod("circulo" ,
>> signature = "numeric",  # dispatches on the class of cx only
>> function(cx, cy, r, circleCol, PLOT = TRUE){
>> # cx, cy: centre; r: radius; circleCol: line colour used when plotting; PLOT: open a new plot of the circle when TRUE. Returns a 100 x 2 matrix of outline points.
>> t = seq(0,2*pi,length=100)  # 100 angles; 'length' partially matches seq()'s length.out
>> circle = t(rbind(cx+sin(t)*r,cy+cos(t)*r))  # columns: x | y
>> if (PLOT == TRUE)
>> plot(circle,type='l',,ylim=c(-1.15,1.15),xlim=c(-1.15,1.15),  # NOTE(review): ',,' passes an empty argument to plot() - presumably a typo
>> ann=FALSE, axes=F, col = circleCol)
>> return(circle)
>>
>> }
>> )
>>
>> setGeneric("circulin" ,  # generic: draw a small circle on the current plot and record which points fall inside it
>> function(cx, cy, r = 0.045,
>> objeto, col = 'blue', PLOT = TRUE, label = 0){
>> standardGeneric ("circulin")})
>>
>> setMethod("circulin" ,
>> signature = "ANY",
>> function(cx, cy, r = 0.045, objeto, col = 'blue', PLOT =
>> TRUE, label = 0){
>> # cx, cy: centre; r: radius; objeto: matrix whose first two columns are x | y; col: line colour; label: text drawn at the centre unless 0. PLOT is not used in this body.
>> t = seq(0,2*pi,length=100)
>> circle = t(rbind(cx+sin(t)*r,cy+cos(t)*r))  # outline points, columns x | y
>> points(circle,type='l', col = col)  # adds to the existing plot; a plot must already be open
>> if (label != 0) text(cx,cy,label,cex = .7)
>> insiders <-  # one logical per row of objeto: TRUE when strictly inside the circle
>> apply(objeto,1,function(co)(cx-co[1])^2+(cy-co[2])^2<r^2)
>> assign('insiders', insiders , envir = POI.env)  # POI.env is not defined here; it must already exist (POIPlot() and the demo script create it globally)
>>
>> }
>> )
>>
>> setGeneric("addNoise" ,  # generic: jitter duplicated rows of a matrix
>> function(m, tamanyo = 0.01){standardGeneric ("addNoise")})
>>
>> setMethod("addNoise" ,
>> signature = "matrix",
>> function(m, tamanyo = 0.01){
>> # m: numeric matrix; tamanyo: standard deviation of the Gaussian noise. Returns m with every repeated row (all but its first occurrence) replaced by a noisy copy; other rows are unchanged. Uses rnorm(), so results depend on the RNG state.
>> noise = function(m, t = tamanyo){
>> ruido = rnorm(length(m), 0,t)  # one draw per matrix cell
>> return(m+ruido)
>> }
>> noised = noise(m)  # noisy version of the whole matrix
>> unicos = which(duplicated(m) == FALSE)  # row indices of first occurrences (duplicated() works row-wise on a matrix)
>> m[-unicos,] = noised[-unicos,]  # overwrite only the repeated rows
>> return(m)
>>
>> }
>> )
>>
>> setGeneric("toHiperbolico" ,  # generic: apply the fishIin() radial distortion to a set of 2-D points
>> function(objeto, M = 1 , cx = 0, cy = 0, r = 1){
>> standardGeneric ("toHiperbolico")})
>>
>> setMethod("toHiperbolico" ,
>> signature = "matrix",
>> function(objeto, M = 1 , cx = 0, cy = 0, r = 1){
>> # objeto: matrix with columns x | y; M: distortion parameter passed to fishIin(); cx, cy, r: circle used for the inside test. Returns list(objetoC = transformed x | y, objetoP = angle | transformed radius).
>> insiders =  # TRUE for rows strictly inside the circle (cx, cy, r)
>> apply(objeto,1,function(co)(cx-co[1])^2+(cy-co[2])^2<r^2)
>> outers = which(insiders < 1)  # rows on or outside the circle
>> objetoP = matrix(toPolar(objeto[,1],objeto[,2]),nc=2)  # columns angle | radius, taken about the origin; NOTE(review): cx/cy are not subtracted - confirm they are meant to stay 0
>> if (length(outers)){
>> objetoP[outers,2] = 1  # pull outside points onto radius 1 (a literal 1, not r)
>> }
>> objetoP[,2] = sapply(objetoP[,2],fishIin,M)  # distort each radius individually
>> objetoC = matrix(toCartesian(objetoP[,1],objetoP[,2]),nc=2)  # back to x | y
>> return(list(objetoC = objetoC,
>> objetoP = objetoP))
>>
>> }
>> )
>>
>> setGeneric("POIcoords<-" , function(object, value){standardGeneric  # generic for the replacement form POIcoords(obj) <- value
>> ("POIcoords<-")})
>>
>> setReplaceMethod( f ="POIcoords",
>> signature = 'POI',
>> definition = function(object, value){  # value: list with elements Pcoords, PcoordsFI, newPcoords and objeto (the shape POICalc() returns)
>> object at Pcoords <- value$Pcoords  # NOTE(review): ' at ' stands where R slot access '@' is expected - presumably rewritten by the list archive; restore before running
>> object at PcoordsFI <- value$PcoordsFI
>> object at newPcoords <- value$newPcoords
>> object at objeto <- value$objeto
>> # Only these four slots are touched; the updated object is returned.
>> return(object)
>> }
>> )
>>
>> setGeneric("POICalc" ,  # generic: place NC points of interest on a circle and position every item between them
>> function(objeto, NC, cx=0, cy=0, r=1,
>> ...){standardGeneric ("POICalc")})
>>
>> setMethod("POICalc" ,
>> signature = "POI",
>> function(objeto, NC, cx=0, cy=0, r=1, ...){
>> # objeto: POI object whose matrizSim slot has one row per item and one column per point of interest; NC: number of points of interest; r: circle radius. cx, cy and ... are not used in this body. Returns list(Pcoords, PcoordsFI, newPcoords, objeto).
>> MatrizSim = objeto at matrizSim  # NOTE(review): ' at ' stands where R slot access '@' is expected - presumably rewritten by the list archive
>> secuencia = seq(2/NC,2,2/NC)  # NC multiples of pi covering the full turn
>> Pcoords = matrix(rep(0,NC*2),nc=2)
>> n = 1
>> for (i in secuencia){
>> Pcoords[n,] = c(r * cos(i*pi), r * sin(i*pi))  # n-th point of interest on the circle
>> n = n+1
>> }
>> PcoordsFI = matrix(toPolar(Pcoords[,1],Pcoords[,2]),nc=2)
>> PcoordsFI[,2] = PcoordsFI[,2]+.15  # same angles, radius pushed out by 0.15
>> PcoordsFI = matrix(toCartesian(PcoordsFI[,1],PcoordsFI[,2]),nc=2)
>> # newPcoords: the ring of points of interest refined with midpoints (skipped when there is a single point)
>> if (nrow(Pcoords) != 1){
>> newPcoords = puntosMedios(Pcoords)
>> } else {
>> newPcoords = Pcoords
>> }
>>
>> MatrizSim[is.nan(MatrizSim/rowSums(MatrizSim))] <- 0  # zero the cells whose row-normalised value is NaN (0/0)
>>
>> W = MatrizSim / rowSums(MatrizSim)  # row-normalised weights
>> W[is.nan(W)] <- 0  # all-zero rows still give NaN; treat them as zero weight
>> nwords = nrow(W)
>> objeto = matrix(rep(0,2*nwords),nc=2)  # from here on objeto is the item coordinate matrix, no longer the POI argument
>> for (j in 1:nwords){
>> for (nPOI in 1:NC){
>> objeto[j,1] = objeto[j,1]+(W[j,nPOI]*Pcoords[nPOI,1])  # weighted sum of the POI x coordinates
>> objeto[j,2] = objeto[j,2]+(W[j,nPOI]*Pcoords[nPOI,2])  # weighted sum of the POI y coordinates
>> }
>> }
>>
>> objeto = addNoise(objeto)  # jitter items that landed on identical coordinates
>>
>> return(list(Pcoords = Pcoords,
>> PcoordsFI = PcoordsFI,
>> newPcoords = newPcoords,
>> objeto = objeto))
>>
>> }
>> )
>>
>> setGeneric("POIPlot" ,  # generic: draw a POI object on the current graphics device
>> function(POI){standardGeneric ("POIPlot")})
>>
>> setMethod("POIPlot" ,
>> signature = "POI",
>> function(POI){
>> # POI: a POI object with coordinates already filled in (see POICalc / POIcoords<-). Draws the plot and stores an updated copy of the object as 'POI' in the global POI.env. NOTE(review): ' at ' stands where R slot access '@' is expected - presumably rewritten by the list archive.
>> par(bg=POI at plotCol, mar = c(0.1,0.1,0.1,0.1), family = POI at itemsFamily)
>> # par() above changes device settings; they are not restored in this function.
>>
>> if (exists('POI.env')) {
>> if (exists('POI', envir = POI.env)) {
>> POI <- get('POI', envir = POI.env)  # a copy saved by an earlier call replaces the argument
>> }
>> }
>> # Unpack the slots into local variables.
>> selected = POI at selected
>> objeto = POI at objeto
>> newcoords = POI at newcoords
>> newcoords_1 = POI at newcoords_1
>> NC = length(POI at wordsInQuery)
>> cx=0
>> cy=0
>> r=1
>> etiq2 = POI at docs[,1]  # item labels
>> etiq = POI at wordsInQuery  # point-of-interest labels
>> fishEYE = TRUE  # not used below
>> M = POI at M
>> poisTextCol = POI at poisTextCol
>> colores = POI at colores[POI at docs]  # colours indexed by the docs matrix
>> poisCircleCol = POI at poisCircleCol
>> linesCol = POI at linesCol
>> itemsCol = POI at itemsCol
>> circleCol = POI at circleCol
>> LABELS = POI at LABELS
>> Pcoords = POI at Pcoords
>> newPcoords = POI at newPcoords
>> cgnsphrFont = POI at cgnsphrFont
>>
>> newcoords_par = newcoords  # not used below
>> # Build offset matrices (columns: newcoords | newcoords_1), one row per point, for each coordinate set.
>> newcoords_Pcoords = matrix(rep( c(newcoords,newcoords_1 ),
>> nrow(Pcoords)),nc=2,byrow=TRUE)
>>
>> newcoords_puntosMediosPcoords = matrix(rep( c(newcoords,newcoords_1),
>>
>> nrow(newPcoords)),nc=2,byrow=TRUE)
>>
>> newcoords = matrix(rep( c(newcoords,newcoords_1),
>> nrow(objeto)),nc=2,byrow=TRUE)
>> # Shift each coordinate set by the offset, then apply the fisheye transform.
>> objeto = objeto+newcoords
>> objetoH = toHiperbolico(objeto, M)
>> objetoC = objetoH$objetoC  # transformed x | y
>> objetoP = objetoH$objetoP  # angle | transformed radius
>>
>> Pcoords = Pcoords + newcoords_Pcoords
>> PcoordsH = toHiperbolico(Pcoords, M)
>> PcoordsC = PcoordsH$objetoC
>> PcoordsP = PcoordsH$objetoP  # not used below
>>
>> newPcoords = newPcoords + newcoords_puntosMediosPcoords
>> newPcoordsH = toHiperbolico(newPcoords, M)
>> Pcoords_objetoC = newPcoordsH$objetoC  # transformed outline through the points of interest
>> # Label anchors: same angle as each transformed point of interest, at radius 1.15.
>> if (LABELS) {
>> PcoordsFI = matrix(toPolar(PcoordsC[,1],PcoordsC[,2]),nc=2)
>> PcoordsFI[,2] = 1 +.15
>> PcoordsFI = matrix(toCartesian(PcoordsFI[,1],PcoordsFI[,2]),nc=2)
>> }
>> # Start the plot with the unit circle as frame.
>> plot(circulo(0,0,1, circleCol, PLOT =
>> FALSE),cex=.5,ylim=c(-1.15,1.15),xlim=c(-1.15,1.15),
>> ann=FALSE, axes=F,type='l', col = circleCol)
>> # Items: point size and label size shrink as the transformed radius grows.
>> points(objetoC, pch=19, col = colores, cex = 1.5 - objetoP[,2])
>>
>> text(objetoC[,1], objetoC[,2], labels = etiq2, cex = cgnsphrFont -
>> objetoP[,2],
>> pos = 3, col = itemsCol)
>>
>> abline(h = cx, col = 'grey', lty = 'dashed')  # horizontal line uses cx and vertical uses cy; both are 0 here
>> abline(v = cy, col = 'grey', lty = 'dashed')
>>
>>
>> points(PcoordsC,cex = 2, col = poisCircleCol)  # markers for the points of interest
>>
>> lines(Pcoords_objetoC, col = linesCol)  # outline through the refined ring
>>
>>
>> # Close the outline: last point back to the first.
segments(Pcoords_objetoC[nrow(Pcoords_objetoC),1],Pcoords_objetoC[nrow(Pcoords_objetoC),2],
>> Pcoords_objetoC[1,1],Pcoords_objetoC[1,2], col = linesCol)
>>
>> if (LABELS) {
>> text(PcoordsFI[,1],PcoordsFI[,2],toupper(etiq),cex=.75, col =
>> poisTextCol)
>> }
>>
>> if (selected != 1) {
>> circulin(0,0, .5, objeto = objetoC) # probando ("testing")
>> }
>> # Save the shifted state globally so the next call (which reloads it at the top) continues from it.
>> if (!exists('POI.env')){
>> POI.env <<- new.env()
>> }
>> poiCOPY = POI
>> poiCOPY at objeto <- objeto  # NOTE(review): stored coordinates already include the offset, so a repeated call with a non-zero offset would shift again - confirm intended
>> poiCOPY at objetoC <- objetoC
>> poiCOPY at newPcoords <- newPcoords
>> poiCOPY at Pcoords <- Pcoords
>> assign('POI',poiCOPY , envir = POI.env)
>>
>> }
>> )
>>
>>
>> # VERY basic kmeans example with 6 clusters and 10
>> # variables
>> x <- matrix(rnorm(100, mean = 1, sd = .3), ncol = 10)  # 10 rows x 10 columns drawn around mean 1
>> x <- rbind(x,matrix(rnorm(200, mean = 5, sd = .3), ncol = 10))  # 20 rows around mean 5
>> x <- rbind(x,matrix(rnorm(100, mean = 10, sd = .3), ncol = 10))  # 10 rows around mean 10
>> x <- rbind(x,matrix(rnorm(100, mean = 15, sd = .3), ncol = 10))  # 10 rows around mean 15
>> x <- rbind(x,matrix(rnorm(200, mean = 20, sd = .3), ncol = 10))  # 20 rows around mean 20
>> x <- rbind(x,matrix(rnorm(100, mean = 25, sd = .3), ncol = 10))  # 10 rows around mean 25; x is now 80 x 10
>> # Fit k-means with 6 centres; no seed is set, so the data and the fit differ between runs.
>> cl <- kmeans(x, 6, iter.max = 100 ,nstart = 25)
>>
>> # VERY basic way of reordering cluster output for
>> # better plotting
>> # here we reorder using just the first cluster
>> reorder.cl <- as.numeric(names(sort(rank((as.matrix(dist(cl$centers,  # cluster numbers ordered by the distance of their centre from centre 1
>> diag = T)))[,1]))))
>> cl$centers <- cl$centers[reorder.cl, ]
>> cl$size <- cl$size[reorder.cl]
>> # NOTE(review): cl$cluster is not renumbered to match the new order; it is only used for colours below - confirm that is intended
>> # distance matrix between each element and every cluster center
>> matrizSim = matrix(rep(0, nrow(cl$centers) * nrow(x)), ncol =
>> nrow(cl$centers))
>> for (n in 1:nrow(cl$centers)){
>> for (i in 1:nrow(x)) {
>> a = x[i,]
>> b = cl$centers[n,]
>> matrizSim[[i,n]] = dist(rbind(a,b)) # eucl dist
>> }
>> }
>>
>> # From dist to similarity (0 - 1)
>> matrizSim = 1 - (matrizSim / rowSums(matrizSim) )
>> # exaggerate similarity differences by cubing
>> matrizSim = matrizSim^3
>>
>> # Create POI plot
>> clusterPOI = new('POI')
>> clusterPOI at M = 1 # no fisheye distortion. NOTE(review): fishIin(x, 1) = 2x/(x+1), not the identity (d = 0 would be) - confirm intent; ' at ' presumably stands for '@' (rewritten by the list archive)
>> clusterPOI at matrizSim <- matrizSim
>> clusterPOI at wordsInQuery <- paste('"',  # one label per cluster; NOTE(review): the 2 on the next line is an argument of as.character(), not round() - presumably round(x, 2) was intended
>> as.character(round(cl$centers[,1]),2),'"', '
>> size',as.character(cl$size))
>> POIcoords(clusterPOI) <- POICalc(clusterPOI  # compute the layout and copy it into the object's slots
>> ,length(clusterPOI at wordsInQuery))
>> clusterPOI at docs <-
>>
>> cbind(matrix(seq(1:nrow(clusterPOI at objeto
))),matrix(seq(1:nrow(clusterPOI at objeto))))
>> clusterPOI at colores <- cl$cluster + 1  # colour index per item, derived from its cluster number
>> clusterPOI at cos.query.docs <- rep(1, length(cl$cluster))
>> POI.env <<- new.env()  # fresh global environment, so POIPlot() starts without a saved copy
>> POIPlot(clusterPOI)
>
>
More information about the R-help
mailing list