[R] Plots with k-means

eduardo san miguel eduardosanmi at gmail.com
Mon Nov 2 21:43:29 CET 2009


I am sending the R code in an attached file.

2009/11/2 Iuri Gavronski <iuri at proxima.adm.br>:
> Eduardo,
>
> Would you mind sending me the R code in an attached file? Your code didn't
> work here, and I am not sure whether that is because of line breaks
> introduced by the email program.
>
> Iuri.
>
> On Mon, Nov 2, 2009 at 10:53 AM, eduardo san miguel <
eduardosanmi at gmail.com>
> wrote:
>>
>> Hello all,
>>
>> I have almost finished the development of a new package where ideas
>> from Tamara Munzner, George Furnas and Costa and Venturini are
>> implemented.
>>
>> 1.- Da Costa, David & Venturini, Gilles (2006). An Interactive
>> Visualization Environment for Data Exploration Using Points of
>> Interest. adma 2006: 416-423
>>
>> 2.- Furnas, George (1986). Generalized Fisheye Views. Human Factors in
>> computing systems, CHI '86 conference proceedings, ACM, New York, pp.
>> 16-23.
>>
>> 3.- Heidi Lam, Ronald A. Rensink, and Tamara Munzner (2006). Effects
>> of 2D Geometric Transformations on Visual Memory. Proc. Applied
>> Perception in Graphics and Visualization (APGV 2006), 119-126, 2006.
>>
>> 4.- Keith Lau, Ron Rensink, and Tamara Munzner (2004). Perceptual
>> Invariance of Nonlinear Focus+Context Transformations. Proc. First
>> Symposium on Applied Perception in Graphics and Visualization (APGV
>> 04) 2004, pp 65-72.
>>
>> This is a sample with some basic functionality and a VERY BASIC
>> example with kmeans plotting.
>>
>> Comments will be greatly appreciated.
>>
>> Regards
>>
>> -- R CODE
>> require(methods)
>>
# S4 class 'POI': state container for a Points-Of-Interest plot.
#
# Slots read or written by the code in this post:
#   matrizSim    - similarity matrix read by POICalc().
#   wordsInQuery - POI labels; its length is used as the number of POIs.
#   docs         - matrix; column 1 is used as item labels and the whole
#                  matrix is used to index 'colores' in POIPlot().
#   objeto, objetoC, Pcoords, PcoordsFI, newPcoords
#                - coordinate matrices filled by 'POIcoords<-' / POIPlot().
#   newcoords, newcoords_1
#                - x / y offsets added to every coordinate in POIPlot().
#   M            - distortion value forwarded to toHiperbolico().
#   colores, poisTextCol, poisCircleCol, linesCol, itemsCol, circleCol,
#   plotCol, itemsFamily, cgnsphrFont, LABELS, selected
#                - drawing options read by POIPlot().
# The remaining slots are declared here but not used by the code shown in
# this post (presumably consumed by other parts of the package -- confirm).
setClass(
  Class = "POI",
  representation = representation(
    matrizSim        = "matrix",
    cos.query.docs   = "vector",
    wordsInQuery     = "ANY",
    docs             = "matrix",
    objeto           = "matrix",
    objetoC          = "matrix",
    Pcoords          = "matrix",
    PcoordsFI        = "matrix",
    newPcoords       = "matrix",
    newcoords        = "numeric",
    newcoords_1      = "numeric",
    M                = "numeric",
    poisTextCol      = "character",
    colores          = "vector",
    poisCircleCol    = "character",
    linesCol         = "character",
    itemsCol         = "character",
    LABELS           = "logical",
    vscale           = "numeric",
    hscale           = "numeric",
    circleCol        = "character",
    plotCol          = "character",
    itemsFamily      = "character",
    lenteDefault     = "numeric",
    zoomDefault      = "numeric",
    rateDefault      = "numeric",
    topKDefault      = "numeric",
    pal              = "character",
    selected         = "numeric",
    circRadio        = "numeric",
    IncVscale        = "numeric",
    cgnsphrFont      = "numeric",
    xClick_old       = "numeric",
    yClick_old       = "numeric",
    wordsInQueryFull = "character"
  ),
  prototype = prototype(
    cos.query.docs = 0,
    colores        = 0,
    newcoords      = 0,
    newcoords_1    = 0,
    M              = 3,
    vscale         = 0.5,
    hscale         = 1.5,
    circleCol      = "black",
    itemsCol       = "white",
    poisTextCol    = "#fff5ee",
    poisCircleCol  = "#fff5ee",
    linesCol       = "white",
    plotCol        = "black",
    itemsFamily    = "sans",
    lenteDefault   = 1,
    zoomDefault    = 15,
    rateDefault    = 0.1,
    topKDefault    = 25,
    pal            = "topo",
    selected       = 1,
    circRadio      = 0.25,
    IncVscale      = 0.05,
    cgnsphrFont    = 1.01,
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    LABELS         = TRUE
  )
)
>>
# puntosMedios(Pcoords, detalle = 5)
#
# Refines a closed polygon by repeatedly inserting the midpoint of every
# edge, including the closing edge from the last vertex back to the first.
#
# Pcoords : two-column matrix, one vertex per row.
# detalle : number of refinement passes; each pass doubles the row count.
# Returns : matrix with nrow(Pcoords) * 2^detalle rows, alternating
#           original vertices and edge midpoints.
setGeneric("puntosMedios", function(Pcoords, detalle = 5) {
  standardGeneric("puntosMedios")
})

setMethod(
  "puntosMedios",
  signature = "matrix",
  function(Pcoords, detalle = 5) {
    # seq_len() so that detalle = 0 performs no pass (1:0 would run twice).
    for (pass in seq_len(detalle)) {
      n <- nrow(Pcoords)
      if (n == 0L) {
        break
      }
      # Row k of 'following' is the vertex after vertex k, wrapping around.
      following <- Pcoords[c(seq_len(n)[-1], 1L), , drop = FALSE]
      midpoints <- Pcoords - ((Pcoords - following) / 2)

      refined <- matrix(0, nrow = 2 * n, ncol = 2)
      refined[2 * seq_len(n) - 1, ] <- Pcoords
      refined[2 * seq_len(n), ] <- midpoints
      Pcoords <- refined
    }
    Pcoords
  }
)
>>
# fishIout(x, value)
#
# Computes sign * |x| / (d + 1 - d * |x|) with d = 'value'. Algebraically
# this is the inverse of |x| -> (d + 1) * |x| / (d * |x| + 1).
#
# x     : numeric scalar or vector.
# value : distortion factor d.
# Returns a numeric vector the same length as x. The sign follows the
# original rule (x > 0 -> +1, otherwise -1); x == 0 yields 0.
setGeneric("fishIout", function(x, value) {
  standardGeneric("fishIout")
})

setMethod(
  "fishIout",
  signature = "numeric",
  function(x, value) {
    d <- value
    # ifelse() instead of a scalar if(): if (x > 0) fails on vector input.
    signo <- ifelse(x > 0, 1, -1)
    magnitude <- abs(x)
    signo * (-(magnitude / ((d * magnitude) - d - 1)))
  }
)
>>
# fishIin(x, value)
#
# Computes sign * (d + 1) * |x| / (d * |x| + 1) with d = 'value'.
# For |x| in [0, 1] the result stays in [0, 1]; d = 0 is the identity.
#
# x     : numeric scalar or vector.
# value : distortion factor d.
# Returns a numeric vector the same length as x. The sign follows the
# original rule (x > 0 -> +1, otherwise -1); x == 0 yields 0.
setGeneric("fishIin", function(x, value) {
  standardGeneric("fishIin")
})

setMethod(
  "fishIin",
  signature = "numeric",
  function(x, value) {
    d <- value
    # ifelse() instead of a scalar if(): if (x > 0) fails on vector input.
    signo <- ifelse(x > 0, 1, -1)
    magnitude <- abs(x)
    signo * (((d + 1) * magnitude) / (d * magnitude + 1))
  }
)
>>
# toPolar(x, y)
#
# Converts Cartesian coordinates to polar form.
# Returns one named numeric vector: all angles first (name prefix "t1",
# from atan2(y, x)), then all radii (name prefix "rP"). Callers reshape it
# with matrix(..., ncol = 2) to get an angle column and a radius column.
setGeneric("toPolar", function(x, y) {
  standardGeneric("toPolar")
})

setMethod(
  "toPolar",
  signature = "numeric",
  function(x, y) {
    c(t1 = atan2(y, x), rP = sqrt(x^2 + y^2))
  }
)
>>
# toCartesian(t1, rP)
#
# Converts polar coordinates (angle t1, radius rP) to Cartesian form.
# Returns one named numeric vector: all x values first (name prefix "x"),
# then all y values (name prefix "y"). Callers reshape it with
# matrix(..., ncol = 2).
setGeneric("toCartesian", function(t1, rP) {
  standardGeneric("toCartesian")
})

setMethod(
  "toCartesian",
  signature = "numeric",
  function(t1, rP) {
    c(x = rP * cos(t1), y = rP * sin(t1))
  }
)
>>
# circulo(cx, cy, r, circleCol, PLOT = TRUE)
#
# Builds the outline of a circle as 100 points and optionally draws it.
#
# cx, cy    : centre of the circle.
# r         : radius.
# circleCol : line colour, used only when PLOT is true.
# PLOT      : if true, opens a new plot with fixed limits
#             [-1.15, 1.15] on both axes, no axes and no annotation.
# Returns a 100 x 2 matrix of (x, y) outline points; the first point is
# (cx, cy + r).
setGeneric("circulo", function(cx, cy, r, circleCol, PLOT = TRUE) {
  standardGeneric("circulo")
})

setMethod(
  "circulo",
  signature = "numeric",
  function(cx, cy, r, circleCol, PLOT = TRUE) {
    # Named 'angles' rather than 't' so the base function t() is not shadowed.
    angles <- seq(0, 2 * pi, length.out = 100)
    circle <- cbind(cx + sin(angles) * r, cy + cos(angles) * r)
    if (isTRUE(as.logical(PLOT))) {
      plot(circle, type = "l",
           ylim = c(-1.15, 1.15), xlim = c(-1.15, 1.15),
           ann = FALSE, axes = FALSE, col = circleCol)
    }
    circle
  }
)
>>
# circulin(cx, cy, r = 0.045, objeto, col = 'blue', PLOT = TRUE, label = 0)
#
# Draws a small circle on the current plot and records which rows of
# 'objeto' fall strictly inside it.
#
# cx, cy : centre of the circle.
# r      : radius.
# objeto : matrix; column 1 is x, column 2 is y, one point per row.
# col    : line colour of the circle.
# PLOT   : accepted for interface compatibility; the circle is always drawn.
# label  : if not 0, drawn as text at the centre.
#
# Side effect: stores the logical vector 'insiders' in the environment
# POI.env, creating POI.env in the global environment if it does not exist.
# Returns 'insiders' invisibly.
setGeneric(
  "circulin",
  function(cx, cy, r = 0.045, objeto, col = "blue", PLOT = TRUE, label = 0) {
    standardGeneric("circulin")
  }
)

setMethod(
  "circulin",
  signature = "ANY",
  function(cx, cy, r = 0.045, objeto, col = "blue", PLOT = TRUE, label = 0) {
    angles <- seq(0, 2 * pi, length.out = 100)
    outline <- cbind(cx + sin(angles) * r, cy + cos(angles) * r)
    points(outline, type = "l", col = col)
    if (label != 0) {
      text(cx, cy, label, cex = 0.7)
    }

    # Vectorised point-in-circle test (strict inequality, as before).
    insiders <- (cx - objeto[, 1])^2 + (cy - objeto[, 2])^2 < r^2

    # The original failed here when POI.env had not been created yet.
    if (!exists("POI.env")) {
      assign("POI.env", new.env(parent = globalenv()), envir = globalenv())
    }
    assign("insiders", insiders, envir = POI.env)
    invisible(insiders)
  }
)
>>
# addNoise(m, tamanyo = 0.01)
#
# Jitters duplicated rows of a matrix so coincident points become
# distinguishable. Rows that are the first occurrence of their value are
# returned untouched; each later duplicate gets N(0, tamanyo) noise added
# to every element.
#
# m       : matrix.
# tamanyo : standard deviation of the Gaussian noise.
# Returns a matrix with the same dimensions as m.
setGeneric("addNoise", function(m, tamanyo = 0.01) {
  standardGeneric("addNoise")
})

setMethod(
  "addNoise",
  signature = "matrix",
  function(m, tamanyo = 0.01) {
    # Noise is drawn for every element (not only the duplicates) so the
    # random-number stream is consumed exactly as before.
    jittered <- m + rnorm(length(m), 0, tamanyo)
    repeated <- duplicated(m)
    m[repeated, ] <- jittered[repeated, ]
    m
  }
)
>>
# toHiperbolico(objeto, M = 1, cx = 0, cy = 0, r = 1)
#
# Maps points into the unit disc with a fisheye-style radial distortion.
#
# objeto : matrix; column 1 is x, column 2 is y.
# M      : distortion value passed to fishIin() as 'value'.
# cx, cy, r : circle used for the inside/outside test.
#
# Steps: (1) flag points strictly inside the circle (cx, cy, r);
# (2) convert all points to polar form about the origin; (3) set the
# radius of every point that is not inside to 1; (4) transform each radius
# with fishIin(radius, M); (5) convert back to Cartesian.
#
# Returns list(objetoC = Cartesian matrix, objetoP = polar matrix with
# columns angle, transformed radius).
setGeneric(
  "toHiperbolico",
  function(objeto, M = 1, cx = 0, cy = 0, r = 1) {
    standardGeneric("toHiperbolico")
  }
)

setMethod(
  "toHiperbolico",
  signature = "matrix",
  function(objeto, M = 1, cx = 0, cy = 0, r = 1) {
    inside <- (cx - objeto[, 1])^2 + (cy - objeto[, 2])^2 < r^2

    objetoP <- matrix(toPolar(objeto[, 1], objeto[, 2]), ncol = 2)
    # Points on or outside the circle are clamped to radius 1.
    objetoP[!inside, 2] <- 1

    # vapply() calls fishIin() once per radius (works whether or not
    # fishIin is vectorised) and always yields a numeric vector.
    objetoP[, 2] <- vapply(objetoP[, 2], fishIin, numeric(1), value = M)

    objetoC <- matrix(toCartesian(objetoP[, 1], objetoP[, 2]), ncol = 2)
    list(objetoC = objetoC, objetoP = objetoP)
  }
)
>>
# POIcoords(object) <- value
#
# Replacement method that copies layout results into a 'POI' object.
#
# object : a 'POI' instance.
# value  : list with elements Pcoords, PcoordsFI, newPcoords and objeto
#          (the shape returned by POICalc()).
# Returns the updated object.
#
# Note: the slot operator '@' had been rewritten as ' at ' by the
# mailing-list archive's address obfuscation; it is restored here.
setGeneric("POIcoords<-", function(object, value) {
  standardGeneric("POIcoords<-")
})

setReplaceMethod(
  f = "POIcoords",
  signature = "POI",
  definition = function(object, value) {
    object@Pcoords <- value$Pcoords
    object@PcoordsFI <- value$PcoordsFI
    object@newPcoords <- value$newPcoords
    object@objeto <- value$objeto
    object
  }
)
>>
# POICalc(objeto, NC, cx = 0, cy = 0, r = 1, ...)
#
# Computes the layout for a POI plot.
#
# objeto : a 'POI' instance; only its 'matrizSim' slot is read
#          (rows = items, columns = POIs).
# NC     : number of POIs.
# cx, cy : accepted but not used by this method.
# r      : radius of the circle on which the POIs are placed.
#
# Returns a list:
#   Pcoords    - NC x 2 matrix, POIs evenly spaced on the circle at angles
#                (2/NC) * pi, (4/NC) * pi, ..., 2 * pi.
#   PcoordsFI  - the same points pushed outwards by 0.15.
#   newPcoords - Pcoords refined with puntosMedios() (unchanged if NC == 1).
#   objeto     - one row per item: the average of the POI positions
#                weighted by the row-normalised similarities, with
#                duplicated rows jittered by addNoise().
#
# Note: the slot operator '@' had been rewritten as ' at ' by the
# mailing-list archive's address obfuscation; it is restored here.
setGeneric(
  "POICalc",
  function(objeto, NC, cx = 0, cy = 0, r = 1, ...) {
    standardGeneric("POICalc")
  }
)

setMethod(
  "POICalc",
  signature = "POI",
  function(objeto, NC, cx = 0, cy = 0, r = 1, ...) {
    MatrizSim <- objeto@matrizSim

    angles <- seq(2 / NC, 2, 2 / NC) * pi
    Pcoords <- cbind(r * cos(angles), r * sin(angles))

    # Label anchor points: same angles, radius increased by 0.15.
    polar <- matrix(toPolar(Pcoords[, 1], Pcoords[, 2]), ncol = 2)
    polar[, 2] <- polar[, 2] + 0.15
    PcoordsFI <- matrix(toCartesian(polar[, 1], polar[, 2]), ncol = 2)

    newPcoords <- if (nrow(Pcoords) != 1) puntosMedios(Pcoords) else Pcoords

    # Row-normalise the similarities. Both NaN clean-ups are kept exactly
    # as in the original (entries first, then the resulting weights).
    MatrizSim[is.nan(MatrizSim / rowSums(MatrizSim))] <- 0
    W <- MatrizSim / rowSums(MatrizSim)
    W[is.nan(W)] <- 0

    # Weighted sum of POI positions, vectorised over items. POIs are added
    # in the original order so the floating-point result is unchanged.
    nwords <- nrow(W)
    positions <- matrix(0, nrow = nwords, ncol = 2)
    for (nPOI in seq_len(NC)) {
      positions[, 1] <- positions[, 1] + (W[, nPOI] * Pcoords[nPOI, 1])
      positions[, 2] <- positions[, 2] + (W[, nPOI] * Pcoords[nPOI, 2])
    }

    list(Pcoords = Pcoords,
         PcoordsFI = PcoordsFI,
         newPcoords = newPcoords,
         objeto = addNoise(positions))
  }
)
>>
# POIPlot(POI)
#
# Draws a POI plot on the current graphics device.
#
# POI : a 'POI' instance. If the environment POI.env already holds an
#       object named 'POI', that cached object replaces the argument
#       (par() is still set from the argument's plotCol / itemsFamily).
#
# Drawing steps: unit circle, items (size and label size shrink with the
# transformed radius), dashed axes through the origin, POI markers, the
# closed polygon through the refined POI points, POI labels at radius
# 1.15 when LABELS is TRUE, and a selection circle of radius 0.5 when
# 'selected' is not 1.
#
# Side effects:
#   * par(bg, mar, family) is changed and not restored.
#     NOTE(review): restoring via on.exit() may be desirable, but could
#     affect later drawing on the same plot -- confirm before changing.
#   * POI.env is created in the global environment if missing, and a copy
#     of the object with the shifted 'objeto', 'Pcoords', 'newPcoords' and
#     the transformed 'objetoC' is stored there under the name 'POI'.
# Returns the stored copy invisibly.
#
# Changes from the posted code: the slot operator '@' (rewritten as ' at '
# by the mailing-list archive) is restored; POI.env is created before
# circulin() needs it; exists()/get() no longer inherit into parent
# environments; unused locals are removed.
setGeneric("POIPlot", function(POI) {
  standardGeneric("POIPlot")
})

setMethod(
  "POIPlot",
  signature = "POI",
  function(POI) {
    par(bg = POI@plotCol, mar = c(0.1, 0.1, 0.1, 0.1),
        family = POI@itemsFamily)

    # Create the state environment up front: circulin() below writes to it.
    if (!exists("POI.env")) {
      assign("POI.env", new.env(parent = globalenv()), envir = globalenv())
    } else if (exists("POI", envir = POI.env, inherits = FALSE)) {
      POI <- get("POI", envir = POI.env, inherits = FALSE)
    }

    M <- POI@M
    etiq <- POI@wordsInQuery
    etiq2 <- POI@docs[, 1]
    colores <- POI@colores[POI@docs]

    # Constant (x, y) offset, repeated once per row of the target matrix.
    offset <- c(POI@newcoords, POI@newcoords_1)
    offset_rows <- function(m) {
      matrix(rep(offset, nrow(m)), ncol = 2, byrow = TRUE)
    }

    objeto <- POI@objeto + offset_rows(POI@objeto)
    objetoH <- toHiperbolico(objeto, M)
    objetoC <- objetoH$objetoC
    objetoP <- objetoH$objetoP

    Pcoords <- POI@Pcoords + offset_rows(POI@Pcoords)
    PcoordsC <- toHiperbolico(Pcoords, M)$objetoC

    newPcoords <- POI@newPcoords + offset_rows(POI@newPcoords)
    Pcoords_objetoC <- toHiperbolico(newPcoords, M)$objetoC

    if (POI@LABELS) {
      # Label anchors: same angle as each POI, fixed radius 1.15.
      PcoordsFI <- matrix(toPolar(PcoordsC[, 1], PcoordsC[, 2]), ncol = 2)
      PcoordsFI[, 2] <- 1 + 0.15
      PcoordsFI <- matrix(toCartesian(PcoordsFI[, 1], PcoordsFI[, 2]),
                          ncol = 2)
    }

    plot(circulo(0, 0, 1, POI@circleCol, PLOT = FALSE), cex = 0.5,
         ylim = c(-1.15, 1.15), xlim = c(-1.15, 1.15),
         ann = FALSE, axes = FALSE, type = "l", col = POI@circleCol)

    points(objetoC, pch = 19, col = colores, cex = 1.5 - objetoP[, 2])
    text(objetoC[, 1], objetoC[, 2], labels = etiq2,
         cex = POI@cgnsphrFont - objetoP[, 2], pos = 3, col = POI@itemsCol)

    abline(h = 0, col = "grey", lty = "dashed")
    abline(v = 0, col = "grey", lty = "dashed")

    points(PcoordsC, cex = 2, col = POI@poisCircleCol)

    # Polygon through the refined POI points, closed last -> first.
    lines(Pcoords_objetoC, col = POI@linesCol)
    last <- nrow(Pcoords_objetoC)
    segments(Pcoords_objetoC[last, 1], Pcoords_objetoC[last, 2],
             Pcoords_objetoC[1, 1], Pcoords_objetoC[1, 2],
             col = POI@linesCol)

    if (POI@LABELS) {
      text(PcoordsFI[, 1], PcoordsFI[, 2], toupper(etiq), cex = 0.75,
           col = POI@poisTextCol)
    }

    if (POI@selected != 1) {
      circulin(0, 0, 0.5, objeto = objetoC)   # experimental
    }

    poiCOPY <- POI
    poiCOPY@objeto <- objeto
    poiCOPY@objetoC <- objetoC
    poiCOPY@newPcoords <- newPcoords
    poiCOPY@Pcoords <- Pcoords
    assign("POI", poiCOPY, envir = POI.env)
    invisible(poiCOPY)
  }
)
>>
>>
# VERY basic k-means example: six groups of observations on 10 variables
# (80 rows in total), clustered into 6 clusters.
x <- rbind(
  matrix(rnorm(100, mean = 1,  sd = 0.3), ncol = 10),
  matrix(rnorm(200, mean = 5,  sd = 0.3), ncol = 10),
  matrix(rnorm(100, mean = 10, sd = 0.3), ncol = 10),
  matrix(rnorm(100, mean = 15, sd = 0.3), ncol = 10),
  matrix(rnorm(200, mean = 20, sd = 0.3), ncol = 10),
  matrix(rnorm(100, mean = 25, sd = 0.3), ncol = 10)
)

cl <- kmeans(x, 6, iter.max = 100, nstart = 25)

# VERY basic way of reordering the cluster output for better plotting:
# sort the centres by their distance to the first centre.
# Note: cl$cluster is not remapped; below it is only used to pick colours.
center_dist <- as.matrix(dist(cl$centers))
reorder.cl <- order(center_dist[, 1])
cl$centers <- cl$centers[reorder.cl, ]
cl$size    <- cl$size[reorder.cl]

# Euclidean distance between each observation and every cluster centre
# (rows = observations, columns = centres).
matrizSim <- vapply(
  seq_len(nrow(cl$centers)),
  function(n) sqrt(rowSums(sweep(x, 2, cl$centers[n, ])^2)),
  numeric(nrow(x))
)

# From distance to similarity (0 - 1)
matrizSim <- 1 - (matrizSim / rowSums(matrizSim))
# Exaggerate the similarity
matrizSim <- matrizSim^3

# Create the POI plot
clusterPOI <- new("POI")
# The original comment here read "no fisheye distortion".
# NOTE(review): fishIin() is the identity only for value = 0; with M = 1
# the radius is still transformed -- confirm the intended setting.
clusterPOI@M <- 1
clusterPOI@matrizSim <- matrizSim
# The posted code passed a stray '2' to as.character(), where it was
# ignored; the labels therefore show the first coordinate of each centre
# rounded to an integer. That behaviour is kept.
# NOTE(review): the ' size' literal was split by email line wrapping; a
# single leading space is assumed -- confirm against the attached file.
clusterPOI@wordsInQuery <- paste('"',
                                 as.character(round(cl$centers[, 1])),
                                 '"', ' size', as.character(cl$size))
POIcoords(clusterPOI) <- POICalc(clusterPOI, length(clusterPOI@wordsInQuery))

# Two identical columns holding the item index.
item_index <- seq_len(nrow(clusterPOI@objeto))
clusterPOI@docs <- cbind(item_index, item_index, deparse.level = 0)
clusterPOI@colores <- cl$cluster + 1
clusterPOI@cos.query.docs <- rep(1, length(cl$cluster))

# Fresh state environment so no previously cached POI is reused.
POI.env <- new.env()
POIPlot(clusterPOI)
>
>


More information about the R-help mailing list