[R] Plots with k-means
eduardo san miguel
eduardosanmi at gmail.com
Mon Nov 2 13:53:16 CET 2009
Hello all,
I have almost finished the development of a new package where ideas
from Tamara Munzner, George Furnas and Costa and Venturini are
implemented.
1.- Da Costa, David & Venturini, Gilles (2006). An Interactive
Visualization Environment for Data Exploration Using Points of
Interest. adma 2006: 416-423
2.- Furnas, George (1986). Generalized Fisheye Views. Human Factors in
computing systems, CHI '86 conference proceedings, ACM, New York, pp.
16-23.
3.- Heidi Lam, Ronald A. Rensink, and Tamara Munzner (2006). Effects
of 2D Geometric Transformations on Visual Memory. Proc. Applied
Perception in Graphics and Visualization (APGV 2006), 119-126, 2006.
4.- Keith Lau, Ron Rensink, and Tamara Munzner (2004). Perceptual
Invariance of Nonlinear Focus+Context Transformations. Proc. First
Symposium on Applied Perception in Graphics and Visualization (APGV
04) 2004, pp 65-72.
This is a sample with some basic functionality and a VERY BASIC
example with kmeans plotting.
Comments will be greatly appreciated.
Regards
-- R CODE
require(methods)
setClass(Class = 'POI',
representation(matrizSim = 'matrix',cos.query.docs = 'vector',
wordsInQuery = 'ANY',docs = 'matrix', objeto = 'matrix', objetoC
= 'matrix',
Pcoords = 'matrix', PcoordsFI = 'matrix', newPcoords = 'matrix',
newcoords = 'numeric' ,
newcoords_1 = 'numeric', M = 'numeric', poisTextCol =
'character' , colores = 'vector' ,
poisCircleCol = 'character' , linesCol = 'character', itemsCol =
'character',
LABELS = 'logical', vscale = 'numeric', hscale = 'numeric',
circleCol = 'character',
plotCol = 'character', itemsFamily = 'character', lenteDefault
= 'numeric',
zoomDefault = 'numeric' , rateDefault = 'numeric' ,
topKDefault = 'numeric' ,
pal = 'character', selected = 'numeric' , circRadio =
'numeric' , IncVscale = 'numeric',
cgnsphrFont = 'numeric', xClick_old = 'numeric', yClick_old = 'numeric',
wordsInQueryFull = 'character' ),
prototype(cos.query.docs = 0, colores = 0, newcoords = 0,
newcoords_1 = 0, M = 3,
vscale = 0.5 , hscale = 1.5 , circleCol = 'black' ,
itemsCol = 'white',
poisTextCol = '#fff5ee', poisCircleCol = '#fff5ee',
linesCol = 'white',
plotCol = 'black', itemsFamily = 'sans', lenteDefault =
1, zoomDefault = 15 ,
rateDefault = 0.1 , topKDefault = 25, pal = 'topo' ,
selected = 1 ,
circRadio = 0.25 , IncVscale = 0.05 , cgnsphrFont =
1.01, LABELS = T)
)
setGeneric("puntosMedios" ,
function(Pcoords, detalle = 5){standardGeneric ("puntosMedios")})
setMethod("puntosMedios" ,
signature = "matrix",
function(Pcoords, detalle = 5){
for (i in 1:detalle){
new_pcoords = matrix(rep(0,4*nrow(Pcoords)), nrow = 2*
nrow(Pcoords), byrow = T )
cont = 0
for (i in 1:nrow(Pcoords)){
if (i == nrow(Pcoords)) {
cont = cont + 1
new_pcoords[cont,] = Pcoords[i,]
cont = cont + 1
new_pcoords[cont,] = Pcoords[i,] - ((Pcoords[i,]-Pcoords[1,])/2)
}else{
cont = cont + 1
new_pcoords[cont,] = Pcoords[i,]
cont = cont + 1
new_pcoords[cont,] = Pcoords[i,] - ((Pcoords[i,]-Pcoords[i+1,])/2)}}
Pcoords = new_pcoords}
return(Pcoords)
}
)
setGeneric("fishIout" ,
function(x, value){standardGeneric ("fishIout")})
setMethod("fishIout" ,
signature = "numeric",
function(x, value){
d = value
if (x > 0){
signo = 1
}else{
signo = -1
}
x = abs(x)
return(signo*(-(x/((d*x)-d-1))))
}
)
setGeneric("fishIin" ,
function(x, value){standardGeneric ("fishIin")})
setMethod("fishIin" ,
signature = "numeric",
function(x, value){
d = value
if (x > 0){
signo = 1
}else{
signo = -1
}
x = abs(x)
return(signo*(((d+1)*x)/(d*x+1)))
}
)
setGeneric("toPolar" ,
function(x, y){standardGeneric ("toPolar")})
setMethod("toPolar" ,
signature = "numeric",
function(x, y){
t1 = atan2(y,x)
rP = sqrt(x^2+y^2)
return(c(t1 = t1,rP = rP))
}
)
setGeneric("toCartesian" ,
function(t1, rP){standardGeneric ("toCartesian")})
setMethod("toCartesian" ,
signature = "numeric",
function(t1, rP){
x1 = rP*cos(t1)
y1 = rP*sin(t1)
return(c(x = x1,y = y1))
}
)
setGeneric("circulo" ,
function(cx, cy, r, circleCol, PLOT =
TRUE){standardGeneric ("circulo")})
setMethod("circulo" ,
signature = "numeric",
function(cx, cy, r, circleCol, PLOT = TRUE){
t = seq(0,2*pi,length=100)
circle = t(rbind(cx+sin(t)*r,cy+cos(t)*r))
if (PLOT == TRUE) plot(circle,type='l',,ylim=c(-1.15,1.15),xlim=c(-1.15,1.15),
ann=FALSE, axes=F, col = circleCol)
return(circle)
}
)
setGeneric("circulin" ,
function(cx, cy, r = 0.045,
objeto, col = 'blue', PLOT = TRUE, label = 0){
standardGeneric ("circulin")})
setMethod("circulin" ,
signature = "ANY",
function(cx, cy, r = 0.045, objeto, col = 'blue', PLOT =
TRUE, label = 0){
t = seq(0,2*pi,length=100)
circle = t(rbind(cx+sin(t)*r,cy+cos(t)*r))
points(circle,type='l', col = col)
if (label != 0) text(cx,cy,label,cex = .7)
insiders <- apply(objeto,1,function(co)(cx-co[1])^2+(cy-co[2])^2<r^2)
assign('insiders', insiders , envir = POI.env)
}
)
setGeneric("addNoise" ,
function(m, tamanyo = 0.01){standardGeneric ("addNoise")})
setMethod("addNoise" ,
signature = "matrix",
function(m, tamanyo = 0.01){
noise = function(m, t = tamanyo){
ruido = rnorm(length(m), 0,t)
return(m+ruido)
}
noised = noise(m)
unicos = which(duplicated(m) == FALSE)
m[-unicos,] = noised[-unicos,]
return(m)
}
)
setGeneric("toHiperbolico" ,
function(objeto, M = 1 , cx = 0, cy = 0, r = 1){
standardGeneric ("toHiperbolico")})
setMethod("toHiperbolico" ,
signature = "matrix",
function(objeto, M = 1 , cx = 0, cy = 0, r = 1){
insiders = apply(objeto,1,function(co)(cx-co[1])^2+(cy-co[2])^2<r^2)
outers = which(insiders < 1)
objetoP = matrix(toPolar(objeto[,1],objeto[,2]),nc=2)
if (length(outers)){
objetoP[outers,2] = 1
}
objetoP[,2] = sapply(objetoP[,2],fishIin,M)
objetoC = matrix(toCartesian(objetoP[,1],objetoP[,2]),nc=2)
return(list(objetoC = objetoC,
objetoP = objetoP))
}
)
setGeneric("POIcoords<-" , function(object, value){standardGeneric
("POIcoords<-")})
setReplaceMethod( f ="POIcoords",
signature = 'POI',
definition = function(object, value){
object at Pcoords <- value$Pcoords
object at PcoordsFI <- value$PcoordsFI
object at newPcoords <- value$newPcoords
object at objeto <- value$objeto
return(object)
}
)
setGeneric("POICalc" ,
function(objeto, NC, cx=0, cy=0, r=1,
...){standardGeneric ("POICalc")})
setMethod("POICalc" ,
signature = "POI",
function(objeto, NC, cx=0, cy=0, r=1, ...){
MatrizSim = objeto at matrizSim
secuencia = seq(2/NC,2,2/NC)
Pcoords = matrix(rep(0,NC*2),nc=2)
n = 1
for (i in secuencia){
Pcoords[n,] = c(r * cos(i*pi), r * sin(i*pi))
n = n+1
}
PcoordsFI = matrix(toPolar(Pcoords[,1],Pcoords[,2]),nc=2)
PcoordsFI[,2] = PcoordsFI[,2]+.15
PcoordsFI = matrix(toCartesian(PcoordsFI[,1],PcoordsFI[,2]),nc=2)
if (nrow(Pcoords) != 1){
newPcoords = puntosMedios(Pcoords)
} else {
newPcoords = Pcoords
}
MatrizSim[is.nan(MatrizSim/rowSums(MatrizSim))] <- 0
W = MatrizSim / rowSums(MatrizSim)
W[is.nan(W)] <- 0
nwords = nrow(W)
objeto = matrix(rep(0,2*nwords),nc=2)
for (j in 1:nwords){
for (nPOI in 1:NC){
objeto[j,1] = objeto[j,1]+(W[j,nPOI]*Pcoords[nPOI,1])
objeto[j,2] = objeto[j,2]+(W[j,nPOI]*Pcoords[nPOI,2])
}
}
objeto = addNoise(objeto)
return(list(Pcoords = Pcoords,
PcoordsFI = PcoordsFI,
newPcoords = newPcoords,
objeto = objeto))
}
)
setGeneric("POIPlot" ,
function(POI){standardGeneric ("POIPlot")})
setMethod("POIPlot" ,
signature = "POI",
function(POI){
par(bg=POI at plotCol, mar = c(0.1,0.1,0.1,0.1), family = POI at itemsFamily)
if (exists('POI.env')) {
if (exists('POI', envir = POI.env)) {
POI <- get('POI', envir = POI.env)
}
}
selected = POI at selected
objeto = POI at objeto
newcoords = POI at newcoords
newcoords_1 = POI at newcoords_1
NC = length(POI at wordsInQuery)
cx=0
cy=0
r=1
etiq2 = POI at docs[,1]
etiq = POI at wordsInQuery
fishEYE = TRUE
M = POI at M
poisTextCol = POI at poisTextCol
colores = POI at colores[POI at docs]
poisCircleCol = POI at poisCircleCol
linesCol = POI at linesCol
itemsCol = POI at itemsCol
circleCol = POI at circleCol
LABELS = POI at LABELS
Pcoords = POI at Pcoords
newPcoords = POI at newPcoords
cgnsphrFont = POI at cgnsphrFont
newcoords_par = newcoords
newcoords_Pcoords = matrix(rep( c(newcoords,newcoords_1 ),
nrow(Pcoords)),nc=2,byrow=TRUE)
newcoords_puntosMediosPcoords = matrix(rep( c(newcoords,newcoords_1),
nrow(newPcoords)),nc=2,byrow=TRUE)
newcoords = matrix(rep( c(newcoords,newcoords_1),
nrow(objeto)),nc=2,byrow=TRUE)
objeto = objeto+newcoords
objetoH = toHiperbolico(objeto, M)
objetoC = objetoH$objetoC
objetoP = objetoH$objetoP
Pcoords = Pcoords + newcoords_Pcoords
PcoordsH = toHiperbolico(Pcoords, M)
PcoordsC = PcoordsH$objetoC
PcoordsP = PcoordsH$objetoP
newPcoords = newPcoords + newcoords_puntosMediosPcoords
newPcoordsH = toHiperbolico(newPcoords, M)
Pcoords_objetoC = newPcoordsH$objetoC
if (LABELS) {
PcoordsFI = matrix(toPolar(PcoordsC[,1],PcoordsC[,2]),nc=2)
PcoordsFI[,2] = 1 +.15
PcoordsFI = matrix(toCartesian(PcoordsFI[,1],PcoordsFI[,2]),nc=2)
}
plot(circulo(0,0,1, circleCol, PLOT =
FALSE),cex=.5,ylim=c(-1.15,1.15),xlim=c(-1.15,1.15),
ann=FALSE, axes=F,type='l', col = circleCol)
points(objetoC, pch=19, col = colores, cex = 1.5 - objetoP[,2])
text(objetoC[,1], objetoC[,2], labels = etiq2, cex = cgnsphrFont -
objetoP[,2],
pos = 3, col = itemsCol)
abline(h = cx, col = 'grey', lty = 'dashed')
abline(v = cy, col = 'grey', lty = 'dashed')
points(PcoordsC,cex = 2, col = poisCircleCol)
lines(Pcoords_objetoC, col = linesCol)
segments(Pcoords_objetoC[nrow(Pcoords_objetoC),1],Pcoords_objetoC[nrow(Pcoords_objetoC),2],
Pcoords_objetoC[1,1],Pcoords_objetoC[1,2], col = linesCol)
if (LABELS) {
text(PcoordsFI[,1],PcoordsFI[,2],toupper(etiq),cex=.75, col = poisTextCol)
}
if (selected != 1) {
circulin(0,0, .5, objeto = objetoC) # probando
}
if (!exists('POI.env')){
POI.env <<- new.env()
}
poiCOPY = POI
poiCOPY at objeto <- objeto
poiCOPY at objetoC <- objetoC
poiCOPY at newPcoords <- newPcoords
poiCOPY at Pcoords <- Pcoords
assign('POI',poiCOPY , envir = POI.env)
}
)
# *strong*VERY*strong* basic kmeans example with 6 clusters and 10 variables
x <- matrix(rnorm(100, mean = 1, sd = .3), ncol = 10)
x <- rbind(x,matrix(rnorm(200, mean = 5, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(100, mean = 10, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(100, mean = 15, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(200, mean = 20, sd = .3), ncol = 10))
x <- rbind(x,matrix(rnorm(100, mean = 25, sd = .3), ncol = 10))
cl <- kmeans(x, 6, iter.max = 100 ,nstart = 25)
# *strong*VERY*strong* basic way of reordering cluster output for
better plotting
# here we reorder using just the first cluster
reorder.cl <- as.numeric(names(sort(rank((as.matrix(dist(cl$centers,
diag = T)))[,1]))))
cl$centers <- cl$centers[reorder.cl, ]
cl$size <- cl$size[reorder.cl]
# distance matrix between each element and its cluster center
matrizSim = matrix(rep(0, nrow(cl$centers) * nrow(x)), ncol = nrow(cl$centers))
for (n in 1:nrow(cl$centers)){
for (i in 1:nrow(x)) {
a = x[i,]
b = cl$centers[n,]
matrizSim[[i,n]] = dist(rbind(a,b)) # eucl dist
}
}
# From dist to similarity (0 - 1)
matrizSim = 1 - (matrizSim / rowSums(matrizSim) )
# exagerate similarity
matrizSim = matrizSim^3
# Create POI plot
clusterPOI = new('POI')
clusterPOI at M = 1 # no fisheye distorsion
clusterPOI at matrizSim <- matrizSim
clusterPOI at wordsInQuery <- paste('"',
as.character(round(cl$centers[,1]),2),'"', '
size',as.character(cl$size))
POIcoords(clusterPOI) <- POICalc(clusterPOI ,length(clusterPOI at wordsInQuery))
clusterPOI at docs <-
cbind(matrix(seq(1:nrow(clusterPOI at objeto))),matrix(seq(1:nrow(clusterPOI at objeto))))
clusterPOI at colores <- cl$cluster + 1
clusterPOI at cos.query.docs <- rep(1, length(cl$cluster))
POI.env <<- new.env()
POIPlot(clusterPOI)
More information about the R-help
mailing list