[R] graphically representing frequency of words in a speech?
Brown, Tony Nicholas
tony.n.brown at Vanderbilt.Edu
Sun Jun 7 23:02:47 CEST 2009
Thank you so much Mark and Gregor. The basic information, suggestions,
and R code that you provided is most helpful.
Tony
-----Original Message-----
From: Gorjanc Gregor [mailto:Gregor.Gorjanc at bfro.uni-lj.si]
Sent: Sunday, June 07, 2009 2:17 PM
To: Marc Schwartz; Brown, Tony Nicholas
Cc: rhelp help
Subject: RE: [R] graphically representing frequency of words in a
speech?
> The only thing that I found for R is by Gregor Gorjanc, but the
> information seems to be dated:
>
> http://www.bfro.uni-lj.si/MR/ggorjan/software/R/index.html#tagCloud
Hi,
Yes, I have tried to create a tag cloud plot in R, but I abandoned the
project
due to other things. The main obstacle was that in R we need to take
care of the fontsizes and placement of words, while this is very easy
with
say browsers, who do all the renderind. I tracked the last version of
the R file
which is pasted bellow. I must say that I do not remember the status of
the
code so use it as you wish. If anyone wishes to take this project
further, please
do so!
gg
### tagCloud.R
###---------------------------------------------------------------------
---
### What: Tag cloud plot functions
### Time-stamp: <2006-09-10 02:53:29 ggorjan>
###---------------------------------------------------------------------
---
tagCloud <- function(x, n=100, decreasing=TRUE,
threshold=NULL, fontsize=c(12, 36),
align=TRUE, expandRow=TRUE,
justRow="bottom", title,
textGpar=gpar(col="navy"),
rectGpar=gpar(col="white"),
titleGpar=gpar(), viewGpar=gpar(),
mar=c(1, 1, 1, 1))
{
UseMethod("tagCloud")
}
tagCloud.default <- function(x, n=100, decreasing=TRUE,
threshold=NULL, fontsize=c(12, 36),
align=TRUE, expandRow=TRUE,
justRow="bottom", title,
textGpar=gpar(col="navy"),
rectGpar=gpar(col="white"),
titleGpar=gpar(), viewGpar=gpar(),
mar=c(1, 1, 1, 1))
{
if(!is.null(dim(x))) stop("'x' must be a vector")
tagCloud.table(table(x), n=n, decreasing=decreasing,
fontsize=fontsize,
threshold=threshold, align=align, expandRow=expandRow,
justRow=justRow, title=title, textGpar=textGpar,
rectGpar=rectGpar, titleGpar=titleGpar,
viewGpar=viewGpar,
mar=mar)
}
tagCloud.table <- function(x, n=100, decreasing=TRUE,
threshold=NULL, fontsize=c(12, 36),
align=TRUE, expandRow=TRUE,
justRow="bottom", title,
textGpar=gpar(col="navy"),
rectGpar=gpar(col="white"),
titleGpar=gpar(), viewGpar=gpar(),
mar=c(1, 1, 1, 1))
{
## --- Check ---
if(length(dim(x)) != 1)
stop("'x' must be one dimensional table")
## --- Threshold ---
if(!is.null(threshold)) x <- x[x >= threshold]
## --- Number of units ---
N <- length(x) ## length of table
if(is.null(n)) { ## if n=NULL, plot all units
n <- N
} else {
if(n > N) n <- N ## if n is to big, decrease it
if(n < 1) n <- round(N * n) ## if n is percentage of units
}
fontsizeLength <- length(fontsize)
if(fontsizeLength != 2)
stop("'fontsize' must be of length two")
## --- Sort and subset ---
if(n < N) { ## only if we want to plot subset of units
tmp <- sort(x, decreasing=decreasing)
x <- x[names(x) %in% names(tmp[1:n])]
}
## --- Get relative freq ---
x <- prop.table(x)
## --- Fontsize ---
fontsizeDiff <- diff(fontsize)
xDiff <- max(x) - min(x)
if(xDiff != 0) {
off <- ifelse(fontsizeDiff > 0, min(x), max(x))
fontsize <- (x - off) / xDiff * fontsizeDiff + min(fontsize)
} else { ## all units have the same frequency
fontsize <- rep(min(fontsize), times=n)
}
## --- Viewport and rectangle ---
grid.newpage()
width <- unit(1, "npc")
height <- unit(1, "npc")
vp <- viewport(y=unit(mar[1], "lines"), x=unit(mar[2], "lines"), ,
width=width - unit(mar[2] + mar[4], "lines"),
height=height - unit(mar[1] + mar[3], "lines"),
just=c("left", "bottom"), gp=viewGpar, name="main")
pushViewport(vp)
if(!missing(title))
grid.text(title, y=height, gp=titleGpar, name="title")
grid.rect(gp=rectGpar, name="cloud")
## --- Grobs ---
tag <- vector(mode="list", length=4)
names(tag) <- c("fontsize", "grob", "width", "height")
tag[[1]] <- tag[[2]] <- tag[[3]] <- tag[[4]] <- vector(mode="list",
length=n)
for(i in 1:n) {
tag$fontsize[[i]] <- fontsize[i]
tag$grob[[i]] <- textGrob(names(x[i]),
gp=gpar(fontsize=fontsize[i]))
tag$width[[i]] <- convertWidth(grobWidth(tag$grob[[i]]),
unitTo="npc",
valueOnly=TRUE)
tag$height[[i]] <- convertHeight(grobHeight(tag$grob[[i]]),
unitTo="npc",
valueOnly=TRUE)
}
## --- Split lines ---
row <- colWidth <- vector(length=n)
row[1] <- 1
colWidth[1] <- 0
lineWidth <- tag$width[[1]]
j <- 1
gapWidth <- convertWidth(stringWidth(" "), unitTo="npc",
valueOnly=TRUE)
maxWidth <- convertWidth(width, unitTo="npc", valueOnly=TRUE)
for(i in 2:length(tag$width)) {
test <- lineWidth + gapWidth + tag$width[[i]]
if(test < maxWidth) {
row[i] <- row[i - 1]
colWidth[i] <- lineWidth + gapWidth
lineWidth <- test
j <- j + 1
} else {
if(align) { ## Align units in previous row
free <- maxWidth - lineWidth
if(j == 1) {
colWidth[i - 1] <- maxWidth / 2 - tag$width[[i - 1]] / 2
} else {
gapWidthAlign <- free / j
start <- i - (j - 1)
end <- start + j - 2
colWidth[start:end] <- colWidth[start:end] +
cumsum(rep(gapWidthAlign, times=(j - 1)))
}
}
row[i] <- row[i - 1] + 1
lineWidth <- tag$width[[i]]
colWidth[i] <- 0
j <- 1
}
}
rowHeight <- tapply(unlist(tag$height), list(row), max)
## --- Is there to many rows for given dimension of a rectangle ---
sumRowHeight <- sum(rowHeight)
heightNum <- convertWidth(height, unitTo="npc", valueOnly=TRUE)
if(sumRowHeight > heightNum) {
msg <- c("can not fit into defined dimension;",
"adjust dimension, fontsize or number of units;",
"keeping else constant, height should be at least",
sumRowHeight)
stop(cat(msg, fill=TRUE))
} else {
if(expandRow) { ## increase height of row to fit nicely
heightDiff <- heightNum - sumRowHeight
heightDiff <- heightDiff / max(row)
rowHeight <- rowHeight + heightDiff
}
}
## We have to plot from top to bottom and text should be in the bottom
## or center of the line
rowHeightCenter<- ifelse(justRow == "bottom", 0, rowHeight / 2)
rowHeight <- heightNum - (cumsum(rowHeight) - rowHeightCenter)
rowHeight <- rowHeight[row]
textGpar$fontsize <- unlist(tag$fontsize)
grid.text(label=names(x), gp=textGpar,
x=unit(colWidth, units="npc"),
y=unit(rowHeight, units="npc"), just=c("left", justRow),
name="tag")
}
## getNames()
## grid.edit("tag", gp=gpar(col="red"))
###---------------------------------------------------------------------
---
### tagCloud.R ends here
More information about the R-help
mailing list