[R] Creating a Clustered-Stacked Column Chart

Mon Oct 12 11:46:31 CEST 2009

On 10/12/2009 01:53 AM, zhijie zhang wrote:
> Thanks. I think there may be no easy method to achive it.
>    library(lattice)
>    barchart(Titanic, scales = list(x = "free"),auto.key = list(title
> ="Survived"),layout=c(4,1),horizontal = FALSE)
> The above method generates four graphs, two graphs in the left are for
> children's male and female,respectively and the right two graphs are for
> adult's male and female,respectively .
>    Actually, i hope to generate two graphs finally. Say the right two graphs
> for adult are overlaid with the left two graphs for children,respectively.
> Take the  "1st of x variable" as an example, in the place of "1st", the
> stacked bar for both children and adult should be displayed. Maybe the data
> for children and adult should be first shifted certain values to different
> directions and then applying the overlay function to get it.
>    My above ideas to display a data may be bad. Anyway, thanks a lot.
>    
Hi Zhijie,
This looked like an interesting challenge, so I bent the barp function a 
bit to do it. The barp3 function attached will accept a 3D array in 
which the rows represent groups, the columns subgroups and the files 
sub-subgroups. The arrangement of bars is that of the example you gave. 
In order to translate the 2D matrix of the example to the 3D array, I 
have written a little conversion function df2array. Try this:

clustack<-structure(list(Country = structure(c(3L, 2L, 1L),
  .Label = c("Asia","Europe", "N Amer"), class = "factor"),
  Q1.pencils = c(16L, 14L,18L), Q1.pens = c(12L, 9L, 14L),
  Q2.pencils = c(18L, 15L, 18L), Q2.pens = c(14L, 11L, 15L),
  Q3.pencils = c(17L, 11L, 20L), Q3.pens = c(11L, 8L, 15L),
  Q4.pencils = c(20L, 14L, 21L), Q4.pens = c(14L, 12L, 16L)),
  .Names = c("Country", "Q1.pencils","Q1.pens", "Q2.pencils", "Q2.pens", 
"Q3.pencils", "Q3.pens","Q4.pencils", "Q4.pens"),
  class = "data.frame", row.names = c(NA,-3L))

# x is the original data frame with the group names in the
# first column and the column names as in the example
# I read it in as a CSV file.
# depth is the length of the "files" (3rd dimension)
df2array<-function(x,depth) {
  dimx<-dim(x)
  if(dimx[2]%%depth) stop("depth must divide number of columns without 
remainder")
  column.order<-NULL
  for(d in 1:depth)
   column.order<-c(column.order,seq(d,dimx[2]-(depth-d),by=depth))
  return(array(unlist(x[,column.order]),c(dimx[1],dimx[2]/depth,depth)))
}

# this converts the data frame to the array
pp.array<-df2array(clustack[,2:9],2)

# this function is pretty much the same as barp
barp3<-function(height,width=0.4,names.arg=NULL,
  legend.lab=NULL,legend.pos="e",col=NULL,border=par("fg"),
  main=NULL,xlab="",ylab="",xlim=NULL,ylim=NULL,
  staxx=FALSE,staxy=FALSE,height.at=NULL,
  height.lab=NULL,cex.axis=par("cex.axis"),
  do.first=NULL) {

  if(is.data.frame(height)) its_ok<-is.numeric(unlist(height))
  else its_ok<-is.numeric(height)
  if(!its_ok) stop("barp3 can only display bars with numeric heights")
  hdim<-dim(height)
  if(is.null(hdim) || length(hdim) != 3)
   stop("barp3 can only plot 3 dimensional arrays")
  ngroups<-hdim[1]
  if(length(col)==hdim[3])
   barcol<-array(rep(col,each=hdim[1]*hdim[2]),hdim)
  else barcol<-col
  if(is.null(xlim)) xlim<-c(0.4,ngroups+0.6)
  if(any(height<0,na.rm=TRUE))
   stop("Can't have negative bar heights in barp3")
  if(is.null(ylim)) {
   maxstack<-0
   for(group in 1:hdim[1]) {
    for(subgroup in 1:hdim[2]) {
     thistack<-sum(height[group,subgroup,],na.rm=TRUE)
     if(thistack > maxstack) maxstack<-thistack
    }
   }
   ylim<-c(0,maxstack*1.05)
  }
  plot(0,type="n",main=main,xlab=xlab,ylab=ylab,
   axes=FALSE,xlim=xlim,ylim=ylim,xaxs="i",yaxs="i")
  if(!is.null(do.first)) eval(do.first)
  if(is.null(names.arg)) names.arg<-1:ngroups
  if(staxx) {
   axis(1,at=1:ngroups,labels=rep("",ngroups),
    cex.axis=cex.axis)
   staxlab(1,at=1:ngroups,labels=names.arg,cex=cex.axis)
  }
  else axis(1,at=1:ngroups,labels=names.arg,cex.axis=cex.axis)
  if(is.null(height.at)) height.at<-pretty(ylim)
  if(is.null(height.lab)) height.lab<-pretty(ylim)
  if(staxy) {
   axis(2,at=height.at,labels=rep("",length(height.lab)),
    cex.axis=cex.axis)
   staxlab(2,at=height.at,labels=height.lab,cex=cex.axis)
  }
  else axis(2,at=height.at,labels=height.lab,cex.axis=cex.axis)
  barwidth<-2*width/hdim[2]
  for(group in 1:hdim[1]) {
   left<-group-hdim[2]*barwidth/2
   for(subgroup in 1:hdim[2]) {
    bottom<-0
    for(subsub in 1:hdim[3]) {
     rect(left,bottom,left+barwidth,
      bottom+height[group,subgroup,subsub],
      col=barcol[group,subgroup,subsub],border=border)
     bottom<-bottom+height[group,subgroup,subsub]
    }
    left<-left+barwidth
   }
  }
  if(!is.null(legend.lab)) {
   xjust<-yjust<-0.5
   if(is.null(legend.pos)) {
    cat("Click at the lower left corner of the legend\n")
    legend.pos<-locator(1)
    xjust<-yjust<-0
   }
   if(legend.pos[1] == "e")
    legend.pos<-emptyspace(barpinfo,bars=TRUE)
   legend(legend.pos,legend=legend.lab,fill=col,
    xjust=xjust,yjust=yjust)
  }
  box()
}

# the colors should be an array of the same form as the data
barp3(pp.array,col=array(rep(1:8,each=3),c(3,4,2)),
  names.arg=clustack[,1])

Have fun.

Jim