[R] distribution graph

Wentzel-Larsen, Tore tore.wentzel-larsen at helse-bergen.no
Tue Jun 12 15:10:20 CEST 2007


The following gives two functions for producing distribution graphs:

distribution.graph

produces a single graph, and

multiple.distribution.graph

produces a number of graphs side by side.

Regards,
Tore Wentzel-Larsen
statistician
Centre for Clinical research
Armauer Hansen house 
Haukeland University Hospital
N-5021 Bergen
tlf   +47 55 97 55 39 (a)
faks  +47 55 97 60 88 (a)
email tore.wentzel-larsen at helse-bergen.no 


Documentation:

distribution.graph

Description

distribution.graph produces a distribution graph of the data values. 

Usage

distribution.graph(xx, grouping=FALSE,
	ngroups=10, xplace=c(0,1,.5),  halfband=.25,
	xlab='', ylab='', pch=16,
	lines=FALSE, lty='solid')

Arguments

xx		numeric, a vector of values for which to produce the 
		distribution graph. Missing values are allowed, and are 
		disregarded.

grouping	logical, if FALSE (the default) the actual values are graphed,
		if TRUE the values are grouped before being plotted.

ngroups	the number of groups (default 10) if grouping=TRUE.

xplace	vector with three components. The first two components define
		the horizontal plotting range. The last component defines the
		horizontal placement of the centre of the distribution graph.

halfband	half-length of the maximal horizontal band in the distribution
		graph, from the centre outwards. The bands should be within the 
		horizontal plotting range.

xlab, 
ylab		x and y axis labels, as in plot.default.

pch		plotting symbol, default 16 (solid circle).

lines		logical, if FALSE (the default) only points are plotted, if
		TRUE the points are connected by lines.

lty		line type, as in plot.default.


Value

A frequency table for the values actually plotted.

Examples

# a simple distribution graph with no grouping:
distribution.graph(floor(runif(100, 200, 310)))

# a similar graph with horizontal bars (lines) only:
distribution.graph(floor(runif(100, 200, 310)), lines=TRUE, pch='')

# a distribution graph with grouping (points or line bars):
distribution.graph(runif(1000 ,0, 3), grouping=TRUE)
distribution.graph(runif(1000, 0, 3), grouping=TRUE, lines=TRUE, pch='')

# a distribution graph with grouping, 5 groups:
distribution.graph(runif(1000, 0, 10), grouping=TRUE, ngroups=5)
distribution.graph(rbinom(1000, 20, .7), grouping=TRUE, ngroups=5)


- - - - - - - - - - - - - - -

multiple.distribution.graph

Description

multiple.distribution.graph produces a number of distribution graphs of the data values, side by side. 

Usage

multiple.distribution.graph(xx, grouping=FALSE,
	ngroups=10, xleft=0, xright=1, xmiddle=.5, xband=.5,
	xlab=c(1:length(xx)), ylab='', pch=16,
	lines=FALSE, lty='solid')
Arguments

xx		list of numeric vectors, one for each distribution graph to be
		produced. Missing values are allowed, and are disregarded.

grouping	logical, if FALSE (the default) the actual values are graphed,
		if TRUE the values are grouped before being plotted.

ngroups	the number of groups (default 10) if grouping=TRUE.

xleft
xright
xmiddle	xleft and xright define the horizontal plotting range within
		each distribution graph. xmiddle defines the horizontal placement
		of the centre of each distribution graph.

xband		the part actually used for plotting, of the horizontal range
		allocated to each individual graph.

xlab, 
ylab		x and y axis labels, as in plot.default.

pch		plotting symbol, default 16 (solid circle).

lines		logical, if FALSE (the default) only points are plotted, if
		TRUE the points are connected by lines.

lty		line type, as in plot.default.


Value

A list of frequency tables for the values actually plotted.


Examples

par(ask=TRUE)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))))
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
	grouping=TRUE)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
	grouping=TRUE,ngroups=3)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
	grouping=TRUE,ngroups=3,lines=TRUE)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
	grouping=TRUE,ngroups=3,lines=TRUE,pch='')
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
	grouping=TRUE,ngroups=5,lines=TRUE,pch='')
par(ask=FALSE)

# a more complicated list of numeric vectors:
xx <- as.list(as.list(data.frame(matrix(runif(72,10,45),ncol=9))))
xx[[1]][c(1,3,4,8)]<- NA
xx[[2]][c(2,4)]<- NA
xx[[4]][c(3)]<- NA
xx[[6]][c(2,5,8)]<- NA
xx[[8]][c(1,2,8)]<- NA
xx <- lapply(xx,stripmiss)
xx[[1]][c(3)]<- NA
xx[[3]][c(1,3,4,5)]<- NA
xx[[4]][c(2,3)]<- NA
xx[[8]][c(3,4)]<- NA

multiple.distribution.graph(xx)
multiple.distribution.graph(xx,grouping=TRUE,ngroups=3,lines=TRUE,
	pch='')
multiple.distribution.graph(xx,grouping=TRUE,ngroups=3,lines=TRUE,
	pch='.',lty='blank')






Code:
- - -

# auxiliary functions: stripmiss and grouping.v :

# Drop the missing elements of a vector.
#
# Arguments:
#   xx  a vector, possibly containing missing values.
#
# Value:
#   The elements of xx for which is.na() is FALSE, in their original order.

stripmiss <- function(xx) {
  keep <- !is.na(xx)
  xx[keep]
}

# Group a numeric vector into a specified number of intervals of equal size.
#
# Arguments:
#   xx       numeric vector without missing values (the callers in this file
#            strip missing values before grouping).
#   ngroups  number of groups, a single number >= 1 (default 10).
#   eps      small offset used when rounding, so that the minimum falls in
#            group 1 and the maximum in group `ngroups`.
#
# Value:
#   A numeric vector of the same length as xx in which every element is
#   replaced by the representative value of its group. The representatives
#   are `ngroups` equally spaced values from min(xx) to max(xx). With
#   ngroups == 1 every element is replaced by mean(xx). If all elements of
#   xx are equal, or xx is empty, xx is returned unchanged.

grouping.v <- function(xx, ngroups = 10, eps = .001) {
  if (length(ngroups) != 1 || is.na(ngroups) || ngroups < 1) {
    stop("'ngroups' must be a single number >= 1")
  }
  # nothing to group; also avoids min()/max() warnings on empty input
  if (length(xx) == 0) {
    return(xx)
  }
  # one group: keep one entry per observation so that frequencies computed
  # from the result still count every observation
  if (ngroups == 1) {
    return(rep(mean(xx), length(xx)))
  }
  minx <- min(xx)
  maxx <- max(xx)
  # a constant vector cannot be split into intervals
  if (minx == maxx) {
    return(xx)
  }
  # group number (1..ngroups) of each element
  x1 <- round(.5 + eps + (xx - minx) * (ngroups - 2 * eps) / (maxx - minx))
  # representative value of each group
  minx + (x1 - 1) * (maxx - minx) / (ngroups - 1)
} # end function grouping.v (grouping of a vector)


# Draw a single distribution graph: every distinct value of xx is drawn as a
# horizontal row of points (one point per occurrence) centred on xplace[3].
#
# Arguments:
#   xx        numeric vector; missing values are removed before plotting.
#   grouping  logical; if TRUE the values are grouped with grouping.v()
#             before being plotted.
#   ngroups   number of groups used when grouping is TRUE.
#   xplace    c(left, right, centre): horizontal plotting range and the
#             horizontal position of the centre of the graph.
#   halfband  half-length of the row of the most frequent value; the rows of
#             the other values are scaled in proportion to their frequency.
#   xlab      label drawn below the centre of the graph.
#   ylab      y axis label.
#   pch       plotting symbol.
#   lines     logical; if TRUE the points of each row are joined by a line.
#   lty       line type used when lines is TRUE.
#
# Value:
#   A frequency table (dimension name 'distribution') of the values plotted.

distribution.graph <- function(xx, grouping=FALSE,
	ngroups=10, xplace=c(0,1,.5),  halfband=.25,
	xlab='', ylab='', pch=16,
	lines=FALSE, lty='solid') {
  x1 <- stripmiss(xx)
  if (length(x1) == 0) {
    stop("'xx' contains no non-missing values")
  }
  if (grouping) {
    x1 <- grouping.v(x1, ngroups = ngroups)
  }
  freq <- table(x1)
  xv <- as.numeric(names(freq)) # actual values
  xn <- as.numeric(freq)        # number of occurrences
  maxn <- max(xn)
  # Set up an empty plot covering the horizontal range and the data range.
  # Using the two corner points also works when there is one distinct value.
  plot(x = xplace[1:2], y = range(xv), type = 'n',
       xlab = '', ylab = ylab, axes = FALSE)
  box()
  axis(1, at = xplace[3], labels = xlab)
  axis(2)
  for (value.nr in seq_along(xv)) {
    n.act <- xn[value.nr]
    if (n.act == 1) {
      # a single occurrence is placed at the centre
      xpositions.act <- xplace[3]
    } else {
      # several occurrences are spread evenly over a band whose length is
      # proportional to the frequency
      halfband.act <- halfband * n.act / maxn
      xpositions.act <- seq(xplace[3] - halfband.act,
                            xplace[3] + halfband.act,
                            length.out = n.act)
    }
    yvalues.act <- rep(xv[value.nr], n.act)
    if (lines) {
      points(x = xpositions.act, y = yvalues.act,
             pch = pch, type = 'o', lty = lty)
    } else {
      points(x = xpositions.act, y = yvalues.act, pch = pch)
    }
  } # end for value.nr
  table(distribution = x1)
} # end function distribution.graph

# Demonstration of distribution.graph. The user is prompted before each new
# plot; the previous 'ask' setting is saved and restored afterwards instead
# of being forced to FALSE.
old.par <- par(ask=TRUE)
distribution.graph(floor(runif(100,200,310)))
distribution.graph(floor(runif(100,200,310)),lines=TRUE,pch='.')
distribution.graph(runif(1000,0,3),grouping=TRUE)
distribution.graph(runif(1000,0,3),grouping=TRUE,lines=TRUE,pch='')
distribution.graph(runif(1000,0,10),grouping=TRUE,ngroups=5)
distribution.graph(rbinom(1000,20,.7),grouping=TRUE,ngroups=5)
par(old.par)

# Draw several distribution graphs side by side in the same plot. Each
# component of xx gets a cell of width (xright - xleft); cells are separated
# by vertical lines and share a common vertical axis.
#
# Arguments:
#   xx        list of numeric vectors; missing values are removed.
#   grouping  logical; if TRUE each component is grouped with grouping.v()
#             before being plotted.
#   ngroups   number of groups used when grouping is TRUE.
#   xleft,
#   xright    horizontal range of the first cell; later cells are shifted
#             right by multiples of (xright - xleft).
#   xmiddle   added to the left edge of each cell to give the centre of its
#             graph.
#   xband     fraction of the cell width covered by the row of the most
#             frequent value over all components.
#   xlab      labels drawn below the centre of each graph.
#   ylab      y axis label.
#   pch       plotting symbol.
#   lines     logical; if TRUE the points of each row are joined by a line.
#   lty       line type, used for the joining lines and the cell separators.
#
# Value:
#   A list of frequency tables, one per component, of the values plotted.
#   A component without non-missing values yields an empty graph and table.

multiple.distribution.graph <- function(xx, grouping=FALSE,
	ngroups=10, xleft=0, xright=1, xmiddle=.5, xband=.5,
	xlab=c(1:length(xx)), ylab='', pch=16,
	lines=FALSE, lty='solid') {
  xx <- lapply(xx, stripmiss) # remove missing values
  if (grouping) {
    # empty components are left alone; there is nothing to group
    xx <- lapply(xx, function(v) {
      if (length(v) > 0) grouping.v(v, ngroups = ngroups) else v
    })
  }
  xtable <- lapply(xx, table)
  xtable.values <- lapply(xtable, function(tab) as.numeric(names(tab)))
  xtable.freq <- lapply(xtable, as.numeric)
  all.values <- unlist(xtable.values)
  if (length(all.values) == 0) {
    stop("'xx' contains no non-missing values")
  }
  max.freq <- max(unlist(xtable.freq))
  ncomp <- length(xx)
  cell.width <- xright - xleft
  plot.xtotal <- xleft + c(0, ncomp) * cell.width
  plot.ytotal <- range(all.values)
  plot.mids <- xleft + (seq_len(ncomp) - 1) * cell.width + xmiddle
  # empty plot covering all cells and the common range of the values
  plot(x = plot.xtotal, y = plot.ytotal, type = 'n',
       xlab = '', ylab = ylab,
       xlim = plot.xtotal, ylim = plot.ytotal, axes = FALSE)
  box()
  axis(1, at = plot.mids, labels = xlab)
  axis(2)
  for (comp in seq_len(ncomp)) {
    left.outer <- xleft + (comp - 1) * cell.width
    right.outer <- xleft + comp * cell.width
    # separators: left edge of the first cell, right edge of every cell
    if (comp == 1) abline(v = left.outer, lty = lty)
    abline(v = right.outer, lty = lty)
    mid <- plot.mids[comp]
    values.comp <- xtable.values[[comp]]
    freq.comp <- xtable.freq[[comp]]
    # seq_along() skips components that have no values at all
    for (nr in seq_along(values.comp)) {
      value.nr <- values.comp[nr]
      freq.nr <- freq.comp[nr]
      if (freq.nr == 1) {
        points(x = mid, y = value.nr, pch = pch)
      } else {
        # half-length of the row, proportional to the frequency relative to
        # the largest frequency over all components
        halfband.nr <- xband * (cell.width / 2) * freq.nr / max.freq
        xpositions.nr <- seq(mid - halfband.nr, mid + halfband.nr,
                             length.out = freq.nr)
        yvalues.nr <- rep(value.nr, freq.nr)
        if (lines) {
          points(x = xpositions.nr, y = yvalues.nr,
                 pch = pch, type = 'o', lty = lty)
        } else {
          points(x = xpositions.nr, y = yvalues.nr, pch = pch)
        }
      }
    } # end for nr
  } # end for comp
  xtable
} # end function multiple.distribution.graph



More information about the R-help mailing list