[R] distribution graph
Wentzel-Larsen, Tore
tore.wentzel-larsen at helse-bergen.no
Tue Jun 12 15:10:20 CEST 2007
The following gives two functions for producing distribution graphs:
distribution.graph
produces a single graph, and
multiple.distribution.graph
produces a number of graphs side by side.
Regards,
Tore Wentzel-Larsen
statistician
Centre for Clinical research
Armauer Hansen house
Haukeland University Hospital
N-5021 Bergen
tlf +47 55 97 55 39 (a)
faks +47 55 97 60 88 (a)
email tore.wentzel-larsen at helse-bergen.no
Documentation:
distribution.graph
Description
distribution.graph produces a distribution graph of the data values.
Usage
distribution.graph(xx, grouping=FALSE,
ngroups=10, xplace=c(0,1,.5), halfband=.25,
xlab='', ylab='', pch=16,
lines=FALSE, lty='solid')
Arguments
xx numeric, a vector of values for which to produce the
distribution graph. Missing values are allowed, and are
disregarded.
grouping logical, if FALSE (the default) the actual values are graphed,
if TRUE the values are grouped before being plotted.
ngroups the number of groups (default 10) if grouping=TRUE.
xplace vector with three components. The first two components define
the horizontal plotting range. The last component defines the
horizontal placement of the centre of the distribution graph.
halfband half-length of the maximal horizontal band in the distribution
graph, from the centre outwards. The bands should be within the
horizontal plotting range.
xlab,
ylab x and y axis labels, as in plot.default.
pch plotting symbol, default 16 (solid circle).
lines logical, if FALSE (the default) only points are plotted, if
TRUE the points are connected by lines.
lty line type, as in plot.default.
Value
A frequency table for the values actually plotted.
Examples
# a simple distribution graph with no grouping:
distribution.graph(floor(runif(100, 200, 310)))
# a similar graph with horizontal bars only (no plotting symbols):
distribution.graph(floor(runif(100, 200, 310)), lines=TRUE, pch='')
# a distribution graph with grouping (points or line bars):
distribution.graph(runif(1000 ,0, 3), grouping=TRUE)
distribution.graph(runif(1000, 0, 3), grouping=TRUE, lines=TRUE, pch='')
# a distribution graph with grouping, 5 groups:
distribution.graph(runif(1000, 0, 10), grouping=TRUE, ngroups=5)
distribution.graph(rbinom(1000, 20, .7), grouping=TRUE, ngroups=5)
- - - - - - - - - - - - - - -
multiple.distribution.graph
Description
multiple.distribution.graph produces a number of distribution graphs of the data values, side by side.
Usage
multiple.distribution.graph(xx, grouping=FALSE,
ngroups=10, xleft=0, xright=1, xmiddle=.5, xband=.5,
xlab=c(1:length(xx)), ylab='', pch=16,
lines=FALSE, lty='solid')
Arguments
xx list of numeric vectors; one distribution graph is produced for each vector in the list. Missing values are allowed, and are disregarded.
grouping logical, if FALSE (the default) the actual values are graphed,
if TRUE the values are grouped before being plotted.
ngroups the number of groups (default 10) if grouping=TRUE.
xleft
xright
xmiddle xleft and xright define the horizontal plotting range within
each distribution graph. xmiddle defines the horizontal placement of the centre of each distribution graph.
xband the fraction of the horizontal range allocated to each individual
graph that is actually used for plotting.
xlab,
ylab x and y axis labels, as in plot.default.
pch plotting symbol, default 16 (solid circle).
lines logical, if FALSE (the default) only points are plotted, if
TRUE the points are connected by lines.
lty line type, as in plot.default.
Value
A list of frequency tables for the values actually plotted.
Examples
par(ask=TRUE)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))))
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
grouping=TRUE)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
grouping=TRUE,ngroups=3)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
grouping=TRUE,ngroups=3,lines=TRUE)
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
grouping=TRUE,ngroups=3,lines=TRUE,pch='')
multiple.distribution.graph(as.list(data.frame(matrix(runif(72),ncol=9))),
grouping=TRUE,ngroups=5,lines=TRUE,pch='')
par(ask=FALSE)
# a more complicated list of numeric vectors:
xx <- as.list(as.list(data.frame(matrix(runif(72,10,45),ncol=9))))
xx[[1]][c(1,3,4,8)]<- NA
xx[[2]][c(2,4)]<- NA
xx[[4]][c(3)]<- NA
xx[[6]][c(2,5,8)]<- NA
xx[[8]][c(1,2,8)]<- NA
xx <- lapply(xx,stripmiss)
xx[[1]][c(3)]<- NA
xx[[3]][c(1,3,4,5)]<- NA
xx[[4]][c(2,3)]<- NA
xx[[8]][c(3,4)]<- NA
multiple.distribution.graph(xx)
multiple.distribution.graph(xx,grouping=TRUE,ngroups=3,lines=TRUE,
pch='')
multiple.distribution.graph(xx,grouping=TRUE,ngroups=3,lines=TRUE,
pch='.',lty='blank')
Code:
- - -
# auxiliary functions: stripmiss and grouping.v :
# Drop the missing values (NA) from a vector; the remaining
# elements are returned in their original order.
stripmiss <- function(xx) {
  keep <- !is.na(xx)
  xx[keep]
} # end function stripmiss
# Grouping of a vector into a specified number of intervals of
# equal size. Every element is replaced by the representative
# value of its group, so the result always has the same length
# as the input.
#
# Arguments:
#   xx      numeric vector without missing values (strip them first).
#   ngroups number of groups, at least 1 (default 10).
#   eps     small offset that keeps the extreme values from falling
#           exactly on a rounding boundary.
# Value:
#   A numeric vector of the same length as xx. For ngroups > 1 the
#   group representatives are equally spaced from min(xx) to max(xx).
#   For ngroups == 1 every element is replaced by mean(xx). A constant
#   or empty vector is returned unchanged.
grouping.v <- function(xx,ngroups=10,eps=.001) {
  if (!is.numeric(ngroups) || length(ngroups) != 1 ||
      is.na(ngroups) || ngroups < 1) {
    stop("'ngroups' must be a single number >= 1", call. = FALSE)
  }
  # nothing to group:
  if (length(xx) == 0) return(xx)
  # one group: all values share the mean as representative; the
  # length is preserved so that frequencies stay correct downstream
  if (ngroups == 1) return(rep(mean(xx), length(xx)))
  minx <- min(xx)
  maxx <- max(xx)
  # constant vector: grouping would divide by zero, and there is
  # only one value anyway
  if (minx == maxx) return(xx)
  # group index in 1..ngroups; eps keeps minx and maxx strictly
  # inside the first and last rounding interval
  group.nr <- round(.5 + eps +
    (xx - minx) * (ngroups - 2 * eps) / (maxx - minx))
  # map group 1 to minx and group ngroups to maxx
  minx + (group.nr - 1) * (maxx - minx) / (ngroups - 1)
} # end function grouping.v (grouping of a vector)
# function for a single distribution graph:
# Each distinct value is drawn at its own height (y axis); its
# occurrences are spread out horizontally around xplace[3], in a
# band whose half-length is halfband for the most frequent value and
# proportionally shorter for less frequent ones.
#
# Arguments:
#   xx       numeric vector; missing values are removed.
#   grouping logical, if TRUE the values are grouped by grouping.v
#            before being plotted.
#   ngroups  number of groups used when grouping=TRUE.
#   xplace   c(left, right, centre): horizontal plotting range and
#            horizontal position of the centre of the graph.
#   halfband half-length of the widest horizontal band.
#   xlab     label written below the centre of the graph.
#   ylab     y axis label.
#   pch      plotting symbol.
#   lines    logical, if TRUE the points at each value are connected.
#   lty      line type used when lines=TRUE.
# Value:
#   The frequency table of the values actually plotted.
distribution.graph <- function(xx, grouping=FALSE,
ngroups=10, xplace=c(0,1,.5), halfband=.25,
xlab='', ylab='', pch=16,
lines=FALSE, lty='solid') {
  x1 <- stripmiss(xx)
  if (length(x1) == 0) {
    stop("no non-missing values to plot", call. = FALSE)
  }
  if (grouping) x1 <- grouping.v(x1, ngroups = ngroups)
  # tabulate once; the table is also the return value
  freq <- table(distribution = x1)
  xv <- as.numeric(names(freq)) # actual values
  xn <- as.numeric(freq) # number of occurrences
  maxn <- max(xn)
  # Empty plot spanning the horizontal range and the range of the
  # values. Using the two corner points directly (instead of scaling
  # by 1/(max - min)) also works when there is one distinct value.
  plot(x = xplace[1:2], y = range(xv), type = 'n',
    xlab = '', ylab = ylab, axes = FALSE)
  box()
  axis(1, at = xplace[3], labels = xlab)
  axis(2)
  for (value.nr in seq_along(xv)) {
    n.act <- xn[value.nr]
    if (n.act == 1) {
      # a single occurrence is placed at the centre
      xpositions.act <- xplace[3]
    } else {
      # band length proportional to the frequency of this value
      halfband.act <- halfband * n.act / maxn
      xpositions.act <- seq(xplace[3] - halfband.act,
        xplace[3] + halfband.act, length.out = n.act)
    } # end if n.act
    if (lines) {
      points(x = xpositions.act, y = rep(xv[value.nr], n.act),
        pch = pch, type = 'o', lty = lty)
    } else {
      points(x = xpositions.act, y = rep(xv[value.nr], n.act), pch = pch)
    }
  } # end for value.nr
  freq
} # end function distribution.graph
# Demonstration of distribution.graph on simulated data.
# Ask before each new page while the examples run, then restore
# whatever setting was in effect before (rather than forcing FALSE).
old.ask <- par(ask=TRUE)
distribution.graph(floor(runif(100,200,310)))
distribution.graph(floor(runif(100,200,310)),lines=TRUE,pch='.')
distribution.graph(runif(1000,0,3),grouping=TRUE)
distribution.graph(runif(1000,0,3),grouping=TRUE,lines=TRUE,pch='')
distribution.graph(runif(1000,0,10),grouping=TRUE,ngroups=5)
distribution.graph(rbinom(1000,20,.7),grouping=TRUE,ngroups=5)
par(old.ask)
rm(old.ask)
# function for several distribution graphs in the same plot:
# The components of xx are drawn side by side, each in its own
# vertical strip of width (xright - xleft), on a common y axis.
#
# Arguments:
#   xx       list of numeric vectors; missing values are removed.
#            Components with no non-missing values get an empty strip.
#   grouping logical, if TRUE each component is grouped by grouping.v
#            before being plotted.
#   ngroups  number of groups used when grouping=TRUE.
#   xleft, xright horizontal range of a single strip.
#   xmiddle  horizontal offset of the centre within each strip.
#   xband    fraction of the strip width used by the widest band
#            (the band of the most frequent value over all components).
#   xlab     labels written below the centre of each strip.
#   ylab     y axis label.
#   pch      plotting symbol.
#   lines    logical, if TRUE the points at each value are connected.
#   lty      line type, used for the connecting lines and for the
#            vertical separators between strips.
# Value:
#   A list of frequency tables for the values actually plotted.
multiple.distribution.graph <- function(xx, grouping=FALSE,
ngroups=10, xleft=0, xright=1, xmiddle=.5, xband=.5,
xlab=c(1:length(xx)), ylab='', pch=16,
lines=FALSE, lty='solid') {
  xx <- lapply(xx, stripmiss) # remove missing values
  nonempty <- unlist(lapply(xx, length)) > 0
  if (!any(nonempty)) {
    stop("no non-missing values to plot", call. = FALSE)
  }
  # only group components that still have values
  if (grouping) {
    xx[nonempty] <- lapply(xx[nonempty], grouping.v, ngroups = ngroups)
  }
  xtable <- lapply(xx, table)
  xtable.values <- lapply(lapply(xtable, names), as.numeric)
  xtable.freq <- lapply(xtable, as.numeric)
  # overall extremes; empty components contribute nothing
  max.freq <- max(unlist(xtable.freq))
  value.range <- range(unlist(xtable.values))
  ncomp <- length(xx)
  strip.width <- xright - xleft
  plot.xtotal <- xleft + c(0, ncomp) * strip.width
  plot.mids <- xleft + (seq_len(ncomp) - 1) * strip.width + xmiddle
  # empty plot covering all strips and the common value range
  plot(x = plot.xtotal, y = value.range, type = 'n',
    xlab = '', ylab = ylab,
    xlim = plot.xtotal, ylim = value.range, axes = FALSE)
  box()
  axis(1, at = plot.mids, labels = xlab)
  axis(2)
  # vertical separators at every strip boundary
  abline(v = xleft + (0:ncomp) * strip.width, lty = lty)
  for (comp in seq_len(ncomp)) {
    mid <- plot.mids[comp]
    values.comp <- xtable.values[[comp]]
    freq.comp <- xtable.freq[[comp]]
    # seq_along gives no iterations for a component without values
    for (nr in seq_along(values.comp)) {
      value.nr <- values.comp[nr]
      freq.nr <- freq.comp[nr]
      if (freq.nr == 1) {
        # a single occurrence is placed at the centre of the strip
        points(x = mid, y = value.nr, pch = pch)
      } else {
        # band length proportional to frequency, relative to the
        # largest frequency over all components
        halfband.nr <- xband * (strip.width / 2) * freq.nr / max.freq
        xpos.nr <- seq(mid - halfband.nr, mid + halfband.nr,
          length.out = freq.nr)
        if (lines) {
          points(x = xpos.nr, y = rep(value.nr, freq.nr),
            pch = pch, type = 'o', lty = lty)
        } else {
          points(x = xpos.nr, y = rep(value.nr, freq.nr), pch = pch)
        }
      }
    } # end for nr
  } # end for comp
  xtable
} # end function multiple.distribution.graph
More information about the R-help
mailing list