# R-beta: bug in hist() (0.60/0.61)

Albrecht Gebhardt agebhard at zidsrv.sci.uni-klu.ac.at
Tue Dec 23 19:45:57 CET 1997

```I discovered a bug in hist().
Try the following:

x<-c(-5,-4,-4,-4,-3,-3,-3,-3,-2,-2,0,0,0,0,1,1,1,3,3,5,6)

# Note that sum(x)<0:

sum(x)
#  -13

hist(x)
# looks ok

hist(x,freq=F)
# negative bars !!

# and finally this gives not 1:
sum(hist(x,plot=F)\$rel.freqs)

#  -0.8076923

The reason is, that "sum(x)" is used instead of "length(x)" in the following
line near the end of hist() (of course it works with 0-1 variables):

...
}
else TRUE
rel.freqs <- counts/(sum(x) * diff(breaks))
if (missing(ylab))
ylab <- paste(if (!freq)
...

Additionally the returned values "rel.freqs" have to be multiplied by
diff(breaks), to get sum=1 .

Here is a modified version of hist, which fixes this. I have also added a
"labels=T/F" option, allowing for labels on top of the bars, which makes
sense in the case of non equally spaced breaks (because y-axis labels refer
only to bars with width 1).

An additional return value contains the bar heights (which may differ from
the (rel) frequencies) for individual labelling.

"hist" <-
function (x, breaks, freq = NULL, col = NULL, border = par("fg"),
main = paste("Histogram of", deparse(substitute(x))),
xlim = range(breaks), ylim = range(y, 0), xlab = deparse(substitute(x)),
ylab, axes = TRUE, plot = TRUE, labels = FALSE, ...)
{
if (!is.numeric(x))
stop("hist: x must be numeric")
eval(main)
eval(xlab)
n <- length(x <- x[!is.na(x)])
use.br <- !missing(breaks) && length(breaks) > 1
breaks <- if (use.br)
sort(breaks)
else {
rx <- range(x)
pretty(rx + c(0, diff(rx)/1000), n = if (missing(breaks))
1 + log2(n)
else {
# breaks = `nclass'
if (is.na(breaks) | breaks < 2)
stop("invalid number of breaks")
breaks
})
}
nB <- length(breaks)
counts <- .C("bincount", as.double(x), n, as.double(breaks),
nB, counts = integer(nB - 1), NAOK = FALSE)\$counts
if (any(counts < 0))
stop("negative `counts'. Internal Error in C-code for \"bincount\"")
if (is.null(freq))
freq <- if (use.br) {
##-- Do frequencies if breaks are evenly spaced
h <- diff(breaks)
diff(range(h)) < 1e-10 * mean(h)
}
else TRUE
rel.freqs <- counts/(length(x) * diff(breaks))
mids <- 0.5 * (breaks[-1] + breaks[-nB])
heights <- if (freq)
counts/diff(breaks) * min(diff(breaks))
else rel.freqs
if (missing(ylab))
ylab <- paste(if (!freq)
"Relative ", "Frequency", sep = "")
if (plot) {
#-> ylim's default
y <- heights
plot.new()
plot.window(xlim, ylim, "")
title(main = main, xlab = xlab, ylab = ylab,
...)
if (axes) {
axis(1, ...)
axis(2, ...)
}
rect(breaks[-nB], 0, breaks[-1], heights, col = col,
border = border)
if (labels) {
if (freq)
text(mids, heights, counts, adj = c(0.5,
-0.5))
else text(mids, heights, round(rel.freqs *
diff(breaks), 3), adj = c(0.5,
-0.5))
}
}
invisible(list(breaks = breaks, counts = counts, rel.freqs = rel.freqs *
diff(breaks), mids = mids, heights = heights))
}

For this version of hist() (if accepted)  also the help page examples should be
rewritten ( "text(r\$mids, r\$rel.fr, " --> "text(r\$mids, r\$heights, ")

A.Gebhardt

------------------------------------------------------------------
Albrecht Gebhardt           email: albrecht.gebhardt at uni-klu.ac.at
Institut fuer Mathematik    Tel. : (++43 463) 2700/837
Universitaet Klagenfurt     Fax  : (++43 463) 2700/834
Villacher Str. 161
A-9020 Klagenfurt, Austria
------------------------------------------------------------------

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._

```