[R] Adding Functionality to stat.table in Epi
rab45+@pitt.edu
rab45+ at pitt.edu
Tue Aug 9 20:04:55 CEST 2005
> After you copy stat.table to stat.table2 and modify stat.table2
> try:
>
>> environment(stat.table2) <- environment(stat.table)
>
> (you should only need to do that 1 time after creating/editing
> stat.table2).
>
> hope this helps,
>
> Greg Snow, Ph.D.
> Statistical Data Center, LDS Hospital
> Intermountain Health Care
> greg.snow at ihc.com
> (801) 408-8111
>
>>>> <rab45+ at pitt.edu> 08/09/05 11:16AM >>>
> The stat.table function in the Epi package won't do standard
> deviations.
> It didn't seem that it would be difficult to add an "sd" function to
> the
> stat.table function. Following the example for the mean, I set up a
> similar function for the sd (and included it as an options) but it
> just
> won't work. (I tried sending messages to the Epi mailing list after
> subscribing but my mail is always returned. I don't have the exact
> error
> messages at the moment or I would post them.)
>
> Even if I just copy stat.table to stat.table2 and try to run
> stat.table2,
> I get:
>
>>
> stat.table2(index=list(race,gender),list(count(),percent(race)),margins=TRUE)
> Error: couldn't find function "array.subset"
>
> I can't find any "array.subset" function, yet the original stat.table
> works just fine.
>
> I've copied other functions and made changes to them and they would
> work
> just fine. I must be missing something here.
>
> Any insights would be appreciated.
>
> Rick B.
>
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide!
> http://www.R-project.org/posting-guide.html
>
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide!
> http://www.R-project.org/posting-guide.html
>
Thanks Greg. That helps but I still get the following error message:
> stat.table2(index=list(race),list(count(),sd(age.at.scanning)),margins=TRUE)
Error in if (digits < 0) digits <- 6 : missing value where TRUE/FALSE needed
Rick
Below is the code (sorry it's kind of long). The mean function works but
the sd function produces the error message:
stat.table2 <- function (index, contents = count(), data, margins = FALSE)
{
index.sub <- substitute(index)
index <- if (missing(data))
eval(index)
else eval(index.sub, data)
deparse.name <- function(x) if (is.symbol(x))
as.character(x)
else ""
if (is.list(index)) {
if (is.call(index.sub)) {
index.names <- names(index.sub)
fixup <- if (is.null(index.names))
seq(along = index.sub)
else index.names == ""
dep <- sapply(index.sub[fixup], deparse.name)
if (is.null(index.names))
index.labels <- dep
else {
index.labels <- index.names
index.labels[fixup] <- dep
}
index.labels <- index.labels[-1]
}
else {
index.labels <- if (!is.null(names(index))) {
names(index)
}
else {
rep("", length(index))
}
}
}
else {
index.labels <- deparse.name(index.sub)
}
if (!is.list(index))
index <- list(index)
index <- lapply(index, as.factor)
contents <- substitute(contents)
if (!identical(deparse(contents[[1]]), "list")) {
contents <- call("list", contents)
}
valid.functions <- c("count", "mean", "sd","weighted.mean", "sum",
"quantile", "median", "IQR", "max", "min", "ratio", "percent")
table.fun <- character(length(contents) - 1)
for (i in 2:length(contents)) {
if (!is.call(contents[[i]]))
stop("contents must be a list of function calls")
FUN <- deparse(contents[[i]][[1]])
if (!FUN %in% valid.functions)
stop(paste("Function", FUN, "not permitted in stat.table"))
else table.fun[i - 1] <- FUN
}
stat.labels <- sapply(contents, deparse)[-1]
content.names <- names(contents)
if (!is.null(content.names)) {
for (i in 2:length(content.names)) {
if (nchar(content.names[i]) > 0)
stat.labels[i - 1] <- content.names[i]
}
}
count <- function(id) {
if (missing(id)) {
id <- seq(along = index[[1]])
}
y <- tapply(id, INDEX = subindex, FUN = function(x)
length(unique(x)))
y[is.na(y)] <- 0
return(y)
}
mean <- function(x, trim = 0, na.rm = TRUE) {
tapply(x, INDEX = subindex, FUN = base::mean, trim = trim,
na.rm = na.rm)
}
sd <- function(x, na.rm = TRUE) {
tapply(x, INDEX = subindex, FUN = stats::sd,
na.rm = na.rm)
}
weighted.mean <- function(x, w, na.rm = TRUE) {
tapply(x, INDEX = subindex, FUN = stats::weighted.mean,
w = w, na.rm = na.rm)
}
sum <- function(..., na.rm = TRUE) {
tapply(..., INDEX = subindex, FUN = base::sum, na.rm = na.rm)
}
quantile <- function(x, probs, na.rm = TRUE, names = TRUE,
type = 7, ...) {
if (length(probs > 1))
stop("The quantile function only accepts scalar prob values
within stat.table")
tapply(x, INDEX = subindex, FUN = stats::quantile, probs = prob,
na.rm = na.rm, names = names, type = type, ...)
}
median <- function(x, na.rm = TRUE) {
tapply(x, INDEX = subindex, FUN = stats::median, na.rm = na.rm)
}
IQR <- function(x, na.rm = TRUE) {
tapply(x, INDEX = subindex, FUN = stats::IQR, na.rm = na.rm)
}
max <- function(..., na.rm = TRUE) {
tapply(..., INDEX = subindex, FUN = base::max, na.rm = na.rm)
}
min <- function(..., na.rm = TRUE) {
tapply(..., INDEX = subindex, FUN = base::min, na.rm = na.rm)
}
ratio <- function(d, y, scale = 1, na.rm = TRUE) {
if (length(scale) != 1)
stop("Scale parameter must be a scalar")
if (na.rm) {
w <- (!is.na(d) & !is.na(y))
tab1 <- tapply(d * w, INDEX = subindex, FUN = base::sum,
na.rm = TRUE)
tab2 <- tapply(y * w, INDEX = subindex, FUN = base::sum,
na.rm = TRUE)
}
else {
tab1 <- tapply(d, INDEX = subindex, FUN = base::sum,
na.rm = FALSE)
tab2 <- tapply(y, INDEX = subindex, FUN = base::sum,
na.rm = FALSE)
}
return(scale * tab1/tab2)
}
percent <- function(...) {
x <- list(...)
if (length(x) == 0)
stop("No variables to calculate percent")
n <- count()
sweep.index <- logical(length(subindex))
for (i in seq(along = subindex)) {
sweep.index[i] <- !any(sapply(x, identical, subindex[[i]]))
}
if (!any(sweep.index)) {
return(100 * n/base::sum(n, na.rm = TRUE))
}
else {
margin <- apply(n, which(sweep.index), base::sum,
na.rm = TRUE)
margin[margin == 0] <- NA
return(100 * sweep(n, which(sweep.index), margin,
"/"))
}
}
n.dim <- length(index)
tab.dim <- sapply(index, nlevels)
if (length(margins) == 1)
margins <- rep(margins, n.dim)
else if (length(margins) != n.dim)
stop("Incorrect length for margins argument")
fac.list <- vector("list", n.dim)
for (i in 1:n.dim) {
fac.list[[i]] <- if (margins[i])
c(0, 1)
else 1
}
subtable.grid <- as.matrix(expand.grid(fac.list))
ans.dim <- c(length(contents) - 1, tab.dim + margins)
ans <- numeric(prod(ans.dim))
for (i in 1:nrow(subtable.grid)) {
in.subtable <- as.logical(subtable.grid[i, ])
llim <- rep(1, n.dim) + ifelse(in.subtable, rep(0, n.dim),
tab.dim)
ulim <- tab.dim + ifelse(in.subtable, rep(0, n.dim),
rep(1, n.dim))
subindex <- index[in.subtable]
subtable.list <- if (missing(data))
eval(contents)
else eval(as.expression(contents), data)
for (j in 1:length(subtable.list)) {
ans[array.subset(ans.dim, c(j, llim), c(j, ulim))] <-
subtable.list[[j]]
}
}
ans <- array(ans, dim = ans.dim)
ans.dimnames <- lapply(index, levels)
names(ans.dimnames) <- index.labels
for (i in 1:length(index)) {
if (margins[i])
ans.dimnames[[i]] <- c(ans.dimnames[[i]], "Total")
}
dimnames(ans) <- c(list(contents = stat.labels), ans.dimnames)
attr(ans, "table.fun") <- table.fun
class(ans) <- c("stat.table", class(ans))
return(ans)
}
environment(stat.table2) <- environment(stat.table)
stat.table2(index=list(race),list(count(),mean(age.at.scanning)),margins=TRUE)
stat.table2(index=list(race),list(count(),sd(age.at.scanning)),margins=TRUE)
More information about the R-help
mailing list