[R] Adding Functionality to stat.table in Epi

Tue Aug 9 20:04:55 CEST 2005

> After you copy stat.table to stat.table2 and modify stat.table2
> try:
>
>> environment(stat.table2) <- environment(stat.table)
>
> (you should only need to do that 1 time after creating/editing
> stat.table2).
>
> hope this helps,
>
> Greg Snow, Ph.D.
> Statistical Data Center, LDS Hospital
> Intermountain Health Care
> greg.snow at ihc.com
> (801) 408-8111
>
>>>> <rab45+ at pitt.edu> 08/09/05 11:16AM >>>
> The stat.table function in the Epi package won't do standard
> deviations.
> It didn't seem that it would be difficult to add an "sd" function to
> the
> stat.table function. Following the example for the mean, I set up a
> similar function for the sd (and included it as an options) but it
> just
> won't work. (I tried sending messages to the Epi mailing list after
> subscribing but my mail is always returned. I don't have the exact
> error
> messages at the moment or I would post them.)
>
> Even if I just copy stat.table to stat.table2 and try to run
> stat.table2,
> I get:
>
>>
> stat.table2(index=list(race,gender),list(count(),percent(race)),margins=TRUE)
> Error: couldn't find function "array.subset"
>
> I can't find any "array.subset" function, yet the original stat.table
> works just fine.
>
> I've copied other functions and made changes to them and they would
> work
> just fine. I must be missing something here.
>
> Any insights would be appreciated.
>
> Rick B.
>
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide!
> http://www.R-project.org/posting-guide.html
>
> ______________________________________________
> R-help at stat.math.ethz.ch mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide!
> http://www.R-project.org/posting-guide.html
>

Thanks Greg. That helps but I still get the following error message:

> stat.table2(index=list(race),list(count(),sd(age.at.scanning)),margins=TRUE)
Error in if (digits < 0) digits <- 6 : missing value where TRUE/FALSE needed

Rick

Below is the code (sorry it's kind of long). The mean function works but
the sd function produces the error message:

stat.table2 <- function (index, contents = count(), data, margins = FALSE)
{
    index.sub <- substitute(index)
    index <- if (missing(data))
        eval(index)
    else eval(index.sub, data)
    deparse.name <- function(x) if (is.symbol(x))
        as.character(x)
    else ""
    if (is.list(index)) {
        if (is.call(index.sub)) {
            index.names <- names(index.sub)
            fixup <- if (is.null(index.names))
                seq(along = index.sub)
            else index.names == ""
            dep <- sapply(index.sub[fixup], deparse.name)
            if (is.null(index.names))
                index.labels <- dep
            else {
                index.labels <- index.names
                index.labels[fixup] <- dep
            }
            index.labels <- index.labels[-1]
        }
        else {
            index.labels <- if (!is.null(names(index))) {
                names(index)
            }
            else {
                rep("", length(index))
            }
        }
    }
    else {
        index.labels <- deparse.name(index.sub)
    }
    if (!is.list(index))
        index <- list(index)
    index <- lapply(index, as.factor)
    contents <- substitute(contents)
    if (!identical(deparse(contents[[1]]), "list")) {
        contents <- call("list", contents)
    }
    valid.functions <- c("count", "mean", "sd","weighted.mean", "sum",
        "quantile", "median", "IQR", "max", "min", "ratio", "percent")
    table.fun <- character(length(contents) - 1)
    for (i in 2:length(contents)) {
        if (!is.call(contents[[i]]))
            stop("contents must be a list of function calls")
        FUN <- deparse(contents[[i]][[1]])
        if (!FUN %in% valid.functions)
            stop(paste("Function", FUN, "not permitted in stat.table"))
        else table.fun[i - 1] <- FUN
    }
    stat.labels <- sapply(contents, deparse)[-1]
    content.names <- names(contents)
    if (!is.null(content.names)) {
        for (i in 2:length(content.names)) {
            if (nchar(content.names[i]) > 0)
                stat.labels[i - 1] <- content.names[i]
        }
    }
    count <- function(id) {
        if (missing(id)) {
            id <- seq(along = index[[1]])
        }
        y <- tapply(id, INDEX = subindex, FUN = function(x)
length(unique(x)))
        y[is.na(y)] <- 0
        return(y)
    }
    mean <- function(x, trim = 0, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = base::mean, trim = trim,
            na.rm = na.rm)
    }
    sd <- function(x, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::sd,
            na.rm = na.rm)
    }

    weighted.mean <- function(x, w, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::weighted.mean,
            w = w, na.rm = na.rm)
    }
    sum <- function(..., na.rm = TRUE) {
        tapply(..., INDEX = subindex, FUN = base::sum, na.rm = na.rm)
    }
    quantile <- function(x, probs, na.rm = TRUE, names = TRUE,
        type = 7, ...) {
        if (length(probs > 1))
            stop("The quantile function only accepts scalar prob values
within stat.table")
        tapply(x, INDEX = subindex, FUN = stats::quantile, probs = prob,
            na.rm = na.rm, names = names, type = type, ...)
    }
    median <- function(x, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::median, na.rm = na.rm)
    }
    IQR <- function(x, na.rm = TRUE) {
        tapply(x, INDEX = subindex, FUN = stats::IQR, na.rm = na.rm)
    }
    max <- function(..., na.rm = TRUE) {
        tapply(..., INDEX = subindex, FUN = base::max, na.rm = na.rm)
    }
    min <- function(..., na.rm = TRUE) {
        tapply(..., INDEX = subindex, FUN = base::min, na.rm = na.rm)
    }
    ratio <- function(d, y, scale = 1, na.rm = TRUE) {
        if (length(scale) != 1)
            stop("Scale parameter must be a scalar")
        if (na.rm) {
            w <- (!is.na(d) & !is.na(y))
            tab1 <- tapply(d * w, INDEX = subindex, FUN = base::sum,
                na.rm = TRUE)
            tab2 <- tapply(y * w, INDEX = subindex, FUN = base::sum,
                na.rm = TRUE)
        }
        else {
            tab1 <- tapply(d, INDEX = subindex, FUN = base::sum,
                na.rm = FALSE)
            tab2 <- tapply(y, INDEX = subindex, FUN = base::sum,
                na.rm = FALSE)
        }
        return(scale * tab1/tab2)
    }
    percent <- function(...) {
        x <- list(...)
        if (length(x) == 0)
            stop("No variables to calculate percent")
        n <- count()
        sweep.index <- logical(length(subindex))
        for (i in seq(along = subindex)) {
            sweep.index[i] <- !any(sapply(x, identical, subindex[[i]]))
        }
        if (!any(sweep.index)) {
            return(100 * n/base::sum(n, na.rm = TRUE))
        }
        else {
            margin <- apply(n, which(sweep.index), base::sum,
                na.rm = TRUE)
            margin[margin == 0] <- NA
            return(100 * sweep(n, which(sweep.index), margin,
                "/"))
        }
    }
    n.dim <- length(index)
    tab.dim <- sapply(index, nlevels)
    if (length(margins) == 1)
        margins <- rep(margins, n.dim)
    else if (length(margins) != n.dim)
        stop("Incorrect length for margins argument")
    fac.list <- vector("list", n.dim)
    for (i in 1:n.dim) {
        fac.list[[i]] <- if (margins[i])
            c(0, 1)
        else 1
    }
    subtable.grid <- as.matrix(expand.grid(fac.list))
    ans.dim <- c(length(contents) - 1, tab.dim + margins)
    ans <- numeric(prod(ans.dim))
    for (i in 1:nrow(subtable.grid)) {
        in.subtable <- as.logical(subtable.grid[i, ])
        llim <- rep(1, n.dim) + ifelse(in.subtable, rep(0, n.dim),
            tab.dim)
        ulim <- tab.dim + ifelse(in.subtable, rep(0, n.dim),
            rep(1, n.dim))
        subindex <- index[in.subtable]
        subtable.list <- if (missing(data))
            eval(contents)
        else eval(as.expression(contents), data)
        for (j in 1:length(subtable.list)) {
            ans[array.subset(ans.dim, c(j, llim), c(j, ulim))] <-
subtable.list[[j]]
        }
    }
    ans <- array(ans, dim = ans.dim)
    ans.dimnames <- lapply(index, levels)
    names(ans.dimnames) <- index.labels
    for (i in 1:length(index)) {
        if (margins[i])
            ans.dimnames[[i]] <- c(ans.dimnames[[i]], "Total")
    }
    dimnames(ans) <- c(list(contents = stat.labels), ans.dimnames)
    attr(ans, "table.fun") <- table.fun
    class(ans) <- c("stat.table", class(ans))
    return(ans)
}
environment(stat.table2) <- environment(stat.table)

stat.table2(index=list(race),list(count(),mean(age.at.scanning)),margins=TRUE)

stat.table2(index=list(race),list(count(),sd(age.at.scanning)),margins=TRUE)