[Rd] Using 'dimname names' in aperm() and apply()

Thu Jul 29 21:31:28 CEST 2010

I think that the "dimname names" of tables and arrays could make
aperm() and apply() (and probably some other functions) easier to use.
(dimname names are, for example, created by table() )

The use would be something like:
--
x <-table( from=sample(3,100,rep=T), to=sample(5,100,rep=T))
trans <- x / apply(x,"from",sum)

y <- aperm( trans, c("from","to") )
z <- aperm(y, c("to","from") )

res <-apply( y, "to", sum)
--

This makes the array much easier to handle than having to keep track
which dimension currently means what.

For aperm and apply, the change seems very simple - one new function,
and an additional line in each.
----------
dimnum.from.dimnamename <- function(A, dimensions)
{

  if( is.character(dimensions) ) {
    n <- names(dimnames(A))
    if( !is.null(n) ) {
        dimnum <- seq( along=n)
        names(dimnum) <-  n
        dimensions <- dimnum[dimensions]
      }
  }
  dimensions
}

aperm <- function (a, perm, resize = TRUE)
{
    if (missing(perm))
        perm <- integer(0L)
    perm <- dimnum.from.dimnamename( a, perm) # this line was added to aperm
    .Internal(aperm(a, perm, resize))
}

apply <-  function (X, MARGIN, FUN, ...)
{
    FUN <- match.fun(FUN)
    d <- dim(X)
    dl <- length(d)
    if (dl == 0L)
        stop("dim(X) must have a positive length")
    ds <- 1L:dl
    if (length(oldClass(X)))
        X <- if (dl == 2)
            as.matrix(X)
        else as.array(X)
    d <- dim(X)
    dn <- dimnames(X)

    MARGIN <- dimnum.from.dimnamename( X,MARGIN ) # this line was added to apply

    s.call <- ds[-MARGIN]
    s.ans <- ds[MARGIN]
    d.call <- d[-MARGIN]
    d.ans <- d[MARGIN]
    dn.call <- dn[-MARGIN]
    dn.ans <- dn[MARGIN]
    d2 <- prod(d.ans)
    if (d2 == 0L) {
        newX <- array(vector(typeof(X), 1L), dim = c(prod(d.call),
            1L))
        ans <- FUN(if (length(d.call) < 2L)
            newX[, 1]
        else array(newX[, 1L], d.call, dn.call), ...)
        return(if (is.null(ans)) ans else if (length(d.ans) <
            2L) ans[1L][-1L] else array(ans, d.ans, dn.ans))
    }
    newX <- aperm(X, c(s.call, s.ans))
    dim(newX) <- c(prod(d.call), d2)
    ans <- vector("list", d2)
    if (length(d.call) < 2L) {
        if (length(dn.call))
            dimnames(newX) <- c(dn.call, list(NULL))
        for (i in 1L:d2) {
            tmp <- FUN(newX[, i], ...)
            if (!is.null(tmp))
                ans[[i]] <- tmp
        }
    }
    else for (i in 1L:d2) {
        tmp <- FUN(array(newX[, i], d.call, dn.call), ...)
        if (!is.null(tmp))
            ans[[i]] <- tmp
    }
    ans.list <- is.recursive(ans[[1L]])
    l.ans <- length(ans[[1L]])
    ans.names <- names(ans[[1L]])
    if (!ans.list)
        ans.list <- any(unlist(lapply(ans, length)) != l.ans)
    if (!ans.list && length(ans.names)) {
        all.same <- sapply(ans, function(x) identical(names(x),
            ans.names))
        if (!all(all.same))
            ans.names <- NULL
    }
    len.a <- if (ans.list)
        d2
    else length(ans <- unlist(ans, recursive = FALSE))
    if (length(MARGIN) == 1L && len.a == d2) {
        names(ans) <- if (length(dn.ans[[1L]]))
            dn.ans[[1L]]
        return(ans)
    }
    if (len.a == d2)
        return(array(ans, d.ans, dn.ans))
    if (len.a && len.a%%d2 == 0L) {
        if (is.null(dn.ans))
            dn.ans <- vector(mode = "list", length(d.ans))
        dn.ans <- c(list(ans.names), dn.ans)
        return(array(ans, c(len.a%/%d2, d.ans), if (!all(sapply(dn.ans,
            is.null))) dn.ans))
    }
    return(ans)
}
----------

Thanks,

Michael

--
Michael Lachmann, Max Planck institute of evolutionary anthropology
Deutscher Platz. 6, 04103 Leipzig, Germany
Tel: +49-341-3550521, Fax: +49-341-3550555