summarize <- function(X, by, FUN, ..., stat.name=deparse(substitute(X)), type=c('variables','matrix')) { .R. <- TRUE oldUnclass <- unclass type <- match.arg(type) if(missing(stat.name) && length(stat.name)>1) stat.name <- 'X' # 2Mar00 if(!is.list(by)) { nameby <- deparse(substitute(by)) by <- list(by) names(by) <- if(length(nameby)==1) nameby else 'by' # 2Mar00 } nby <- length(by) bylabel <- sapply(by, label) # bylabel[bylabel==''] <- names(by) 21Mar00 bylabel <- ifelse(bylabel=='', names(by), bylabel) typical.computation <- FUN(X, ...) nc <- length(typical.computation) xlabel <- deparse(substitute(X)) if(length(xlabel)!=1) xlabel <- 'X' # 2Mar00 if(length(xlab <- attr(X,'label'))) xlabel <- xlab if(!.R.) # 21Mar01: S-Plus converts factor to integer during paste for(i in 1:nby) if(is.category(by[[i]])) by[[i]] <- as.character(by[[i]]) ## is.category added 9May01 byc <- do.call('paste',c(by,sep='|')) ## split does not handle matrices msplit <- function(x, group) { if(is.matrix(x)) { group <- as.factor(group) l <- levels(group) res <- vector('list', length(l)) names(res) <- l for(j in l) res[[j]] <- x[group==j,,drop=F] res } else split(x, group) } if(nc==1) r <- sapply(msplit(X, byc), FUN, ..., simplify=TRUE) else { r <- sapply(msplit(X, byc), FUN, ..., simplify=TRUE) r <- matrix(unlist(r), nrow=nc, dimnames=dimnames(r)) ## 2Mar00: added unlist because sapply was creating an array of ## lists in S+2000 } if(.R.) { # someday can use unpaste defined in Misc.s ans <- strsplit(if(nc==1)names(r) else dimnames(r)[[2]],'\\|') ## strsplit returns list "transpose" of unpaste bb <- matrix(unlist(ans), nrow=nby) ans <- vector('list', nby) for(jj in 1:nby) ans[[jj]] <- bb[jj,] } else { ans <- if(nc==1)names(r) else dimnames(r)[[2]] if(nby==1) ans <- list(ans) else # nby==1 9May01 ans <- unpaste(ans, sep='|') # 21Mar01 nby>1 9May01 } names(ans) <- names(by) if(nc>1 && (nc != nrow(r))) stop('program logic error') snames <- names(typical.computation) ## if(!missing(stat.name) | (missing(stat.name) & length(snames)==0)) ## snames <- if(length(stat.name)==nc)stat.name else ## paste(stat.name[1],1:nc,sep='') if(!length(snames)) snames <- paste(stat.name,1:nc,sep='') if(length(stat.name)==1)snames[1] <- stat.name else snames <- stat.name # wrn <- .Options$warn # .Options$warn <- -1 6Aug00 oldopt <- options(warn=-1) on.exit(options(oldopt)) notna <- rep(TRUE, length(ans[[1]])) for(i in 1:length(by)) { byi <- by[[i]] ansi <- ans[[i]] if(is.category(byi)) { if(!is.character(ansi)) stop('program logic error:ansi not character') # ansi <- structure(as.numeric(ansi), 21Mar01 # levels=levels(byi), class='factor') } else if(is.numeric(byi)) ansi <- as.numeric(ansi) names(ansi) <- NULL label(ansi) <- bylabel[i] ans[[i]] <- ansi notna <- notna & !is.na(ansi) } if(type=='matrix' || nc==1) { ans[[stat.name]] <- if(nc==1) structure(r,names=NULL) else structure(t(r), dimnames=list(NULL, snames), names=NULL) label(ans[[stat.name]]) <- xlabel } else { snames <- make.names(snames) for(i in 1:length(snames)) { ans[[snames[i]]] <- structure(r[i,], names=NULL) label(ans[[snames[i]]]) <- xlabel } } notna <- notna & !is.na(if(nc==1) r else t(r) %*% rep(1,nc)) aaa <- structure(ans, class='data.frame', row.names=1:length(ans[[1]])) ans <- structure(ans, class='data.frame', row.names=1:length(ans[[1]]))[notna,] iorder <- do.call('order', structure(oldUnclass(ans)[1:nby],names=NULL)) ## order can bomb if data frame given (preserves names) ans[iorder,] }