[R] xtabs
Tomas Aragon
aragon at medepi.org
Thu Feb 14 06:04:43 CET 2002
At 04:29 PM 2/13/02 -0700, Michael Lynn Fugate wrote:
>Hi,
>
>In Splus if I call the function crosstabs() the output is a contigency
>table; in each cell of the table is printed: N, N/RowTotal,
>N/ColTotal, N/Total. N is the number of observations in each cell.
>
>The same call to xtabs() in R will produce the contigency table but the
>only entry in each cell is N.
>
>How can I get the same relative frequencies that crosstabs() gives?
>
>Thanks,
>mike
>
>--
>
>******************************************************************
>| Michael Fugate Phone: (505) 667-0398 |
>| Los Alamos National Laboratory Fax: (505) 665-5220 |
>| Group: CCS-3, Mail Stop: B265 e-mail: fugate at lanl.gov |
>| Los Alamos, NM 87545 |
>******************************************************************
This not an elegant solution. Once you have your matrix or array
contingency table, the following function will give you the marginal
totals, marginal distributions, and joint distributions as a list that you
can index to extract the specific result you want. Here's the function at work:
> x <- array(1:8,c(2,2,2))
> x
, , 1
[,1] [,2]
[1,] 1 3
[2,] 2 4
, , 2
[,1] [,2]
[1,] 5 7
[2,] 6 8
> marginals(x)
$margin.totals
, , 1
[,1] [,2] [,3]
[1,] 1 3 4
[2,] 2 4 6
[3,] 3 7 10
, , 2
[,1] [,2] [,3]
[1,] 5 7 12
[2,] 6 8 14
[3,] 11 15 26
$col.distrib
, , 1
[,1] [,2] [,3]
[1,] 0.3333333 0.4285714 0.4
[2,] 0.6666667 0.5714286 0.6
[3,] 1.0000000 1.0000000 1.0
, , 2
[,1] [,2] [,3]
[1,] 0.4545455 0.4666667 0.4615385
[2,] 0.5454545 0.5333333 0.5384615
[3,] 1.0000000 1.0000000 1.0000000
$row.distrib
, , 1
[,1] [,2] [,3]
[1,] 0.2500000 0.7500000 1
[2,] 0.3333333 0.6666667 1
[3,] 0.3000000 0.7000000 1
, , 2
[,1] [,2] [,3]
[1,] 0.4166667 0.5833333 1
[2,] 0.4285714 0.5714286 1
[3,] 0.4230769 0.5769231 1
$joint.distrib
, , 1
[,1] [,2] [,3]
[1,] 0.1 0.3 0.4
[2,] 0.2 0.4 0.6
[3,] 0.3 0.7 1.0
, , 2
[,1] [,2] [,3]
[1,] 0.1923077 0.2692308 0.4615385
[2,] 0.2307692 0.3076923 0.5384615
[3,] 0.4230769 0.5769231 1.0000000
Now here's the function:
marginals <- function(x){
#get marginal counts/distributions of matrix/array
#x=matrix or array
#Note: creates and uses indexing function
#
index <- function(x,MARGIN=1,INDEX=list(1:dim(x)[1]),drop=TRUE){
#Index any array along any dimension
#without need to specify number of dimensions!
#Tomas Aragon, S-Plus, Created 11/04/97, Edited
#x = array
#MARGIN = dimension to be subscripted
#INDEX = index, default returns x unchanged
#drop = FALSE, TRUE maintains same number of dimensions
dx <- dim(x)
ldx <- length(dim(x))
if(is.list(INDEX) == F){INDEX <- list(INDEX)}
dlist <- vector("list", ldx)
dlist[MARGIN] <- INDEX
for(i in (1:ldx)[ - MARGIN]) {
dlist[i] <- list(1:dx[i])
}
pp <- paste(dlist, ",", sep = "", collapse = "")
if(drop == TRUE){txt <- "drop=TRUE"}
else txt <- "drop=FALSE"
eval(parse(text = paste("x[", pp, txt, "]")))
}
#for matrices
if(is.null(dim(x))==TRUE) stop("Must be matrix or array")
dims <- dim(x)
dn <- dimnames(x)
nn <- names(dimnames(x))
#for matrices
if(length(dims)==2){
xx <- cbind(x,apply(x,1,sum))
result <- rbind(xx,apply(xx,2,sum))
if(!is.null(dn)==TRUE) {
dimnames(result) <- list(c(dn[[1]],"Total"),c(dn[[2]],"Total"))
}
if(!is.null(nn)==TRUE) {names(dimnames(result)) <- nn}
result2 <- sweep(result, 2, result[nrow(result), ], "/")
result3 <- sweep(result, 1, result[, ncol(result)], "/")
result4 <- result/result[nrow(result), ncol(result)]
}
#for arrays
if(length(dims)>=3){
dnlist <- vector("list",length(dims))
xx <- matrix(x,nrow=dims[1])
txxs <- t(rbind(xx,apply(xx,2,sum)))
txxs2 <- sweep(txxs,1,txxs[,ncol(txxs)],"/")
xxx <- matrix(txxs,dims[2])
xxxs <- (rbind(xxx,apply(xxx,2,sum)))
txxx <- t(matrix(xxxs,2*dims[2]+2))
result <- array(txxx,dim=c(dims[1:2]+1,dims[-c(1:2)]))
dnlist[[1]] <- c(dn[[1]],"Total")
dnlist[[2]] <- c(dn[[2]],"Total")
for(i in 3:length(dims)){dnlist[[i]] <- c(dn[[i]])}
if(!is.null(dn)==TRUE) {dimnames(result) <- dnlist}
if(!is.null(nn)==TRUE) {names(dimnames(result)) <- nn}
sub1 <- index(result,1,dim(result)[1],drop=FALSE)
sub2 <- index(result,2,dim(result)[2],drop=FALSE)
sub3 <- index(result,c(1,2),list(dim(result)[1],dim(result)[2]),drop=FALSE)
result2 <- sweep(result,(1:length(dims))[-1],sub1,"/")
result3 <- sweep(result,(1:length(dims))[-2],sub2,"/")
result4 <- sweep(result,(1:length(dims))[- c(1,2)],sub3,"/")
}
list(margin.totals=result,col.distrib=result2,row.distrib=result3,joint.distrib=result4)
}
Good luck,
Tomas
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-help mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-help-request at stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._
More information about the R-help
mailing list