R Code for X-Tab with Row/Col Proportions, Expected Vals and Tests
Marc Schwartz
MSchwartz@medanalytics.com
Sun, 28 Jul 2002 17:25:39 -0500
Recently, I noted a post and replies on R-Help from Professor Marc
Feldesman regarding a cross tabulation function that generates row and
column proportions, marginal values, expected cell values and tests for
independence presumably similar in a fashion to the output of the S-Plus
crosstabs() function or SAS Proc Freq.
Martin Maechler had posted some code in reply for folks to update and
translate.
In parallel, out of my own needs for something functionally similar, I
have been working for a while on a function that takes either a
two-dimensional matrix or two vectors and generates just such a table,
with output similar to the aforementioned S-Plus/SAS functions. Column
widths are adjusted based upon the dimnames and the digits argument and
the printed table is labeled with vector names if used.
The code for the function is below. If anyone wishes me to e-mail a
text file containing the code, let me know.
My goal would be to make this available to the R community as open
source, either independently or perhaps if deemed appropriate, as a base
function.
I would appreciate any constructive criticism of the code itself or the
output. My guess is that there may be more efficient means of performing
some of the manipulations and/or perhaps being consistent with core R
coding and output standards.
If there are any suggestions for improvement, I would be more than happy
to incorporate these and make them available to the community.
Thank you for your consideration and I look forward to any feedback.
Best regards,
Marc Schwartz
------------------------------------------------
CrossTable <- function (x, y, digits = 3, expected = FALSE,
correct = TRUE)
{
Syntax <- paste("\nSyntax:",
"CrossTable(x, y, digits = 3, drop = TRUE,",
" expected = FALSE, correct = TRUE)\n",
"x: A vector in a matrix or dataframe OR",
" if y not present, a two-dimension matrix",
"y: A vector in a matrix or dataframe.",
"digits: Number of digits after the decimal",
" point for cell proportions",
"expected: If TRUE, expected cell counts from the",
" Chi^2 will be included.",
"correct: If TRUE, the Yates correction will be",
" applied in the Chi^2 test.",
sep = "\n")
# Do error checking
if (missing(x))
stop(Syntax)
if (missing(y))
{
# if only x is specified, it must be a 2 dimensional matrix
if (length(dim(x)) != 2)
stop("x must be a 2 dimensional matrix if y is not given")
if(any(dim(x) < 2))
stop("x must have at least 2 rows and columns")
if(any(x < 0) || any(is.na(x)))
stop("all entries of x must be nonnegative and finite")
else
t <- x
}
else
{
if(length(x) != length(y))
stop("x and y must have the same length")
# Create Titles for Table From Vector Names
RowData <- deparse(substitute(x))
ColData <- deparse(substitute(y))
# Remove unused factor levels from vectors
x <- factor(x)
y <- factor(y)
if((nlevels(x) < 2) || (nlevels(y) < 2))
stop("x and y must have at least 2 levels")
# Generate table
t <- table(x, y)
}
# Generate cell proportion of row
CPR <- prop.table(t, 1)
# Generate cell proportion of col
CPC <- prop.table(t, 2)
# Generate cell proportion of total
CPT <- prop.table(t)
# Generate summary counts
GT <- sum(t)
RS <- rowSums(t)
CS <- colSums(t)
# Column and Row Total Headings
ColTotal <- "Column Total"
RowTotal <- "Row Total"
# Set consistent col widths based upon dimnames and table vals
CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]),
nchar(RS), nchar(CS), nchar(RowTotal)))
RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal)))
# Adjust first column width if Data Titles present
if (exists("RowData"))
RWidth <- max(RWidth, nchar(RowData))
# Create row separators
RowSep <- paste(rep("-", CWidth + 2), collapse = "")
RowSep1 <- paste(rep("-", RWidth + 1), collapse = "")
SpaceSep1 <- paste(rep(" ", RWidth), collapse = "")
SpaceSep2 <- paste(rep(" ", CWidth), collapse = "")
# Create formatted Names
FirstCol <- formatC(dimnames(t)[[1]], width = RWidth, format = "s")
ColTotal <- formatC(ColTotal, width = RWidth, format = "s")
RowTotal <- formatC(RowTotal, width = CWidth, format = "s")
# Perform Chi-Square Test
CST <- chisq.test(t, correct = correct)
# Perform Fisher Tests
FTt <- fisher.test(t, alternative = "two.sided")
# If 2 x 2, include one tailed values
if (all(dim(t) == 2))
{
FTl <- fisher.test(t, alternative = "less")
FTg <- fisher.test(t, alternative = "greater")
}
# Print Cell Layout
cat(rep("\n", 2))
cat("|-----------------|\n")
cat("| N |\n")
if (expected)
cat("| Expected N |\n")
cat("| N / Row Total |\n")
cat("| N / Col Total |\n")
cat("| N / Table Total |\n")
cat("|-----------------|\n")
cat(rep("\n", 2))
cat("Total Observations in Table: ", GT, "\n")
cat(rep("\n", 2))
# Print Column headings
# print vector names if present
if (exists("RowData"))
{
cat(SpaceSep1, "|", ColData, "\n")
cat(formatC(RowData, width = RWidth, format = "s"),
formatC(dimnames(t)[[2]], width = CWidth, format = "s"),
RowTotal, sep = " | ", collapse = "\n")
}
else
cat(SpaceSep1,
formatC(dimnames(t)[[2]], width = CWidth, format = "s"),
RowTotal, sep = " | ", collapse = "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n")
# Print table cells
for (i in 1:nrow(t))
{
# print N
cat(FirstCol[i],
formatC(c(t[i, ], RS[i]), width = CWidth),
sep = " | ", collapse = "\n")
# print Expected N?
if (expected)
cat(SpaceSep1,
formatC(CST$expected[i, ], digits = digits,
format = "f", width = CWidth),
SpaceSep2, sep = " | ", collapse = "\n")
# print cell row proportions
cat(SpaceSep1,
formatC(c(CPR[i, ], RS[i] / GT), width = CWidth,
digits = digits, format = "f"),
sep = " | ", collapse = "\n")
# print cell col proportions
cat(SpaceSep1,
formatC(CPC[i, ], width = CWidth, digits = digits,
format = "f"),
SpaceSep2, sep = " | ", collapse = "\n")
# print cell Table proportions
cat(SpaceSep1,
formatC(CPT[i, ], width = CWidth, digits = digits,
format = "f"),
SpaceSep2, sep = " | ", collapse = "\n")
# print row separator
cat(RowSep1, rep(RowSep, ncol(t) + 1),
sep = "|", collapse = "\n")
}
# Print Column Totals
cat(ColTotal,
formatC(c(CS, GT), width = CWidth),
sep = " | ", collapse = "\n")
# Print col proportions
cat(SpaceSep1,
formatC(CS / GT, width = CWidth, digits = digits, format = "f"),
SpaceSep2, sep = " | ", collapse = "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n")
# Print Statistics
cat(rep("\n", 2))
cat("Tests for Independence of All Table Factors\n\n\n")
cat(CST$method,"\n\n")
cat("Chi^2 = ", CST$statistic, " d.f. = ", CST$parameter,
" p = ", CST$p.value, "\n")
cat(rep("\n", 2))
cat("Fisher's Exact Test for Count Data\n\n")
# if 2 x 2 table print one and two-tailed values
if (all(dim(t) == 2))
{
cat("Sample estimate odds ratio: ", FTt$estimate, "\n\n")
cat("Alternative hypothesis: true odds ratio is not equal to 1\n")
cat("p = ", FTt$p.value, "\n")
cat("95% confidence interval: ", FTt$conf.int, "\n\n")
cat("Alternative hypothesis: true odds ratio is less than 1\n")
cat("p = ", FTl$p.value, "\n")
cat("95% confidence interval: ", FTl$conf.int, "\n\n")
cat("Alternative hypothesis: true odds ratio is greater than 1\n")
cat("p = ", FTg$p.value, "\n")
cat("95% confidence interval: ", FTg$conf.int, "\n\n")
}
else
{
cat("Alternative hypothesis: two.sided\n")
cat("p = ", FTt$p.value, "\n")
}
cat(rep("\n", 2))
}
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._