R Code for X-Tab with Row/Col Proportions, Expected Vals and Tests
Marc Schwartz
MSchwartz@medanalytics.com
Sat, 27 Jul 2002 13:43:34 -0500
This is a multi-part message in MIME format.
------=_NextPart_000_0042_01C23573.9AF517A0
Content-Type: text/plain;
charset="us-ascii"
Content-Transfer-Encoding: 7bit
Recently, I noted a post and replies on R-Help from Professor Marc
Feldesman regarding a cross tabulation function that generates row and
column proportions, marginal values, expected cell values and tests for
independence presumably similar in a fashion to the output of the S-Plus
crosstabs() function or the SAS Proc Freq.
Martin Maechler had posted some code in reply for folks to update and
translate.
In parallel, out of my own needs for something functionally similar, I
have been working for a while on a function that takes either a
two-dimensional matrix or two vectors and generates just such a table,
with output similar to the aforementioned S-Plus/SAS functions. The
code for the function is attached.
My goal would be to make this available to the R community as open
source, either independently or perhaps if deemed appropriate, as a base
function.
I would appreciate any constructive criticism of the code itself or the
output. My guess is that there may be more efficient means of performing
some of the manipulations and/or perhaps being consistent with R coding
and output standards.
If there are any suggestions for improvement, I would be more than happy
to incorporate these and make them available to the community.
Thank you for your consideration and I look forward to any feedback.
Best regards,
Marc Schwartz
------=_NextPart_000_0042_01C23573.9AF517A0
Content-Type: text/plain;
name="CrossTable.r"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
filename="CrossTable.r"
CrossTable <- function (x, y, digits =3D 3, expected =3D FALSE, correct =
=3D TRUE)
{
Syntax <- paste("Syntax: CrossTable(x, y, digits =3D 3, drop =3D TRUE, =
expected =3D FALSE, correct =3D TRUE)",
"x: A vector in a matrix or a dataframe OR if y =
not specified, a two-dimensional matrix",
"y: A vector in a matrix or a dataframe.",
"digits: Number of digits after the decimal point =
for cell proportions",
"expected: If TRUE, expected cell counts from the =
Chi^2 will be included.",
"correct: If TRUE, the Yates continuity correction =
will be applied in the Chi^2 test.", sep =3D "\n")
# Do error checking
if (missing(x))
stop(Syntax)
if (missing(y))
{
# if only x is specified, it must be a 2 dimensional matrix
if (length(dim(x)) !=3D 2)
stop("x must be a 2 dimensional matrix if y is not given")
if(any(dim(x) < 2))
stop("x must have at least 2 rows and columns")
if(any(x < 0) || any(is.na(x)))
stop("all entries of x must be nonnegative and finite")
else
t <- x
}
else
{
if(length(x) !=3D length(y))
stop("x and y must have the same length")
# Create Titles for Table From Vector Names
RowData <- deparse(substitute(x))
ColData <- deparse(substitute(y))
# Remove unused factor levels from vectors
x <- factor(x)
y <- factor(y)
if((nlevels(x) < 2) || (nlevels(y) < 2))
stop("x and y must have at least 2 levels")
# Generate table
t <- table(x, y)
}
# Generate cell proportion of row
CPR <- prop.table(t, 1)
# Generate cell proportion of col
CPC <- prop.table(t, 2)
# Generate cell proportion of total
CPT <- prop.table(t)
# Generate summary counts
GT <- sum(t)
RS <- rowSums(t)
CS <- colSums(t)
# Column and Row Total Headings
ColTotal <- "Column Total"
RowTotal <- "Row Total"
# Set consistent column widths based upon dimnames and table values
CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]), =
nchar(RS), nchar(CS), nchar(RowTotal)))
RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal)))
# Adjust first column width if Data Titles present
if (exists("RowData"))
RWidth <- max(RWidth, nchar(RowData))
# Create row separators
RowSep <- paste(rep("-", CWidth + 2), collapse =3D "")
RowSep1 <- paste(rep("-", RWidth + 1), collapse =3D "")
SpaceSep1 <- paste(rep(" ", RWidth), collapse =3D "")
SpaceSep2 <- paste(rep(" ", CWidth), collapse =3D "")
# Create formatted Names
FirstCol <- formatC(dimnames(t)[[1]], width =3D RWidth, format =3D =
"s")
ColTotal <- formatC(ColTotal, width =3D RWidth, format =3D "s")
RowTotal <- formatC(RowTotal, width =3D CWidth, format =3D "s")
# Perform Chi-Square Test
CST <- chisq.test(t, correct =3D correct)
# Perform Fisher Tests
FTt <- fisher.test(t, alternative =3D "two.sided")
if (all(dim(t) =3D=3D 2))
{
FTl <- fisher.test(t, alternative =3D "less")
FTg <- fisher.test(t, alternative =3D "greater")
}
# Print Cell Layout
cat(rep("\n", 2))
cat("|-----------------|\n")
cat("| N |\n")
if (expected)
cat("| Expected N |\n")
cat("| N / Row Total |\n")
cat("| N / Col Total |\n")
cat("| N / Table Total |\n")
cat("|-----------------|\n")
cat(rep("\n", 2))
cat("Total Observations in Table: ", GT, "\n")
cat(rep("\n", 2))
# Print Column headings
if (exists("RowData"))
{
cat(SpaceSep1, "|", ColData, "\n")
cat(formatC(RowData, width =3D RWidth, format =3D "s"), =
formatC(dimnames(t)[[2]], width =3D CWidth, format =3D "s"), RowTotal, =
sep =3D " | ", collapse =3D "\n")
}
else
cat(SpaceSep1, formatC(dimnames(t)[[2]], width =3D CWidth, format =
=3D "s"), RowTotal, sep =3D " | ", collapse =3D "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep =3D "|", collapse =3D "\n")
# Print table cells
for (i in 1:nrow(t))
{
cat(FirstCol[i], formatC(c(t[i, ], RS[i]), width =3D CWidth), sep =
=3D " | ", collapse =3D "\n")
if (expected)
cat(SpaceSep1, formatC(CST$expected[i, ], digits =3D digits, =
format =3D "f", width =3D CWidth), SpaceSep2, sep =3D " | ", collapse =
=3D "\n")
cat(SpaceSep1, formatC(c(CPR[i, ], RS[i] / GT), width =3D CWidth, =
digits =3D digits, format =3D "f"), sep =3D " | ", collapse =3D "\n")
cat(SpaceSep1, formatC(CPC[i, ], width =3D CWidth, digits =3D =
digits, format =3D "f"), SpaceSep2, sep =3D " | ", collapse =3D "\n")
cat(SpaceSep1, formatC(CPT[i, ], width =3D CWidth, digits =3D =
digits, format =3D "f"), SpaceSep2, sep =3D " | ", collapse =3D "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep =3D "|", collapse =3D =
"\n")
}
# Print Column Totals
cat(ColTotal, formatC(c(CS, GT), width =3D CWidth), sep =3D " | ", =
collapse =3D "\n")
cat(SpaceSep1, formatC(CS / GT, width =3D CWidth, digits =3D digits, =
format =3D "f"), SpaceSep2, sep =3D " | ", collapse =3D "\n")
cat(RowSep1, rep(RowSep, ncol(t) + 1), sep =3D "|", collapse =3D "\n")
# Print Statistics
cat(rep("\n", 2))
cat("Tests for Independence of All Table Factors\n\n\n")
cat(CST$method,"\n\n")
cat("Chi^2 =3D ", CST$statistic, " d.f. =3D ", CST$parameter, " =
p =3D ", CST$p.value, "\n")
cat(rep("\n", 2))
cat("Fisher's Exact Test for Count Data\n\n")
if (all(dim(t) =3D=3D 2))
{
cat("Sample estimate odds ratio: ", FTt$estimate, "\n\n")
cat("Alternative hypothesis: true odds ratio is not equal to 1\n")
cat("p =3D ", FTt$p.value, "\n")
cat("95% confidence interval: ", FTt$conf.int, "\n\n")
cat("Alternative hypothesis: true odds ratio is less than 1\n")
cat("p =3D ", FTl$p.value, "\n")
cat("95% confidence interval: ", FTl$conf.int, "\n\n")
cat("Alternative hypothesis: true odds ratio is greater than 1\n")
cat("p =3D ", FTg$p.value, "\n")
cat("95% confidence interval: ", FTg$conf.int, "\n\n")
}
else
{
cat("Alternative hypothesis: two.sided\n")
cat("p =3D ", FTt$p.value, "\n")
}
cat(rep("\n", 2))
}
------=_NextPart_000_0042_01C23573.9AF517A0--
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._