R Code for X-Tab with Row/Col Proportions, Expected Vals and Tests

Marc Schwartz MSchwartz@medanalytics.com
Sat, 27 Jul 2002 13:43:34 -0500


This is a multi-part message in MIME format.

------=_NextPart_000_0042_01C23573.9AF517A0
Content-Type: text/plain;
	charset="us-ascii"
Content-Transfer-Encoding: 7bit


Recently, I noted a post and replies on R-Help from Professor Marc
Feldesman regarding a cross tabulation function that generates row and
column proportions, marginal values, expected cell values and tests for
independence presumably similar in a fashion to the output of the S-Plus
crosstabs() function or the SAS Proc Freq.

Martin Maechler had posted some code in reply for folks to update and
translate.

In parallel, out of my own needs for something functionally similar, I
have been working for a while on a function that takes either a
two-dimensional matrix or two vectors and generates just such a table,
with output similar to the aforementioned S-Plus/SAS functions.  The
code for the function is attached.

My goal would be to make this available to the R community as open
source, either independently or perhaps if deemed appropriate, as a base
function.

I would appreciate any constructive criticism of the code itself or the
output. My guess is that there may be more efficient means of performing
some of the manipulations and/or perhaps being consistent with R coding
and output standards.

If there are any suggestions for improvement, I would be more than happy
to incorporate these and make them available to the community.

Thank you for your consideration and I look forward to any feedback.

Best regards,

Marc Schwartz


------=_NextPart_000_0042_01C23573.9AF517A0
Content-Type: text/plain;
	name="CrossTable.r"
Content-Transfer-Encoding: quoted-printable
Content-Disposition: attachment;
	filename="CrossTable.r"

CrossTable <- function (x, y, digits =3D 3, expected =3D FALSE, correct =
=3D TRUE)
{
  Syntax <- paste("Syntax: CrossTable(x, y, digits =3D 3, drop =3D TRUE, =
expected =3D FALSE, correct =3D TRUE)",
                  "x:        A vector in a matrix or a dataframe OR if y =
not specified, a two-dimensional matrix",
                  "y:        A vector in a matrix or a dataframe.",
                  "digits:   Number of digits after the decimal point =
for cell proportions",
                  "expected: If TRUE, expected cell counts from the =
Chi^2 will be included.",
                  "correct:  If TRUE, the Yates continuity correction =
will be applied in the Chi^2 test.", sep =3D "\n")

  # Do error checking
  if (missing(x))
    stop(Syntax)

  if (missing(y))
  {
    # if only x is specified, it must be a 2 dimensional matrix
    if (length(dim(x)) !=3D 2)
      stop("x must be a 2 dimensional matrix if y is not given")
    if(any(dim(x) < 2))
      stop("x must have at least 2 rows and columns")
    if(any(x < 0) || any(is.na(x)))
      stop("all entries of x must be nonnegative and finite")
    else
      t <- x
  }
  else
  {
    if(length(x) !=3D length(y))
      stop("x and y must have the same length")

    # Create Titles for Table From Vector Names
    RowData <- deparse(substitute(x))
    ColData <- deparse(substitute(y))

    # Remove unused factor levels from vectors
    x <- factor(x)
    y <- factor(y)

    if((nlevels(x) < 2) || (nlevels(y) < 2))
      stop("x and y must have at least 2 levels")

    # Generate table
    t <- table(x, y)
  }

  # Generate cell proportion of row
  CPR <- prop.table(t, 1)

  # Generate cell proportion of col
  CPC <- prop.table(t, 2)

  # Generate cell proportion of total
  CPT <- prop.table(t)

  # Generate summary counts
  GT <- sum(t)
  RS <- rowSums(t)
  CS <- colSums(t)

  # Column and Row Total Headings
  ColTotal <- "Column Total"
  RowTotal <- "Row Total"

  # Set consistent column widths based upon dimnames and table values
  CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]), =
nchar(RS), nchar(CS), nchar(RowTotal)))
  RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal)))

  # Adjust first column width if Data Titles present
  if (exists("RowData"))
    RWidth <- max(RWidth, nchar(RowData))

  # Create row separators
  RowSep <- paste(rep("-", CWidth + 2), collapse =3D "")
  RowSep1 <- paste(rep("-", RWidth + 1), collapse =3D "")
  SpaceSep1 <- paste(rep(" ", RWidth), collapse =3D "")
  SpaceSep2 <- paste(rep(" ", CWidth), collapse =3D "")

  # Create formatted Names
  FirstCol <- formatC(dimnames(t)[[1]], width =3D RWidth, format =3D =
"s")
  ColTotal <- formatC(ColTotal, width =3D RWidth, format =3D "s")
  RowTotal <- formatC(RowTotal, width =3D CWidth, format =3D "s")

  # Perform Chi-Square Test
  CST <- chisq.test(t, correct =3D correct)

  # Perform Fisher Tests
  FTt <- fisher.test(t, alternative =3D "two.sided")

  if (all(dim(t) =3D=3D 2))
  {
    FTl <- fisher.test(t, alternative =3D "less")
    FTg <- fisher.test(t, alternative =3D "greater")
  }

  # Print Cell Layout

  cat(rep("\n", 2))
  cat("|-----------------|\n")
  cat("|               N |\n")
  if (expected)
    cat("|      Expected N |\n")
  cat("|   N / Row Total |\n")
  cat("|   N / Col Total |\n")
  cat("| N / Table Total |\n")
  cat("|-----------------|\n")
  cat(rep("\n", 2))
  cat("Total Observations in Table: ", GT, "\n")
  cat(rep("\n", 2))

  # Print Column headings

  if (exists("RowData"))
  {
    cat(SpaceSep1, "|", ColData, "\n")
    cat(formatC(RowData, width =3D RWidth, format =3D "s"), =
formatC(dimnames(t)[[2]], width =3D CWidth, format =3D "s"), RowTotal, =
sep =3D " | ", collapse =3D "\n")
  }
  else
    cat(SpaceSep1, formatC(dimnames(t)[[2]], width =3D CWidth, format =
=3D "s"), RowTotal, sep =3D " | ", collapse =3D "\n")

  cat(RowSep1, rep(RowSep, ncol(t) + 1), sep =3D "|", collapse =3D "\n")

  # Print table cells
  for (i in 1:nrow(t))
  {
    cat(FirstCol[i], formatC(c(t[i, ], RS[i]), width =3D CWidth), sep =
=3D " | ", collapse =3D "\n")

    if (expected)
      cat(SpaceSep1, formatC(CST$expected[i, ], digits =3D digits, =
format =3D "f", width =3D CWidth), SpaceSep2, sep =3D " | ", collapse =
=3D "\n")

    cat(SpaceSep1, formatC(c(CPR[i, ], RS[i] / GT), width =3D CWidth, =
digits =3D digits, format =3D "f"), sep =3D " | ", collapse =3D "\n")
    cat(SpaceSep1, formatC(CPC[i, ], width =3D CWidth, digits =3D =
digits, format =3D "f"), SpaceSep2, sep =3D " | ", collapse =3D "\n")
    cat(SpaceSep1, formatC(CPT[i, ], width =3D CWidth, digits =3D =
digits, format =3D "f"), SpaceSep2, sep =3D " | ", collapse =3D "\n")

    cat(RowSep1, rep(RowSep, ncol(t) + 1), sep =3D "|", collapse =3D =
"\n")
  }

  # Print Column Totals
  cat(ColTotal, formatC(c(CS, GT), width =3D CWidth), sep =3D " | ", =
collapse =3D "\n")
  cat(SpaceSep1, formatC(CS / GT, width =3D CWidth, digits =3D digits, =
format =3D "f"), SpaceSep2, sep =3D " | ", collapse =3D "\n")
  cat(RowSep1, rep(RowSep, ncol(t) + 1), sep =3D "|", collapse =3D "\n")

  # Print Statistics

  cat(rep("\n", 2))
  cat("Tests for Independence of All Table Factors\n\n\n")

  cat(CST$method,"\n\n")
  cat("Chi^2 =3D ", CST$statistic, "    d.f. =3D ", CST$parameter, "    =
p =3D ", CST$p.value, "\n")

  cat(rep("\n", 2))
  cat("Fisher's Exact Test for Count Data\n\n")

  if (all(dim(t) =3D=3D 2))
  {
    cat("Sample estimate odds ratio: ", FTt$estimate, "\n\n")

    cat("Alternative hypothesis: true odds ratio is not equal to 1\n")
    cat("p =3D ", FTt$p.value, "\n")
    cat("95% confidence interval: ", FTt$conf.int, "\n\n")

    cat("Alternative hypothesis: true odds ratio is less than 1\n")
    cat("p =3D ", FTl$p.value, "\n")
    cat("95% confidence interval: ", FTl$conf.int, "\n\n")

    cat("Alternative hypothesis: true odds ratio is greater than 1\n")
    cat("p =3D ", FTg$p.value, "\n")
    cat("95% confidence interval: ", FTg$conf.int, "\n\n")
  }
  else
  {
    cat("Alternative hypothesis: two.sided\n")
    cat("p =3D ", FTt$p.value, "\n")
  }

  cat(rep("\n", 2))
}

------=_NextPart_000_0042_01C23573.9AF517A0--

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._