R Code for X-Tab with Row/Col Proportions, Expected Vals and Tests

Marc Schwartz MSchwartz@medanalytics.com
Sun, 28 Jul 2002 17:25:39 -0500


Recently, I noted a post and replies on R-Help from Professor Marc
Feldesman regarding a cross tabulation function that generates row and
column proportions, marginal values, expected cell values and tests for
independence presumably similar in a fashion to the output of the S-Plus
crosstabs() function or SAS Proc Freq.

Martin Maechler had posted some code in reply for folks to update and
translate.  

In parallel, out of my own needs for something functionally similar, I
have been working for a while on a function that takes either a
two-dimensional matrix or two vectors and generates just such a table,
with output similar to the aforementioned S-Plus/SAS functions.  Column
widths are adjusted based upon the dimnames and the digits argument and
the printed table is labeled with vector names if used.

The code for the function is below.  If anyone wishes me to e-mail a
text file containing the code, let me know.

My goal would be to make this available to the R community as open
source, either independently or perhaps if deemed appropriate, as a base
function.

I would appreciate any constructive criticism of the code itself or the
output. My guess is that there may be more efficient means of performing
some of the manipulations and/or perhaps being consistent with core R
coding and output standards.

If there are any suggestions for improvement, I would be more than happy
to incorporate these and make them available to the community.

Thank you for your consideration and I look forward to any feedback.

Best regards,

Marc Schwartz

------------------------------------------------

CrossTable <- function (x, y, digits = 3, expected = FALSE,
                        correct = TRUE)
{
  Syntax <- paste("\nSyntax:",
                  "CrossTable(x, y, digits = 3, drop = TRUE,",
                  "           expected = FALSE, correct = TRUE)\n",
                  "x:        A vector in a matrix or dataframe OR",
                  "          if y not present, a two-dimension matrix",
                  "y:        A vector in a matrix or dataframe.",
                  "digits:   Number of digits after the decimal",
                  "          point for cell proportions",
                  "expected: If TRUE, expected cell counts from the",
                  "          Chi^2 will be included.",
                  "correct:  If TRUE, the Yates correction will be",
                  "          applied in the Chi^2 test.",
                  sep = "\n")

  # Do error checking
  if (missing(x))
    stop(Syntax)

  if (missing(y))
  {
    # if only x is specified, it must be a 2 dimensional matrix
    if (length(dim(x)) != 2)
      stop("x must be a 2 dimensional matrix if y is not given")
    if(any(dim(x) < 2))
      stop("x must have at least 2 rows and columns")
    if(any(x < 0) || any(is.na(x)))
      stop("all entries of x must be nonnegative and finite")
    else
      t <- x
  }
  else
  {
    if(length(x) != length(y))
      stop("x and y must have the same length")

    # Create Titles for Table From Vector Names
    RowData <- deparse(substitute(x))
    ColData <- deparse(substitute(y))

    # Remove unused factor levels from vectors
    x <- factor(x)
    y <- factor(y)

    if((nlevels(x) < 2) || (nlevels(y) < 2))
      stop("x and y must have at least 2 levels")

    # Generate table
    t <- table(x, y)
  }

  # Generate cell proportion of row
  CPR <- prop.table(t, 1)

  # Generate cell proportion of col
  CPC <- prop.table(t, 2)

  # Generate cell proportion of total
  CPT <- prop.table(t)

  # Generate summary counts
  GT <- sum(t)
  RS <- rowSums(t)
  CS <- colSums(t)

  # Column and Row Total Headings
  ColTotal <- "Column Total"
  RowTotal <- "Row Total"

  # Set consistent col widths based upon dimnames and table vals
  CWidth <- max(digits + 2, c(nchar(t), nchar(dimnames(t)[[2]]),
                nchar(RS), nchar(CS), nchar(RowTotal)))

  RWidth <- max(c(nchar(dimnames(t)[[1]]), nchar(ColTotal)))

  # Adjust first column width if Data Titles present
  if (exists("RowData"))
    RWidth <- max(RWidth, nchar(RowData))

  # Create row separators
  RowSep <- paste(rep("-", CWidth + 2), collapse = "")
  RowSep1 <- paste(rep("-", RWidth + 1), collapse = "")
  SpaceSep1 <- paste(rep(" ", RWidth), collapse = "")
  SpaceSep2 <- paste(rep(" ", CWidth), collapse = "")

  # Create formatted Names
  FirstCol <- formatC(dimnames(t)[[1]], width = RWidth, format = "s")
  ColTotal <- formatC(ColTotal, width = RWidth, format = "s")
  RowTotal <- formatC(RowTotal, width = CWidth, format = "s")

  # Perform Chi-Square Test
  CST <- chisq.test(t, correct = correct)

  # Perform Fisher Tests
  FTt <- fisher.test(t, alternative = "two.sided")

  # If 2 x 2, include one tailed values
  if (all(dim(t) == 2))
  {
    FTl <- fisher.test(t, alternative = "less")
    FTg <- fisher.test(t, alternative = "greater")
  }

  # Print Cell Layout

  cat(rep("\n", 2))
  cat("|-----------------|\n")
  cat("|               N |\n")
  if (expected)
    cat("|      Expected N |\n")
  cat("|   N / Row Total |\n")
  cat("|   N / Col Total |\n")
  cat("| N / Table Total |\n")
  cat("|-----------------|\n")
  cat(rep("\n", 2))
  cat("Total Observations in Table: ", GT, "\n")
  cat(rep("\n", 2))

  # Print Column headings

  # print vector names if present
  if (exists("RowData"))
  {
    cat(SpaceSep1, "|", ColData, "\n")
    cat(formatC(RowData, width = RWidth, format = "s"),
        formatC(dimnames(t)[[2]], width = CWidth, format = "s"),
        RowTotal, sep = " | ", collapse = "\n")
  }
  else
    cat(SpaceSep1,
      formatC(dimnames(t)[[2]], width = CWidth, format = "s"),
      RowTotal, sep = " | ", collapse = "\n")

  cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n")

  # Print table cells
  for (i in 1:nrow(t))
  {
    # print N
    cat(FirstCol[i],
        formatC(c(t[i, ], RS[i]), width = CWidth),
        sep = " | ", collapse = "\n")

    # print Expected N?
    if (expected)
      cat(SpaceSep1,
          formatC(CST$expected[i, ], digits = digits,
                  format = "f", width = CWidth),
          SpaceSep2, sep = " | ", collapse = "\n")

    # print cell row proportions
    cat(SpaceSep1,
        formatC(c(CPR[i, ], RS[i] / GT), width = CWidth,
                digits = digits, format = "f"),
        sep = " | ", collapse = "\n")

    # print cell col proportions
    cat(SpaceSep1,
        formatC(CPC[i, ], width = CWidth, digits = digits,
                format = "f"),
        SpaceSep2, sep = " | ", collapse = "\n")

    # print cell Table proportions
    cat(SpaceSep1,
        formatC(CPT[i, ], width = CWidth, digits = digits,
                format = "f"),
        SpaceSep2, sep = " | ", collapse = "\n")

    # print row separator
    cat(RowSep1, rep(RowSep, ncol(t) + 1),
        sep = "|", collapse = "\n")
  }

  # Print Column Totals
  cat(ColTotal,
      formatC(c(CS, GT), width = CWidth),
      sep = " | ", collapse = "\n")

  # Print col proportions
  cat(SpaceSep1,
      formatC(CS / GT, width = CWidth, digits = digits, format = "f"),
      SpaceSep2, sep = " | ", collapse = "\n")

  cat(RowSep1, rep(RowSep, ncol(t) + 1), sep = "|", collapse = "\n")

  # Print Statistics

  cat(rep("\n", 2))
  cat("Tests for Independence of All Table Factors\n\n\n")

  cat(CST$method,"\n\n")

  cat("Chi^2 = ", CST$statistic, "    d.f. = ", CST$parameter,
      "    p = ", CST$p.value, "\n")

  cat(rep("\n", 2))
  cat("Fisher's Exact Test for Count Data\n\n")

  # if 2 x 2 table print one and two-tailed values
  if (all(dim(t) == 2))
  {
    cat("Sample estimate odds ratio: ", FTt$estimate, "\n\n")

    cat("Alternative hypothesis: true odds ratio is not equal to 1\n")
    cat("p = ", FTt$p.value, "\n")
    cat("95% confidence interval: ", FTt$conf.int, "\n\n")

    cat("Alternative hypothesis: true odds ratio is less than 1\n")
    cat("p = ", FTl$p.value, "\n")
    cat("95% confidence interval: ", FTl$conf.int, "\n\n")

    cat("Alternative hypothesis: true odds ratio is greater than 1\n")
    cat("p = ", FTg$p.value, "\n")
    cat("95% confidence interval: ", FTg$conf.int, "\n\n")
  }
  else
  {
    cat("Alternative hypothesis: two.sided\n")
    cat("p = ", FTt$p.value, "\n")
  }

  cat(rep("\n", 2))
}

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._