[Bioc-devel] bug-report for combine

Laurent Gautier lgautier at gmail.com
Wed Jul 18 14:23:53 CEST 2007


It seems to be a problem with 'identical'.

The following patched "combine" seems to fix the problem
(as a workaround for zero-row data.frames, the added condition is
`length(sharedRows) == 0`).

## ---
## combine() method for two data.frames.
##
## Arguments:
##   x, y  data.frames to combine.
##   ...   accepted for compatibility with the generic; not used here.
##
## Value:
##   A data.frame whose rows are the union of the row names of `x` and
##   `y` (those of `x` first) and whose columns are the union of their
##   column names (those of `x` first).  For a column present in both
##   inputs, the value from `x` is used unless it is NA, in which case the
##   value from `y` is used.  If one input has all dimensions equal to
##   zero, the other input is returned unchanged.
##
## Errors:
##   Stops with "data.frames contain conflicting data" when a shared
##   column differs in class, in factor levels, or in its values on the
##   rows present in both inputs.
setMethod("combine",
          signature=list(x="data.frame", y="data.frame"),
          function(x, y, ...) {
            ## A completely empty data.frame contributes nothing.
            if (all(dim(x) == 0) && all(dim(y) == 0))
              return(x)
            else if (all(dim(x) == 0))
              return(y)
            else if (all(dim(y) == 0))
              return(x)

            uniqueRows <- unique(c(row.names(x), row.names(y)))
            uniqueCols <- unique(c(names(x), names(y)))
            sharedCols <- intersect(names(x), names(y))
            sharedRows <- intersect(row.names(x), row.names(y))

            ## With no rows in common there are no values that could
            ## conflict.  Comparing two zero-row subsets with identical()
            ## can nevertheless fail (this is the bug reported in this
            ## thread), so the comparison is skipped in that case, for
            ## every column type and not only for factors.
            noSharedRows <- length(sharedRows) == 0
            sameOnSharedRows <- function(nm) {
              noSharedRows ||
                identical(x[sharedRows, nm, drop = FALSE],
                          y[sharedRows, nm, drop = FALSE])
            }

            ## ok[nm] is TRUE when shared column `nm` can be combined.
            ## vapply() guarantees a logical vector even when there are no
            ## shared columns.
            ok <- vapply(sharedCols, function(nm) {
              ## class() may have several elements (e.g. ordered factors),
              ## so compare the whole vectors rather than using `!=`.
              if (!identical(class(x[[nm]]), class(y[[nm]])))
                return(FALSE)
              switch(class(x[[nm]])[[1]],
                     factor = ,
                     ordered = identical(levels(x[[nm]]), levels(y[[nm]])) &&
                       sameOnSharedRows(nm),
                     sameOnSharedRows(nm))
            }, logical(1))
            if (!all(ok))
              stop("data.frames contain conflicting data:",
                   "\n\tnon-conforming colname(s): ", paste(sharedCols[!ok],
                                                            collapse = ", "))

            if (length(uniqueRows) == 0) {
              ## NOTE(review): indexing by a row name that does not exist
              ## yields a single all-NA placeholder row in each input; it
              ## is removed again by the final subsetting on uniqueRows.
              x <- x["tmp", , drop = FALSE]
              y <- y["tmp", , drop = FALSE]
            }
            else if (nrow(x) == 0) {
              ## Give the row-less input all-NA rows named like the other's.
              x <- x[row.names(y), , drop = FALSE]
              row.names(x) <- row.names(y)
            }
            else if (nrow(y) == 0) {
              y <- y[row.names(x), , drop = FALSE]
              row.names(y) <- row.names(x)
            }

            ## Build merge suffixes (".xx...", ".yy...") one character
            ## longer than the longest existing dot-extension among the
            ## column names -- presumably so that the suffixed names cannot
            ## collide with an existing column name.
            if (length(uniqueCols) > 0)
              extLength <- max(nchar(sub(".*\\.", "", uniqueCols))) + 1
            else extLength <- 1
            extX <- paste(c(".", rep("x", extLength)), collapse = "")
            extY <- paste(c(".", rep("y", extLength)), collapse = "")
            z <- merge(x, y, by = "row.names", all = TRUE,
                       suffixes = c(extX, extY))

            ## Collapse each pair of suffixed columns back into one column,
            ## preferring the value from `x` and falling back to `y` where
            ## the `x` value is NA.
            for (nm in sharedCols) {
              nmx <- paste0(nm, extX)
              nmy <- paste0(nm, extY)
              fromY <- is.na(z[[nmx]])
              ## Dispatch on the first class only; switch() requires a
              ## length-one EXPR.
              z[[nm]] <- switch(class(z[[nmx]])[[1]],
                AsIs = I(ifelse(fromY, z[[nmy]], z[[nmx]])),
                factor = ,
                ordered = {
                  ## ifelse() would return integer codes for factors, so
                  ## go through character and rebuild the factor.
                  col <- ifelse(fromY, as.character(z[[nmy]]),
                                as.character(z[[nmx]]))
                  if (!identical(levels(z[[nmx]]), levels(z[[nmy]])))
                    factor(col, ordered = is.ordered(z[[nmx]]))
                  else factor(col, levels = levels(z[[nmx]]),
                              ordered = is.ordered(z[[nmx]]))
                },
                ifelse(fromY, z[[nmy]], z[[nmx]]))
            }

            ## merge() returns the row names in a "Row.names" column;
            ## restore them and return rows/columns in the expected order.
            row.names(z) <- z[["Row.names"]]
            z[["Row.names"]] <- NULL
            z[uniqueRows, uniqueCols, drop = FALSE]
          }
)


2007/7/18, Laurent Gautier <lgautier at gmail.com>:
> Hi,
>
> There appears to be a problem either with the function "combine",
> or with my understanding of what it is doing:
>
> ## ----
> library(Biobase)
>
> dfA <- data.frame(label=rep("x", 2), row.names=1:2)
> dfB <- data.frame(label=rep("x", 3), row.names=3:5)
> dfC <- data.frame(label=rep("x", 4), row.names=6:9)
>
>
> dfAB <- combine(dfA, dfB) # ok
>
> dfABC <- combine(dfAB, dfC) # ?! error
>
> Error in combine(dfAB, dfC) : data.frames contain conflicting data:
>         non-conforming colname(s): label
> ## ---
>
>
>
>
> Laurent
>


-- 
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)

iEYEARECAAYFAkYgwJ4ACgkQB/w/MLoyRDeQlgCeMp8v69/Wy24Q4IaBVhoG1M5R
2h4AoIOTvKbrFpTklRDjV7u8tEOeSQqt
=JPph
-----END PGP SIGNATURE-----



More information about the Bioc-devel mailing list