[Bioc-devel] bug-report for combine
Laurent Gautier
lgautier at gmail.com
Wed Jul 18 14:23:53 CEST 2007
It seems to be a problem with 'identical'.
The following patched "combine" seems to fix the problem
(workaround for zero-rows data.frames:
length(sharedRows) == 0))
)
## ---
setMethod("combine",
signature=list(x="data.frame", y="data.frame"),
function(x, y, ...) {
if (all(dim(x) == 0) && all(dim(y) == 0))
return(x)
else if (all(dim(x) == 0))
return(y)
else if (all(dim(y) == 0))
return(x)
uniqueRows <- unique(c(row.names(x), row.names(y)))
uniqueCols <- unique(c(names(x), names(y)))
sharedCols <- intersect(names(x), names(y))
sharedRows <- intersect(row.names(x), row.names(y))
ok <- sapply(sharedCols, function(nm) {
if (class(x[[nm]]) != class(y[[nm]]))
return(FALSE)
switch(class(x[[nm]])[[1]], factor = {
if (identical(levels(x[[nm]]), levels(y[[nm]])) &&
(identical(x[sharedRows, nm, drop = FALSE],
y[sharedRows, nm, drop = FALSE]) ||
length(sharedRows) == 0))
TRUE
else FALSE
}, ordered = , identical(x[sharedRows, nm, drop = FALSE],
y[sharedRows, nm, drop = FALSE]))
})
if (!all(ok))
stop("data.frames contain conflicting data:",
"\n\tnon-conforming colname(s): ", paste(sharedCols[!ok],
collapse = ", "))
if (length(uniqueRows) == 0) {
x <- x["tmp", , drop = FALSE]
y <- y["tmp", , drop = FALSE]
}
else if (nrow(x) == 0) {
x <- x[row.names(y), , drop = FALSE]
row.names(x) <- row.names(y)
}
else if (nrow(y) == 0) {
y <- y[row.names(x), , drop = FALSE]
row.names(y) <- row.names(x)
}
if (length(uniqueCols) > 0)
extLength <- max(nchar(sub(".*\\.", "", uniqueCols))) +
1
else extLength <- 1
extX <- paste(c(".", rep("x", extLength)), collapse = "")
extY <- paste(c(".", rep("y", extLength)), collapse = "")
z <- merge(x, y, by = "row.names", all = TRUE, suffixes = c(extX,
extY))
for (nm in sharedCols) {
nmx <- paste(nm, extX, sep = "")
nmy <- paste(nm, extY, sep = "")
z[[nm]] <- switch(class(z[[nmx]]), AsIs =
I(ifelse(is.na(z[[nmx]]),
z[[nmy]], z[[nmx]])), factor = {
col <- ifelse(is.na(z[[nmx]]), as.character(z[[nmy]]),
as.character(z[[nmx]]))
if (!identical(levels(z[[nmx]]), levels(z[[nmy]])))
factor(col)
else factor(col, levels = levels(z[[nmx]]))
}, ifelse(is.na(z[[nmx]]), z[[nmy]], z[[nmx]]))
}
row.names(z) <- z$Row.names
z$Row.names <- NULL
z[uniqueRows, uniqueCols, drop = FALSE]
}
)
2007/7/18, Laurent Gautier <lgautier at gmail.com>:
> Hi,
>
> There appears to be a problem either with the function "combine",
> or with my understanding of what it is doing:
>
> ## ----
> library(Biobase)
>
> dfA <- data.frame(label=rep("x", 2), row.names=1:2)
> dfB <- data.frame(label=rep("x", 3), row.names=3:5)
> dfC <- data.frame(label=rep("x", 4), row.names=6:9)
>
>
> dfAB <- combine(dfA, dfB) # ok
>
> dfABC <- combine(dfAB, dfC) # ?! error
>
> Error in combine(dfAB, dfC) : data.frames contain conflicting data:
> non-conforming colname(s): label
> ## ---
>
>
>
>
> Laurent
>
--
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.6 (GNU/Linux)
iEYEARECAAYFAkYgwJ4ACgkQB/w/MLoyRDeQlgCeMp8v69/Wy24Q4IaBVhoG1M5R
2h4AoIOTvKbrFpTklRDjV7u8tEOeSQqt
=JPph
-----END PGP SIGNATURE-----
More information about the Bioc-devel
mailing list