[Rd] data frame subscription operator

Vladimir Dergachev vdergachev at rcgardis.com
Tue Nov 7 01:50:09 CET 2006


Hi all, 

   I was looking at the data frame subscript operator (attached at the end 
of this e-mail) and was puzzled by the following line:

    class(x) <- attr(x, "row.names") <- NULL

This appears to set the class and row.names attributes of the incoming data 
frame to NULL. So far I have not been able to figure out why this is 
necessary. Could anyone help?

The reason I am looking at it is that changing attributes forces duplication 
of the data frame and this is the largest cause of slowness of data.frames in 
general.

                           thank you very much !

                                            Vladimir Dergachev


> `[.data.frame`
# Extraction method for data frames, handling x[i], x[i, j] and the `drop`
# argument.
#
# Arguments:
#   x    - the data frame being subset.
#   i    - row selector in the two-index form; the only selector in the
#          single-index form.
#   j    - column selector.
#   drop - whether the result may be simplified. Its default is evaluated
#          lazily, at the first use of `drop` below, so `cols` refers to the
#          local variable holding the names of the columns selected by then.
#
# Returns a data frame, or (when dropping applies) a single column or a
# plain named list.
function (x, i, j, drop = if (missing(i)) TRUE else length(cols) ==
    1)
{
    # Count the arguments supplied, not counting an explicit `drop`.
    mdrop <- missing(drop)
    Narg <- nargs() - (!mdrop)
    # Single-index form: fewer than three arguments besides `drop`.
    if (Narg < 3) {
        if (!mdrop)
            warning("drop argument will be ignored")
        if (missing(i))
            return(x)
        # A matrix index is applied to the matrix form of the data frame.
        if (is.matrix(i))
            return(as.matrix(x)[i])
        # Delegate the selection to the next `[` method, then validate and
        # de-duplicate the resulting names.
        y <- NextMethod("[")
        nm <- names(y)
        if (!is.null(nm) && any(is.na(nm)))
            stop("undefined columns selected")
        if (any(duplicated(nm)))
            names(y) <- make.unique(nm)
        # Re-attach the original class and row names to the result.
        return(structure(y, class = oldClass(x), row.names = attr(x,
            "row.names")))
    }
    # Remember row names, column names and class, then strip the class and
    # row.names attributes from the local `x`. With the class removed, the
    # `x[j]` and `x[[j]]` calls below no longer dispatch to data frame
    # methods. The saved values are put back at the end unless the result
    # is dropped.
    rows <- attr(x, "row.names")
    cols <- names(x)
    cl <- oldClass(x)
    class(x) <- attr(x, "row.names") <- NULL
    if (missing(i)) {
        # Column selection only.
        if (!missing(j))
            x <- x[j]
        cols <- names(x)
        if (any(is.na(cols)))
            stop("undefined columns selected")
    }
    else {
        # Character row selectors are matched against the row names with
        # pmatch(), allowing repeated matches.
        if (is.character(i))
            i <- pmatch(i, as.character(rows), duplicates.ok = TRUE)
        rows <- rows[i]
        if (!missing(j)) {
            x <- x[j]
            cols <- names(x)
            if (any(is.na(cols)))
                stop("undefined columns selected")
        }
        # Apply the row selection to each remaining column. Note that `j` is
        # reused here as the loop index. Columns with two dimensions are
        # subset by row with drop = FALSE so they keep both dimensions.
        for (j in seq_along(x)) {
            xj <- x[[j]]
            x[[j]] <- if (length(dim(xj)) != 2)
                xj[i]
            else xj[i, , drop = FALSE]
        }
    }
    # From here on `drop` is reused as a flag: TRUE means the result is
    # returned without restoring the data frame attributes.
    if (drop) {
        drop <- FALSE
        n <- length(x)
        if (n == 1) {
            # Exactly one column: return the column itself.
            x <- x[[1]]
            drop <- TRUE
        }
        else if (n > 1) {
            # Row count is taken from the first column.
            xj <- x[[1]]
            nrow <- if (length(dim(xj)) == 2)
                dim(xj)[1]
            else length(xj)
            # A single row is dropped to a named list only when `drop` was
            # supplied explicitly by the caller.
            if (!mdrop && nrow == 1) {
                drop <- TRUE
                names(x) <- cols
                attr(x, "row.names") <- NULL
            }
        }
    }
    if (!drop) {
        # Rebuild the data frame: restore names, make row names free of NA
        # and duplicates, make column names unique, then re-attach
        # row.names and the original class.
        names(x) <- cols
        if (any(is.na(rows) | duplicated(rows))) {
            rows[is.na(rows)] <- "NA"
            rows <- make.unique(rows)
        }
        if (any(duplicated(nm <- names(x))))
            names(x) <- make.unique(nm)
        attr(x, "row.names") <- rows
        class(x) <- cl
    }
    x
}
<environment: namespace:base>



More information about the R-devel mailing list