[R] How to specify ff object filepaths when reading a CSV file into a ff data frame.

strexxx strexxx at free.fr
Wed Feb 27 21:59:27 CET 2013


This is a really old subject, so my apologies for digging it up,
but since I also ran into this, maybe this hack can be useful to someone.
I propose monkey patching as.ffdf.data.frame here:



library(ff)

# Patched variant of ff's as.ffdf.data.frame(): converts each column of the
# data.frame `x` with as.ff() and combines the results with ffdf().
#
# Arguments:
#   x        a data.frame.
#   vmode    NULL, or a vmode specification in one of three forms:
#              - a list whose names are vmodes and whose elements select the
#                columns that get that vmode;
#              - an unnamed vector, recycled over the columns;
#              - a vector named by column.
#   col_args extra arguments for as.ff(). Either
#              - a *named* list (or an empty list): the same arguments are
#                used for every column, or
#              - an *unnamed*, non-empty list holding one argument list per
#                column, e.g. to give each column its own `filename`.
#            In both forms `pattern` defaults to "ffdf" when not supplied.
#   ...      accepted for signature compatibility; passed to the per-column
#            worker, which does not use it.
#
# Returns the value of ffdf() called on the converted columns, with
# row.names set from `x` (NULL when the row names are just 1..n).
#
# Stops when a vmode name is not implemented / not a column name, or when a
# per-column `col_args` does not have exactly one list per column.
my.as.ffdf.data.frame <- function (x, vmode = NULL, col_args = list(), ...) 
{
  # Automatic integer row names (1..n) are dropped; any other integer row
  # names are kept as character.
  rnam <- attr(x, "row.names")
  if (is.integer(rnam)) {
    if (all(rnam == seq_along(rnam))) {
      rnam <- NULL
    } else {
      rnam <- as.character(rnam)
    }
  }

  x <- as.list(x)
  n_col <- length(x)

  # Resolve the vmode specification into one entry per column.
  vmodes <- vector("list", n_col)
  if (!is.null(vmode)) {
    nam <- names(x)
    if (is.list(vmode)) {
      vnam <- names(vmode)
      i <- match(vnam, .vmode[.vimplemented])
      if (any(is.na(i))) {
        stop("vmodes not implemented: ",
             paste0("'", vnam[is.na(i)], "'", collapse = ","))
      }
      names(vmodes) <- nam
      for (v in vnam) vmodes[vmode[[v]]] <- v
    } else {
      vnam <- names(vmode)
      if (is.null(vnam)) {
        vmodes <- as.list(rep(vmode, length.out = n_col))
      } else {
        i <- match(vnam, nam)
        if (any(is.na(i))) {
          stop("names not matched for vmode specification: ",
               paste0("'", vnam[is.na(i)], "'", collapse = ","))
        }
        vmodes[i] <- as.list(vmode)
      }
    }
  }

  # Normalise col_args to exactly one argument list per column.
  per_column <- is.null(names(col_args)) && length(col_args) > 0
  if (per_column) {
    if (length(col_args) != n_col) {
      stop("per-column col_args must have one element per column: got ",
           length(col_args), " for ", n_col, " column(s)")
    }
    if (!all(vapply(col_args, is.list, logical(1)))) {
      stop("every element of a per-column col_args must be a list")
    }
    col_args_list <- col_args
  } else {
    col_args_list <- rep(list(col_args), n_col)
  }

  # Apply the default file-name pattern. The result is stored back, so the
  # default reaches as.ff() in both the shared and the per-column form
  # (including the default, empty col_args).
  col_args_list <- lapply(col_args_list, function(args) {
    if (is.null(args$pattern)) 
      args$pattern <- "ffdf"
    args
  })

  ret <- lapply(seq_along(x), function(i, ...) {
    xi <- x[[i]]
    as_is <- inherits(xi, "AsIs")
    # Strip the AsIs marker for the conversion and restore it afterwards.
    if (as_is) {
      oldClass(xi) <- oldClass(xi)[-match("AsIs", oldClass(xi))]
    }
    column <- do.call("as.ff",
                      c(list(xi, vmode = vmodes[[i]]), col_args_list[[i]]))
    if (as_is) {
      oldClass(column) <- c("AsIs", oldClass(column))
    }
    column
  }, ...)

  names(ret) <- names(x)
  # NOTE(review): the posted code appended an object `l` here that is never
  # defined in this function; it has been dropped. Confirm against the
  # installed ff sources that nothing else was meant to be forwarded to ffdf().
  do.call("ffdf", c(ret, list(row.names = rnam)))
}




# |||||||||||||||||||||||||||||||| hook |||||||||||||||||||||||||||||||||
# Install my.as.ffdf.data.frame in place of ff's as.ffdf.data.frame, both in
# the "ff" namespace and in the attached "package:ff" environment.
# Requires ff to be attached (library(ff)) and my.as.ffdf.data.frame to be
# defined before this runs. The order of the statements below matters.
library(methods)
# Unlock the binding in the attached package environment so that the
# assign() call further down is allowed to overwrite it.
unlockBinding("as.ffdf.data.frame", as.environment("package:ff"))
# Replace the function inside the ff namespace.
# NOTE(review): per the R documentation assignInNamespace() also updates a
# registered S3 method of that name — confirm for the installed R version.
assignInNamespace("as.ffdf.data.frame",
                 my.as.ffdf.data.frame,
                 ns="ff", envir=as.environment("package:ff"))
# Replace the copy that is visible on the search path as well.
assign("as.ffdf.data.frame", my.as.ffdf.data.frame, as.environment("package:ff"))
# Re-lock the binding that was unlocked above.
lockBinding("as.ffdf.data.frame", as.environment("package:ff"))



# Example argument list: one as.ff() argument list per column, each writing
# to its own backing file. The name suggests it is meant for the
# `asffdf_args` parameter of ff's readers (e.g. read.csv.ffdf) — confirm.
asffdf_args <- list(
  col_args = lapply(
    c("tmp/a.ff", "tmp/b.ff"),
    function(path) list(filename = path, overwrite = TRUE, readonly = TRUE)
  )
)



More information about the R-help mailing list