listDuplicatesProcess <- function( src, handle.duplicates=c("stop", "keep.original", "keep.first", "keep.last", "index.duplicates", "index.all"), do.return.flat=TRUE, .delim.index="-", .delim.path="/", ... ){ .buffer <- new.env() if(length(handle.duplicates) > 1){ handle.duplicates <- handle.duplicates[1] } # FLATTEN if(!("FlatList" %in% class(src))){ src.flat <- flatten( src=src, do.index=TRUE, .delim.index=.delim.index, .delim.path=.delim.path ) } else { src.flat <- src } # / if(!all(src.flat$degree == c(0,0))){ stop("Flatlist of degree c(0,0) required.") } .buffer$src.flat <- src.flat .buffer$index <- NULL idx.dupl <- which(src.flat$index$table$duplicate) src.flat.split <- split( .buffer$src.flat$index$table[idx.dupl,], f=.buffer$src.flat$index$table$name[idx.dupl] ) if(length(idx.dupl)){ if( !is.expression(handle.duplicates)){ if(handle.duplicates == "stop"){ stop("Duplicates identified.") } } JNK <- sapply(src.flat.split, function(x.spl){ if(!is.expression(handle.duplicates)){ idx.dupl <- which(.buffer$src.flat$index$table$name %in% x.spl$name) if(handle.duplicates == "index.duplicates"){ index.new <- list(.buffer$src.flat$index$table[idx.dupl[-1],]) names(index.new) <- unique(x.spl$name) .buffer$index <- c(.buffer$index, index.new) return(NULL) } if(handle.duplicates == "index.all"){ index.new <- list(.buffer$src.flat$index$table[idx.dupl,]) names(index.new) <- unique(x.spl$name) .buffer$index <- c(.buffer$index, index.new) return(NULL) } if(handle.duplicates == "keep.original"){ idx.keep <- 1 idx.dupl <- idx.dupl[-idx.keep] idx.drop <- which( names(.buffer$src.flat$flat) %in% .buffer$src.flat$index$table$name[idx.dupl] )[-idx.keep] } if(handle.duplicates == "keep.first"){ idx.keep <- 2 idx.dupl <- idx.dupl[-idx.keep] idx.drop <- which( names(.buffer$src.flat$flat) %in% .buffer$src.flat$index$table$name[idx.dupl] )[-idx.keep] } if(handle.duplicates == "keep.last"){ idx.keep <- length(idx.dupl) idx.dupl <- idx.dupl[-idx.keep] idx.drop <- which( names(.buffer$src.flat$flat) %in% .buffer$src.flat$index$table$name[idx.dupl] ) idx.drop <- idx.drop[-idx.keep] } } else { #handle.duplicates <- expression(.buffer$src.flat$index$table$class == "numeric") # TODO # Test more thoroughly. idx.dupl <- which(.buffer$src.flat$index$table$name %in% x.spl$name) idx.drop <- which( names(.buffer$src.flat$flat) %in% .buffer$src.flat$index$table$name[idx.dupl] ) idx.expr <- which(eval(handle.duplicates)) if(!length(idx.expr)){ msg <- c( "Duplicate processing criterion could not be matched:", paste("* handle.duplicates: '", handle.duplicates, "'", sep="") ) stop(cat(msg, sep="\n")) } idx.keep <- which(idx.dupl %in% idx.expr) if(!length(idx.keep)){ msg <- c( "Duplicate processing criterion could not be matched:", paste("* handle.duplicates: '", handle.duplicates, "'", sep="") ) stop(cat(msg, sep="\n")) } if(length(idx.keep) > 1){ msg <- c( "Duplicate processing criterion resulted in multiple matches:", paste("* handle.duplicates: '", handle.duplicates, "'", sep=""), paste("* Matches: ", paste(idx.dupl[idx.keep], collapse=" "), sep="") ) stop(cat(msg, sep="\n")) } idx.dupl <- idx.dupl[-idx.keep] idx.drop <- idx.drop[-idx.keep] } idx.dupl.1 <- strsplit( .buffer$src.flat$index$table$index[idx.dupl], split=.delim.index ) .buffer$counter <- 0 idx.dupl.1 <- lapply(idx.dupl.1, function(x){ idx <- as.numeric(x) idx[length(idx)] <- idx[length(idx)] - .buffer$counter .buffer$counter <- .buffer$counter + 1 return(idx) }) JNK <- sapply(idx.dupl.1, function(x.idx){ .buffer$src.flat$original[[x.idx]] <- NULL }) if(length(idx.drop)){ .buffer$src.flat$flat <- .buffer$src.flat$flat[-idx.drop] } .buffer$src.flat$index$raw <- .buffer$src.flat$index$raw[-idx.dupl] .buffer$src.flat$index$table <- .buffer$src.flat$index$table[-idx.dupl,] .buffer$src.flat$index$table$duplicate <- FALSE rownames(.buffer$src.flat$index$table) <- NULL # FIND RIGHT BOTTOM LAYER VALUES idx.bottom <- which( .buffer$src.flat$index$table$is.bottom & .buffer$src.flat$index$table$name == unique(x.spl$name) ) # / # UPDATE INDEX IN TABLE idx.index <- strsplit( .buffer$src.flat$index$table$index[idx.bottom], split="-" ) index.new <- sapply(idx.index, function(x){ x[length(x)] <- 1 paste(x, collapse=.delim.index) }) .buffer$src.flat$index$table$index[idx.bottom] <<- index.new # / # UPDATE INDEX IN RAW tmp.paths <- unique(.buffer$src.flat$index$table$name[idx.bottom]) JNK <- sapply(seq(along=tmp.paths), function(x){ .buffer$src.flat$index$raw[[tmp.paths[x]]]$index <<- index.new[x] }) # / }) } # RETURN VALUE if( handle.duplicates == "index.duplicates" | handle.duplicates == "index.all" ){ out <- .buffer$index } else { if(do.return.flat){ out <- .buffer$src.flat } else { out <- .buffer$src.flat$original } } # / return(out) }