[R] Retrieve all names of nested list and index list based on these names
wwa418 at ku-eichstaett.de
wwa418 at ku-eichstaett.de
Wed Nov 10 02:26:00 CET 2010
Hi all,
I looked for a function that would retrieve all(!) names of an arbitrary
deeply nested named list. Also, names should optionally be arranged in a
way that reflects the list's hierarchy structure (i.e. 'a$a.1$a.1.1' etc.)
Also, there should be a recursive index linked to a respective list branch
that could be used to index a list by names (as you would do with named
vectors, only that now there's also a hierarchy structure coming into
play.
Example:
name index
a 1
a.1 1-1
a.1.1 1-1-1
As I didn't really find anything that suited my needs, I ended up trying
to write a recursive function that loops through the individual branches
via lapply() and came to find this to be pretty nasty to debug/manually
test ;-).
I think I found a acceptable implementation now and thought I'd share it
in case someone is up to a similar task. Two function defs, then an
example:
##### FUNCTION DEFS #####
listnames.get <- function(
list.obj,
do.basename=TRUE,
do.name.chain=TRUE,
...
)
{
# VALIDATE
if(!is.list(list.obj)) stop("Argument 'list.obj' must be a list.")
# /
#---------------------------------------------------------------------------
# CORE FUNCTION
#---------------------------------------------------------------------------
listnames.get.core <- function(
list.obj,
do.basename=TRUE,
do.name.chain=TRUE,
buffer,
...
)
{
if(!exists("index", buffer))
{
buffer$index <- new.env(parent=emptyenv())
buffer$index <- NULL
buffer$name <- NULL
}
jnk <- sapply(1:length(list.obj), function(x)
{
list.branch <- list.obj[x]
list.branch.nme <- names(list.branch)
if(do.basename) list.branch.nme <- basename(list.branch.nme)
list.obj.updt <- list.branch[[1]]
# UPDATE BUFFER
buffer$run <- c(buffer$run, x)
if(do.name.chain)
{
buffer$name <- c(buffer$name, list.branch.nme)
} else
{
buffer$name <- list.branch.nme
}
# /
index.crnt <- paste(as.character(buffer$run), collapse="-")
index.crnt <- data.frame(
name=paste(buffer$name, collapse="$"),
index=index.crnt,
stringsAsFactors=FALSE
)
index.updt <- rbind(buffer$index, index.crnt)
buffer$index <- index.updt
if(is.list(list.obj.updt))
{
jcore.listnames.get.core(
list.obj=list.obj.updt,
do.basename=do.basename,
do.name.chain=do.name.chain,
buffer=buffer
)
}
# UPDATE BUFFER
buffer$run <- buffer$run[-length(buffer$run)]
buffer$name <- buffer$name[-length(buffer$name)]
# /
return(NULL)
})
return(TRUE)
}
# /CORE FUNCTION ----------
#---------------------------------------------------------------------------
# APPLICATION
#---------------------------------------------------------------------------
assign("buffer", new.env(parent=emptyenv()), envir=environment())
listnames.get.core(
list.obj=list.obj,
do.basename=do.basename,
buffer=buffer
)
# /APPLICATION ----------
return(buffer$index)
}
listbranch.get <- function(
list.obj,
query,
do.strict=TRUE,
do.rtn.val=TRUE,
msg.error=NULL,
...
)
{
# VALIDATE
if(!is.list(list.obj)) stop("Argument 'list.obj' must be a list.")
# /
# ESTABLISH LIST INDEX
list.index <- jcore.listnames.get(
list.obj=list.obj,
do.basename=TRUE,
do.name.chain=TRUE
)
list.index.nms <- list.index$name
# /
# SEARCH FOR QUERY
if(do.strict)
{
query.0 <- query
query <- gsub("\\$", "\\\\$", query)
query <- gsub("\\.", "\\\\.", query)
query <- paste("^", query, "$", sep="")
}
idx <- grep(query, list.index.nms, perl=TRUE)
if(!length(idx))
{
if(is.null(msg.error))
{
msg.error <- paste("Query not successful: '", query.0, "' ('",
query, "')", sep="")
}
stop(cat(msg.error, sep="\n"))
}
# /
# BUILDING RECURSIVE INDEX
idx <- list.index$index[idx]
idx <- as.numeric(unlist(strsplit(idx, split="-")))
# /
if(do.rtn.val)
{
# RECURSIVE INDEXING
rtn <- list.obj[[idx]]
# /
} else
{
rtn <- idx
}
return(rtn)
}
##### EXAMPLE #####
my.list <- list(
a=list(a.1="a", a.2=list(a.2.1="a", a.2.2="b"), a.3=list(a.3.1="a"),
b=list(b.1=list(b.1.1="a"), b.2="b"),
c="a"
)
listnames.get(list.obj=my.list, do.basename=TRUE, do.name.chain=TRUE)
listbranch.get(list.obj=my.list, query="a$a.2$a.2.2",
do.strict=TRUE, do.rtn.val=TRUE)
listbranch.get(list.obj=my.list, query="a$a.2$a.2.2",
do.strict=TRUE, do.rtn.val=FALSE)
More information about the R-help
mailing list