[Rd] indexing
Ben Bolker
bolker@zoo.ufl.edu
Sun, 1 Jul 2001 11:00:01 -0400 (EDT)
Don't know if this is useful to anyone, but here's a "poor man's"
solution I came up with to the CRAN indexing problem ... not as good as
having a full-text web link, but it does have some advantages. (1) it's
already done. (2) it's relatively easy to download and store this
information off-line, and to update it periodically. (3) it doesn't (now)
require any extra software.
I'm sure it could be improved in many ways.
Sorry if it duplicates existing or developing functionality.
I haven't actually tested "create.index" below as a whole, but I have run
each of its component pieces individually.
Ben Bolker
## Download the INDEX file of one package into the working directory.
##   pkg        package name; used to build both the remote and the local name
##   CRAN       base URL of the CRAN mirror
##   descrip    URL of the directory that holds the "<pkg>.INDEX" files
##   local.ext  suffix appended to `pkg` to form the local file name
## Returns whatever download.file() returns.
get.ind <- function(pkg, CRAN = getOption("CRAN"),
                    descrip = paste(contrib.url(CRAN), "/Descriptions", sep = ""),
                    local.ext = ".INDEX.tmp") {
  remote.url <- paste0(descrip, "/", pkg, ".INDEX")
  dest.file <- paste0(pkg, local.ext)
  download.file(remote.url, dest.file)
}
## Remove zero-length strings from a character vector.
## (strsplit() yields an empty first element when the input starts with the
## separator, e.g. a line with leading spaces.)
dropz <- function(x) {
  non.empty <- nchar(x) > 0
  x[non.empty]
}
## Count the words in each element of a character vector.
##   x  character vector
## Returns an integer vector with one count per element of `x`.
## Empty strings produced by the split are not counted.
nwords <- function(x) {
  ## vapply() guarantees an integer vector even when `x` has length zero,
  ## where sapply() would hand back an empty list.
  vapply(strsplit.words(x), function(z) length(dropz(z)), integer(1))
}
## Split each element of a character vector into whitespace-separated words.
##   x  character vector
## Returns a list with one character vector of words per element of `x`.
## A string that starts with whitespace yields an empty first word.
strsplit.words <- function(x) {
  ## "+" rather than "*": a pattern that can match the empty string makes
  ## gsub() insert a space between every pair of characters, which would
  ## split the input into single letters.
  strsplit(gsub("[ \t]+", " ", x), " ")
}
## Parse one downloaded INDEX file into a character matrix.
##   fn  path of the INDEX file
## Returns NULL when the file contains no lines; otherwise a character matrix
## with columns "Package", "Function" and "Description" and one row per index
## entry (zero rows if the file holds only the header line).
##
## The first line is taken as the header: asterisks, spaces and the text
## "/INDEX" are removed from it and the remainder is used as the package name.
## In every following entry the first word is the function name and the rest
## of the line is the description.
tr.index <- function(fn) {
  lines <- scan(fn, what = character(), sep = "\n", quiet = TRUE)
  if (length(lines) == 0) {
    return(NULL)
  }

  ## Lines starting with whitespace are treated as continuations of the
  ## preceding line; this might miss unusual formats.
  tablines <- grep("^[ \t]", lines)
  lines <- sub("^[\t ]*", "", lines)

  ## Join each "continuation set" (a line plus the consecutive continuation
  ## lines after it).  Subtracting the running count of continuation lines
  ## from the line number gives every member of a set the same group id.
  if (length(tablines) > 0) {
    v <- seq_along(lines)
    csets <- v - cumsum(v %in% tablines)
    lines <- unname(vapply(split(lines, csets), paste, character(1),
                           collapse = " "))
  }

  pkgname <- gsub("/INDEX", "", gsub("[* ]+", "", lines[1]))
  entries <- lines[-1]

  ## Split each entry at its first run of whitespace.
  fun <- sub("[ \t].*$", "", entries)
  descr <- sub("^[^ \t]+[ \t]*", "", entries)
  ## Collapse whitespace runs to single spaces and drop a trailing one.
  descr <- sub(" $", "", gsub("[ \t]+", " ", descr))

  ret <- cbind(rep(pkgname, length(entries)), fun, descr)
  dimnames(ret) <- list(NULL, c("Package", "Function", "Description"))
  ret
}
## Download the INDEX file of every package listed on a CRAN mirror and
## combine them into one searchable matrix.
##   CRAN         base URL of the CRAN mirror
##   contrib.url  URL of the contributed-packages directory on that mirror
##   descrip      URL of the directory that holds the "<pkg>.INDEX" files
##   local.ext    suffix of the temporary files written to the working directory
##   save         if TRUE, store the result in `savefile` with save()
##   savefile     file name used when `save` is TRUE
## Returns the row-bound results of tr.index() over every file in the working
## directory whose name ends in `local.ext`.
##
## Side effects: writes one temporary file per package into the working
## directory, asks interactively whether to delete them afterwards, and
## optionally writes `savefile`.
##
## NOTE(review): recent R versions configure mirrors through the "repos"
## option; pass `CRAN` explicitly if getOption("CRAN") is NULL.
create.index <- function(CRAN = getOption("CRAN"),
                         ## namespace-qualified: inside this function the bare
                         ## name `contrib.url` is the argument itself, so an
                         ## unqualified call would be a recursive default
                         contrib.url = utils::contrib.url(CRAN),
                         descrip = paste0(utils::contrib.url(CRAN), "/Descriptions"),
                         local.ext = ".INDEX.tmp",
                         save = TRUE, savefile = "CRAN-index") {
  pkglist <- utils::available.packages(contriburl = contrib.url)

  ## Fetch each INDEX file; a package whose file cannot be downloaded is
  ## reported with a warning instead of aborting the whole run.
  for (pkg in pkglist[, 1]) {
    tryCatch(
      get.ind(pkg, CRAN = CRAN, descrip = descrip, local.ext = local.ext),
      error = function(e) {
        warning("could not download index for ", pkg, ": ",
                conditionMessage(e), call. = FALSE)
      }
    )
  }

  ## list.files() expects a regular expression, so translate the glob
  ## "*<local.ext>" rather than passing it verbatim.
  indfiles <- list.files(pattern = utils::glob2rx(paste0("*", local.ext)))
  CRAN.index <- do.call("rbind", lapply(indfiles, tr.index))

  answer <- substr(readline("Delete temporary index files (y/N)? "), 1, 1)
  if (answer %in% c("y", "Y")) {
    unlink(indfiles)
  }

  ## The logical argument `save` does not hide the function save(): when a
  ## name is called, R skips bindings that are not functions.
  if (save) {
    save(CRAN.index, file = savefile)
  }
  CRAN.index
}
## Search an index matrix for a regular expression.
##   str          regular expression to look for
##   indexmat     character matrix with (at least) the columns "Function" and
##                "Description"; defaults to the global `CRAN.index`
##   ignore.case  passed on to grep()
## Returns the matching rows of `indexmat` as a "noquote" matrix: rows whose
## function name matches come first, followed by rows where only the
## description matches.
search.ind <- function(str, indexmat = CRAN.index, ignore.case = TRUE) {
  in.fun <- grep(str, indexmat[, "Function"], ignore.case = ignore.case)
  in.descr <- grep(str, indexmat[, "Description"], ignore.case = ignore.case)
  hits <- unique(c(in.fun, in.descr))
  ## drop = FALSE keeps the matrix shape when exactly one row matches
  noquote(indexmat[hits, , drop = FALSE])
}
## Example session.
## Build the index once; search.ind() uses the global `CRAN.index` as its
## default index matrix.
CRAN.index <- create.index()
## The search string is a regular expression: "t?" makes the final "t"
## optional.
search.ind("integrat?")
search.ind("adapt")
search.ind("permute")
search.ind("Durbin")
** NEW ADDRESS as of Aug. 1 ***
Zoology Department, University of Florida
bolker@zoo.ufl.edu
(352) 392-5697
-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !) To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._