[Rd] indexing

Ben Bolker bolker@zoo.ufl.edu
Sun, 1 Jul 2001 11:00:01 -0400 (EDT)


  Don't know if this is useful to anyone, but here's a "poor man's"
solution I came up with to the CRAN indexing problem ... not as good as
having a full-text web link, but it does have some advantages. (1) it's
already done.  (2) it's relatively easy to download and store this
information off-line, and to update it periodically.  (3) it doesn't (now)
require any extra software.
  I'm sure it could be improved in many ways.
  Sorry if it duplicates existing or developing functionality.
  I haven't actually tested "create.index" below, but I've done all the
component pieces.

  Ben Bolker

get.ind <- function(pkg,CRAN=getOption("CRAN"),
                    descrip=paste(contrib.url(CRAN),"/Descriptions",sep=""),
                    local.ext=".INDEX.tmp") {
  download.file(paste(descrip,"/",pkg,".INDEX",sep=""),
                paste(pkg,local.ext,sep=""))
}

## strsplit produces zero-length chars if there are leading spaces
dropz <- function(x) {
  x[nchar(x)>0]
}

nwords <- function(x) {
  sapply(strsplit.words(x),function(z)length(dropz(z)))
}

strsplit.words <- function(x) {
  strsplit(gsub("[ \t]*"," ",x)," ")
}

tr.index <- function(fn) {
  ##  cat(fn,"\n")
  lines <- scan(fn,what=character(),sep="\n",quiet=TRUE)
  ret <- NULL
  if (length(lines)>0) {
    ## paste continuation lines together
    ## look for initial whitespace; might miss funny formats
    tablines <- grep("^[ \t]",lines)
    lines <- sub("^[\t ]*","",lines) ## now delete initial whitespace
    ## join "continuation sets" (sets of consecutive lines with leading whitespace)
    if (length(tablines)>0) {
      v <- 1:length(lines)
      csets <- v-cumsum(v %in% tablines)
      lines <- sapply(split(lines,csets),paste,collapse=" ")
    }
    pkgname <- gsub("/INDEX","",gsub("[\* ]*","",lines[1]))
    lines <- lines[-1]
    pkg <- rep(pkgname,length(lines))
    fun <- sapply(strsplit.words(lines),"[",1)
    descr <- sapply(strsplit.words(lines),function(z)paste(z[-1],collapse=" "))
    ret <- cbind(pkg,fun,descr)
    dimnames(ret) <- list(NULL,c("Package","Function","Description"))
  }
  ret
}

create.index <- function(CRAN=getOptions("CRAN"),
                         contrib.url=contriburl(CRAN),
                         descrip=paste(contrib.url(CRAN),"/Descriptions",sep=""),
                         local.ext=".INDEX.tmp",
                         save=TRUE,savefile="CRAN-index") {
  pkglist <- CRAN.packages(contrib.url=contrib.url)
  sapply(pkglist[,1],get.ind,CRAN=CRAN,descrip=descrip,
                    local.ext=local.ext)
  indfiles <- list.files(pattern=paste("*",local.ext))
  CRAN.index <- do.call("rbind",lapply(indfiles,tr.index))
  answer <- substr(readline("Delete temporary index files (y/N)? "), 1, 1)
  if (answer == "y" | answer == "Y")
    unlink(indfiles, TRUE)
  if (save) save(CRAN.index,file=savefile)
  CRAN.index
}

search.ind <- function(str,indexmat=CRAN.index,ignore.case=TRUE) {
  noquote(all.ind[unique(c(grep(str,indexmat[,"Function"],ignore.case=ignore.case),
                     grep(str,indexmat[,"Description"],ignore.case=ignore.case))),])
}

CRAN.index <- create.index()
search.ind("integrat?")
search.ind("adapt")
search.ind("permute")
search.ind("Durbin")

** NEW ADDRESS as of Aug. 1 ***
Zoology Department, University of Florida
bolker@zoo.ufl.edu
(352) 392-5697

-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-.-
r-devel mailing list -- Read http://www.ci.tuwien.ac.at/~hornik/R/R-FAQ.html
Send "info", "help", or "[un]subscribe"
(in the "body", not the subject !)  To: r-devel-request@stat.math.ethz.ch
_._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._._