is.numeric0 <- function(x){length(x)==0 & is.numeric(x)} ## Get an index of all tips in one or more sections getWikiTipsIndex <- function(sections = NULL) { # First of all, we need to get the list of all subsections in tips stips <- readLines("http://wiki.r-project.org/rwiki/doku.php?id=&idx=tips") # Keep only lines related to tips sections stips <- stips[grep("idx=tips:", stips)] # Retrieve the sections from these lines stips <- sub("^.*(idx=tips:[^\"]*).*$", "\\1", stips) # If section is NULL, just use the whole list # Otherwise, rework sections "[section]" -> idx=tips:[section]" # and look if these sections are there if (!is.null(sections)) { Sections <- paste("idx=tips:", sections, sep = "") inwiki <- Sections %in% stips if (!all(inwiki)) stop("One or more sections are not in the R Wiki:\n\t", paste(sections[!inwiki], collapse = ", ")) stips <- Sections # Keep only these sections... and in that order! } # Cycle through all the tips subsections and retrieve the list of pages ltips <- character(0) for(st in stips) { Ltips <- readLines(paste("http://wiki.r-project.org/rwiki/doku.php?id=", st, sep = "&")) # Keep only lines related to tips pages in that section Ltips <- Ltips[grep("
  • ", Ltips)] if (!is.null(Ltips) && length(Ltips) > 0) { # Retrieve the path to the pages from that list ltips <- c(ltips, sub("^.*id=(tips:[^\"]*).*$", "\\1", Ltips)) } } return(ltips) } ### Define the heading of the page writeHTMLHeader <- function(title, file = file.path(tempdir(), "Rtips.html" )) { Head <- c("", " ", "", " ", paste("", title, "", sep = ""), "", " ", " ", " ", " ", " ", " ", "") cat(Head, file = file, sep = "\n") cat("
    \n", file = file, append = TRUE) cat("

    ", title, "

    \n", sep = "", file = file, append = TRUE) cat("

    This is a snapshot of a part of the", file = file, append = TRUE) cat(" R Wiki", file = file, append = TRUE) cat(" taken on ", date(), ".\n
    \n", sep = "", file = file, append = TRUE) cat("Please, visit the R Wiki to edit original document, or to check for latest version.

    \n\n", file = file, append = TRUE) } ## Finalize the page writeHTMLCloser <- function( file = file.path(tempdir(), "Rtips.html") , browse = FALSE) { cat('\n\n', file = file, append = TRUE) ## ...and possibly display it if (browse) { browseURL(paste("file://", file, sep = "")) cat("\n'", file, "' should be displayed in your web browser.\n", sep = "") return(invisible(file)) } } retrieveTipContent <- function (tip ){ xhtml <- readLines(sprintf("http://wiki.r-project.org/rwiki/doku.php?id=%s&do=export_xhtml", tip)) htmlbody <- (grep('', xhtml)[1]) if (!is.na(toc)) { # There is a TOC for this tip toc <- toc:(grep('^$', xhtml)[1] + 3) xhtml <- xhtml[-toc] } titletip <- "" # Get first H1 page title, and use it (in the table of content) h1 <- grep("^

    ]+>([^<]+)

    ", xhtml) if (length(h1) > 0) titletip <- sub("^

    ]+>([^<]+)

    .*$", "\\1", xhtml[h1[1]]) ## Rework footnotes to avoid duplications if there is more than one tip ##?? if (length(tips) > 1) { xhtml <- gsub("(\"#?fnt?__)([0-9]+\")", paste("\\1", i, "-\\2", sep = ""), xhtml) xhtml <- gsub("(\"fnt[(]')([0-9]+',)", paste("\\1", i, "-\\2", sep = ""), xhtml) ##?? } ###TODO: This is wasteful because content itself has link inside, but just wrong one. ## Name item to match TOC link content <- paste('', sep = "\n") ## Write the content of the tip content <- paste(content, paste(xhtml, collapse = "\n"), sep = "\n") content <- sub("h1","h3", content) content <- sub("level1","level3", content) ## If more than one tip, add a link to come back to the TOC + a ruler ##?? if (length(tips) > 1) { content <- paste(content, "

    [back to table of content]

    ", spe = "\n") content <- paste(content, "
    \n", sep = "\n") ##? } list(titletip, content) } writeTOC <- function (tips,titletips, oktips, file = file.path(tempdir(), "Rtips.html") ) { ## Write a table of contents at the beginning of the file (if more than one tip) if (length(tips) > 1) { ## Rework the list of pages actually included in the document tips <- tips[oktips] titletips <- titletips[oktips] ## Write the TOC cat('

    Table of Contents

    \n', file = file, append = TRUE) cat('
    \n
    \n', file = file, append = TRUE) cat('\n
    \n\n', file = file, append = TRUE) } else cat('\n\n', file = file, append =TRUE) } # Collect R Wiki pages together and save the resulting HTML page on disk getWikiTips <- function(tips = getWikiTipsIndex()) { ## Retrieve a list of tips in the R Wiki and present them on a single page ## By default, all tips are retrieved ## Create the content, collecting all tips one after the other oktips <- rep(TRUE, length(tips)) # The list of tips actually collected in the page titletips <- tips # By default, we use the name of the pages as titles contents <- vector() content <- character(0) # A temporary variable to hold all the content for(i in seq(along = tips)) { tip <- tips[i] if (length(tips) > 1) { cat("Importing: '", tip, "'...\n", sep = "") res <- flush.console() } xhtml <- retrieveTipContent(tip) ## The R Wiki engine returns always something, even for nonexistent pages ## So, we must check here if the page is empty! if (length(xhtml) < 3) { oktips[i] <- FALSE if (length(tips) == 1) { # Totally empty page => returns nothing unlink(file) stop("The page ", tip, " does not exist or is empty!") } else { cat("\tPage not found or empty!\n") res <- flush.console() } }else{ processedTip <- processTipContent(xhtml,i,tip) content <- processedTip[[2]] titletips[i] <- processedTip[[1]] } ## Look if there is at least one tip found if (all(!oktips)) { unlink(file) stop("None of those pages were found on the R Wiki server!") } contents[i] <- content } list(tips=tips, titletips=titletips, oktips=oktips, contents=contents) } processMinorHeading <- function(item){ minoritem <- unlist( strsplit ( item, "\\|" ) ) minorTitle <- minoritem[1] minorTitle <- sub("\\+\\+\\+\\+",'

    ', minorTitle ) minorTitle <- sub("$","

    ", minorTitle) ##reviseditem <- sub("\\*","",reviseditem) minorTipName <- minoritem[2] minorTipName <- unlist(strsplit (minorTipName, ":"))[2] minorTipName <- unlist(strsplit (minorTipName, "}}"))[1] list(minorTipName=minorTipName, minorTitle=minorTitle) } rawimport <- readLines("http://wiki.r-project.org/rwiki/doku.php?id=tips:tips&do=export_raw") majorHeadings <- grep('====', rawimport) minorHeadings <- grep('\\+\\+\\+\\+',rawimport) headings <- sort(c(majorHeadings, minorHeadings)) results <- list(tips=vector(mode="character"),titletips=vector(mode="character"),oktips=vector(mode="logical"),contents=vector(mode="character")) majorCounter <- 0 minorCounter <- 0 for (item in rawimport[headings]) { notaMajorheading <- is.numeric0 (grep("====", item )) notaMinorheading <- is.numeric0 (grep("\\+\\+\\+\\+", item )) if (isTRUE(notaMajorheading)){ minorCounter <- minorCounter + 1; minorHeading <- processMinorHeading(item) alltips <- getWikiTipsIndex(minorHeading$minorTipName) ###Put minor heading into content stream results$tips <- c(results$tips, minorHeading$minorTip) results$titletips <- c(results$titletips, paste(majorCounter,".",minorCounter," ", minorHeading$minorTitle, sep="")) results$oktips <- c(results$oktips, TRUE) results$contents <- c(results$contents, minorHeading$minorTitle) sectionResults <- getWikiTips(alltips) results$tips <- c(results$tips, sectionResults$tips) results$titletips <- c(results$titletips, paste(majorCounter,".",minorCounter," ",sectionResults$titletips, sep="")) results$oktips <- c(results$oktips, sectionResults$oktips) results$contents <- c(results$contents, sectionResults$contents) }else{ majorCounter <- majorCounter + 1; minorCounter <- 0; reviseditem <- sub("====",'

    ',item) reviseditem <- sub("====","

    ",reviseditem) results$tips <- c(results$tips, "majorHeading") results$titletips <- c(results$titletips, paste(majorCounter,".0 ",reviseditem, sep="")) results$oktips <- c(results$oktips, TRUE) results$contents <- c(results$contents, reviseditem) } } myfile <- "alltips3.html" unlink(myfile) writeHTMLHeader ( title = "R Wiki tips", file=myfile) writeTOC(results$tips,results$titletips,results$oktips, myfile) contents <- results$contents contents <- gsub(" ","",contents) contents <- reworkWikiLinks(contents, results$tips) for (content in contents){ ## Write the content in the page cat(content, file = myfile, append = TRUE) } writeHTMLCloser(file = myfile, browse=FALSE)