# Build a single-page HTML snapshot ("alltips.html") of the tips section of
# the R Wiki: read the raw tips index, write a table of contents, then
# download every linked tip and append its exported XHTML.
#
# NOTE(review): in the copy of this script that was reviewed, the HTML markup
# inside the string literals had been stripped (only the text between tags
# survived) and the statements were collapsed onto a few lines, so the script
# no longer parsed. All tags emitted below are a reconstruction -- TODO
# confirm against a known-good copy or the desired page layout.

wikiwikiaddress <- "http://wiki.r-project.org/rwiki/doku.php"
htmlout <- "alltips.html"

# Index of tips ---------------------------------------------------------

rawimport <- readLines(
  paste0(wikiwikiaddress, "?id=tips:tips&do=export_raw"),
  warn = FALSE,
  encoding = "UTF-8"
)

# Lines of interest: list items holding a wiki link ("* [[id|title]]"),
# major headings ("==== ... ====") and minor headings ("* **...**").
linkIndex <- grep("\\* \\[\\[", rawimport)
majorHeadings <- grep("====", rawimport, fixed = TRUE)
minorHeadings <- grep("\\* \\*\\*", rawimport)
linkIndex <- sort(c(linkIndex, majorHeadings, minorHeadings))

# Strip the list bullet and the link brackets, leaving "id|title".
cleanedlinks <- gsub(
  "\\s*\\* \\[\\[", "",
  gsub("]]", "", rawimport[linkIndex], fixed = TRUE)
)
# The first matched line is dropped, as in the original script
# (presumably the page's own title heading -- TODO confirm).
cleanedlinks <- cleanedlinks[-1]

# Page fragments ----------------------------------------------------------

### head of the page
wikihead <- c(
  '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"',
  '  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">',
  '<html xmlns="http://www.w3.org/1999/xhtml">',
  "<head>",
  '  <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
  "  <title>All R wiki tips</title>",
  "</head>",
  "<body>"
)

# The wiki address is used twice: once as link target, once as link text.
headerPara1 <- sprintf(
  paste0(
    "<p>This is a snapshot of tips section in the new R Wiki. ",
    "This snapshot was taken on %s. It is presented as a convenience ",
    "for readers, who are always encouraged to double-check the wiki ",
    'itself for the latest version: <a href="%s">%s</a></p>'
  ),
  format(Sys.time(), "%b %d %Y"), wikiwikiaddress, wikiwikiaddress
)

# Helpers -----------------------------------------------------------------

# TRUE when `x` is a zero-length numeric vector, e.g. the result of a
# grep() that matched nothing.
is.numeric0 <- function(x) {
  length(x) == 0 && is.numeric(x)
}

# Escape the characters that are special in HTML text and attribute values.
escape_html <- function(x) {
  x <- gsub("&", "&amp;", x, fixed = TRUE)
  x <- gsub("<", "&lt;", x, fixed = TRUE)
  x <- gsub(">", "&gt;", x, fixed = TRUE)
  gsub('"', "&quot;", x, fixed = TRUE)
}

# Classify one index line as "major" heading, "minor" heading or "tip" link.
item_kind <- function(item) {
  if (grepl("====", item, fixed = TRUE)) {
    "major"
  } else if (grepl("\\* \\*\\*", item)) {
    "minor"
  } else {
    "tip"
  }
}

# Turn a heading line from the index into an HTML heading element.
heading_html <- function(item, kind) {
  if (kind == "major") {
    # Strip the whole run of "=" on both sides so headings written with
    # more than four "=" do not leave stray characters behind.
    text <- gsub("^\\s*=+\\s*|\\s*=+\\s*$", "", item)
    tag <- "h2"
  } else {
    text <- sub("^\\s*\\*\\s*", "", item) # list bullet
    text <- gsub("**", "", text, fixed = TRUE) # bold markers
    tag <- "h3"
  }
  paste0("<", tag, ">", escape_html(trimws(text)), "</", tag, ">")
}

# Split an "id|title" index entry into its wiki page id and display title.
parse_tip <- function(item) {
  parts <- strsplit(item, "|", fixed = TRUE)[[1]]
  id <- gsub(" ", "", parts[1], fixed = TRUE)
  # A link without a "|title" part falls back to the page id rather than
  # printing "NA" into the page.
  title <- if (length(parts) >= 2) trimws(parts[2]) else id
  list(id = id, title = title)
}

# Download one tip as XHTML and return the lines to embed, or NULL (with a
# warning) when the download fails, so one bad page cannot abort the run.
fetch_tip_body <- function(id, base_url) {
  url <- paste0(base_url, "?id=", id, "&do=export_xhtml")
  message("Downloading ", url)
  page <- tryCatch(
    readLines(url, warn = FALSE, encoding = "UTF-8"),
    error = function(e) {
      warning(
        "Could not download ", url, ": ", conditionMessage(e),
        call. = FALSE
      )
      NULL
    }
  )
  if (is.null(page)) {
    return(NULL)
  }

  ### delete header that comes in with tip
  page <- gsub("<h1[^>]*>.*</h1>", "", page)

  # Keep only what lies between <body> and </body>; if the markers are not
  # found the page is embedded as downloaded.
  body_start <- grep("<body", page, fixed = TRUE)
  body_end <- grep("</body>", page, fixed = TRUE)
  if (!is.numeric0(body_start) && !is.numeric0(body_end) &&
    body_end[length(body_end)] >= body_start[1]) {
    page <- page[body_start[1]:body_end[length(body_end)]]
    page[1] <- sub("^.*<body[^>]*>", "", page[1])
    page[length(page)] <- sub("</body>.*$", "", page[length(page)])
  }

  # Same clean-up the original applied to the tip content.
  # NOTE(review): the per-tip table of contents inserted by the wiki is
  # still left in place; the original attempt to remove it was disabled.
  gsub("level1", "", page, fixed = TRUE)
}

# Write the whole snapshot: page head, intro, table of contents, then the
# body of every tip. Returns `path` invisibly.
write_alltips <- function(items, path, head, intro, base_url) {
  con <- file(path, open = "wt", encoding = "UTF-8")
  on.exit(close(con), add = TRUE)

  kinds <- vapply(items, item_kind, character(1), USE.NAMES = FALSE)
  tips <- lapply(items, parse_tip)

  writeLines(head, con)
  writeLines("<h1>The New Rtips!</h1>", con)
  writeLines(intro, con)

  # write a table of contents in the beginning of the file
  writeLines("<h2>Table of Contents</h2>", con)
  for (i in seq_along(items)) {
    if (kinds[i] == "tip") {
      writeLines(
        sprintf(
          '<div class="tocitem"><a href="#%s">%s</a></div>',
          escape_html(tips[[i]]$id), escape_html(tips[[i]]$title)
        ),
        con
      )
    } else {
      writeLines(heading_html(items[i], kinds[i]), con)
    }
  }
  writeLines("<hr />", con)

  # the tips themselves, in index order
  for (i in seq_along(items)) {
    if (kinds[i] != "tip") {
      writeLines(heading_html(items[i], kinds[i]), con)
      next
    }
    tip <- tips[[i]]
    tip_id <- escape_html(tip$id)
    writeLines(
      sprintf(
        '<h4><a name="%s" id="%s">%s</a></h4>',
        tip_id, tip_id, escape_html(tip$title)
      ),
      con
    )
    writeLines('<div class="tip">', con)
    content <- fetch_tip_body(tip$id, base_url)
    if (is.null(content)) {
      writeLines("<p><em>This tip could not be downloaded.</em></p>", con)
    } else {
      writeLines(content, con)
    }
    writeLines("</div>", con)
  }

  writeLines(c("</body>", "</html>"), con)
  invisible(path)
}

# Run ---------------------------------------------------------------------

write_alltips(cleanedlinks, htmlout, wikihead, headerPara1, wikiwikiaddress)