# Patch overview (git unified diff, two files). The patch text below is kept
# verbatim; its line breaks were flattened to single spaces, so hunk line
# boundaries and indentation are not recoverable from this copy.
#
# Hunk 1 -- R/assembly-utils.R (around old line 218; the enclosing function's
# header is outside the hunk):
#   * Removes the direct call read.table(url, ...).
#   * Adds: destfile <- tempfile(); download.file(url, destfile, quiet=TRUE);
#     then read.table(destfile, ...) with the same sep/col.names/
#     stringsAsFactors arguments as before.
#   * NOTE(review): nothing in the visible lines removes 'destfile' afterwards
#     -- confirm whether an unlink()/on.exit() cleanup is wanted.
#
# Hunk 2 -- R/utils.R, list_ftp_dir(url):
#   * 'doc' is still fetched with getURL(url).
#   * If 'doc' does not contain "HTML listing generated by Squid", the
#     previous text parsing is kept: split on "\n", strip the first 8
#     whitespace-delimited fields from each line, then drop everything from
#     the next whitespace on (i.e. what remains is the 9th field, 1-based).
#   * Otherwise 'doc' is treated as an HTML listing: gregexpr()/regmatches()
#     collect every match of HREF=" followed by a character that is neither
#     '.' nor '/', continuing up to the next '/' or '"'; the HREF=" prefix is
#     stripped with sub(); l2[c(TRUE, FALSE)] keeps matches 1, 3, 5, ...
#   * NOTE(review): keeping every other match presumably relies on each entry
#     being linked twice in the Squid page -- verify against a real listing.
#   * The function now ends with print(listing) followed by 'listing', so the
#     result is written to the console on every call as well as returned.
#   * NOTE(review): print(listing) looks like leftover debugging output --
#     confirm before merging.
#   * NOTE(review): 'grepl(...) == FALSE' is equivalent to '!grepl(...)' here.
diff --git a/R/assembly-utils.R b/R/assembly-utils.R index 7d9c4f3..1073d2f 100644 --- a/R/assembly-utils.R +++ b/R/assembly-utils.R @@ -218,7 +218,9 @@ "AssignedMoleculeLocationOrType", "GenBankAccn", "Relationship", "RefSeqAccn", "AssemblyUnit" , "SequenceLength", "UCSCStyleName") - read.table(url, sep="\t", col.names=colnames, stringsAsFactors=FALSE) + destfile <- tempfile() + download.file(url, destfile, quiet=TRUE) + read.table(destfile, sep="\t", col.names=colnames, stringsAsFactors=FALSE) } ### See .normarg_assembly_accession() for how 'assembly_accession' can be diff --git a/R/utils.R b/R/utils.R index eecbc63..9c14e0f 100644 --- a/R/utils.R +++ b/R/utils.R @@ -51,11 +51,22 @@ mergeNamedAtomicVectors <- function(x, y, what=c("key", "values")) list_ftp_dir <- function(url) { doc <- getURL(url) # from RCurl package - listing <- strsplit(doc, "\n", fixed=TRUE)[[1L]] - ## Keep field no. 8 only - pattern <- paste(c("^", rep.int("[^[:space:]]+[[:space:]]+", 8L)), - collapse="") - listing <- sub(pattern, "", listing) - sub("[[:space:]].*$", "", listing) + if (grepl("HTML listing generated by Squid", doc) == FALSE) { + # Normal directory listing (text format) + listing <- strsplit(doc, "\n", fixed=TRUE)[[1L]] + ## Keep field no. 8 only + pattern <- paste(c("^", rep.int("[^[:space:]]+[[:space:]]+", 8L)), + collapse="") + listing <- sub(pattern, "", listing) + listing <- sub("[[:space:]].*$", "", listing) + } else { + # Squid proxy directory listing (HTML format) + m <- gregexpr("HREF=\"([^./][^/\"]*)", doc, perl=TRUE) + l <- regmatches(doc, m) + l2 <- unlist(lapply(l, function(x) sub("HREF=\\\"", "", x))) + listing <- l2[c(TRUE, FALSE)] # select every other element + } + print(listing) + listing }