[R] Memory allocation failed: Copying Node
ppatel3026
pratik.patel at us.rothschild.com
Wed Jun 25 16:50:50 CEST 2008
Following code bugs with "Memory allocation failed: Copying Node" error after
parsing n thousand files. I have included the main code(below) and
functions(after the main code).
I am not sure which lines are causing the copying Node which results in
memory failure. Please advise.
#Beginning of Code
for(i in 1:nrow(newFile)) {
if(i%%3000 == 0) gc()
fname <- as.character(newFile$"File Name"[i])
file = strsplit(fname,"/")[[1]][4]
filein = "C:\\foldername\\" %+% file
if((!file.exists(filein)) || (length(readLines(filein)) == 0) )
{
ftp <- paste("ftp://servername/", fname, sep="")
fileout = filein
try(download.file(url=ftp, destfile=fileout))
}
txt <- readLines(filein)
if(length(txt) == 0){
next
}
xmlInside <- grep("</*XML", txt)
xmlTxt <- txt[seq(xmlInside[1]+1, xmlInside[2]-1)]
xml <- tryCatch(xmlMalformed2(filein), error = function(err)
unProcessedFiles(filein) )
if(is.null(xml)) next
if(is.null(xml)) {
stop("File not processed: " %+% file)
}
processed=FALSE
owner <- tryCatch(
data.frame(datadate=xValHelper("periodOfReport"),
CIK=xValHelper("issuerCik"),
conm=xValHelper("issuerName"),
tic=xValHelper("issuerTradingSymbol")),
error = function(err) unProcessedFiles(filein)
)
if(is.null(owner)) next
nodes <- getNodeSet(xml, "//nonDerivativeTransaction")
if(xmlSize(nodes) > 0){
processed <- tryCatch( processTransaction(owner, nodes,
outputFile),
error = function(err)
unProcessedFiles(filein) )
if(is.null(processed)) next
}
}
#End of Code
#List of Functions
xmlMalformed2 <- function(filename) {
quotes <- c("&\r\nquot;",
"&q\r\nuot;","&qu\r\not;","&quo\r\nt;",""\r\n;")
amp <- c("&\r\namp;", "&a\r\nmp;","&am\r\np;","&\r\n;")
xmlDoc<-NULL
charStream <- readChar(filename, file.info(filename)$size)
charStreamNew <- gsubfn("<[^>]*>", ~ gsub("[\r\n]", "", x), charStream)
for(k in quotes) {
if(length(grep(k, charStreamNew)) > 0) {
charStreamNew <- sub(k, """, charStreamNew)
}
}
for(v in amp) {
if(length(grep(v, charStreamNew)) > 0) {
charStreamNew <- sub(v, "&", charStreamNew)
}
}
charStreamNew <- gsub(""", "\"", charStreamNew)
charStreamNew <- gsub("&", "and", charStreamNew)
xmlVec<-readLines(textConnection(charStreamNew))
xmlInDoc <- grep("</*XML", xmlVec)
xmlDoc <- xmlTreeParse(xmlVec[seq(xmlInDoc[1]+1, xmlInDoc[2]-1)],
useInternal=TRUE)
}
processTransaction <- function(rptOwner, nodes, outFile) {
transaction <- data.frame(
transdate=xValHelperSpecial(nodes,"transactionDate"),
securityTitle=xValHelperSpecial(nodes,"securityTitle"),
transactionShares=if(length(xValHelperSpecial(nodes,"transactionShares")) ==
1)
xValHelperSpecial(nodes,"transactionShares")[[1]] else
xValHelperSpecial(nodes,"transactionShares"))
out <- merge(rptOwner,transaction, all.x=TRUE)
output<-cbind(out,file) #file - variable containing filename that data
was read from
write.table(output, file=outFile, append=TRUE, sep="\t", eol="\n",
quote=FALSE, col.names=FALSE,
row.names=FALSE)
processed=TRUE
return(processed)
}
unProcessedFiles <- function(filename) {
write.table(filename, file="C:/errorFile.txt", append=TRUE, sep="\t",
eol="\n", quote=FALSE,
col.names=FALSE, row.names=FALSE)
return(NULL)
}
#xValHelperSpecial and xValHelper are prerty similar hence avoiding code for
xValHelper
xValHelperSpecial <- function(node, xtag) {
nobs <- xmlSize(node)
out<-NULL
if(xtag == "tagName1") {
for (n in seq(1:nobs)) {
temp <- xpathApply(node[[n]], "//" %+% xtag, xmlValue)
if(length(temp) > 0) {
if (n==1) assign("out",gsub('(^ +)|(
+$)','',gsub('\n','',temp[[1]]))) else
assign("out",rbind(out,gsub('(^ +)|(
+$)','',gsub('\n','',temp[[1]]))))
} else {
if (n==1) assign("out","NA") else assign("out",rbind(out,"NA"))
}
}
} else if (xtag == "tagName2") {
for (n in seq(1:nobs)) {
temp <- xpathApply(node[[n]], "//" %+% xtag, xmlValue)
if(length(temp) > 0) {
if (n==1) assign("out",gsub('(^ +)|(
+$)','',gsub('\n','',temp[[1]]))) else
assign("out",rbind(out,gsub('(^ +)|(
+$)','',gsub('\n','',temp[[1]]))))
} else {
if (n==1) assign("out","NA") else assign("out",rbind(out,"NA"))
}
}
} else {
for (n in seq(1:nobs)) {
temp <- xpathApply(node[[n]], "//" %+% xtag, xmlValue)
if(length(temp) > 0) {
if (n==1) assign("out",gsub('(^ +)|(
+$)','',gsub('\n','',temp[[1]]))) else
assign("out",rbind(out,gsub('(^ +)|(
+$)','',gsub('\n','',temp[[1]]))))
} else {
if (n==1) assign("out","NA") else assign("out",rbind(out,"NA"))
}
}
}
return (out)
}
xValAll <- function(xtag) xpathApply(xml, "//" %+% xtag, xmlValue)
xParent<- function(xtag) xmlName(xpathApply(xml, paste("//" %+% xtag,
"/../.."))[[1]])
#End of Functions
--
View this message in context: http://www.nabble.com/Memory-allocation-failed%3A-Copying-Node-tp18114389p18114389.html
Sent from the R help mailing list archive at Nabble.com.
More information about the R-help
mailing list