[R] : Quantile and rowMean from multiple files in a folder
arun
smartpink111 at yahoo.com
Tue Apr 15 05:04:20 CEST 2014
Hi,
It is because of different dimensions of Simulation data within each Site.
Try:
dir.create("final")
lst1 <- split(list.files(pattern = ".csv"), gsub("\\_.*", "", list.files(pattern = ".csv")))
sapply(lst1,length)
#G100 G101 G102 G103 G104 G105 G106 G107 G108 G109 G110 G111 G112 G113 G114 G115
# 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100
#G116 G117 G118 G119 G120 GG10 GG11 GG12 GG13 GG14 GG15 GG16 GG17 GG18 GG19 GG20
# 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100 100
#GG21 GG22 GG23 GG24 GG25 GG26 GG27 GG28
# 100 100 100 100 100 100 100 100
lst2 <- lapply(lst1, function(x1) lapply(x1, function(x2) {
lines1 <- readLines(x2)
header1 <- lines1[1:2]
dat1 <- read.table(text = lines1, header = FALSE, sep = ",", stringsAsFactors = FALSE,
skip = 2)
colnames(dat1) <- Reduce(paste, strsplit(header1, ","))
dat1[-c(nrow(dat1), nrow(dat1) - 1), ]
}))
##dimensions differ within each Site
sapply(lst2,function(x) sapply(x,ncol))[1:6,5:8]
# G104 G105 G106 G107
#[1,] 258 257 258 258
#[2,] 258 258 258 258
#[3,] 258 258 258 258
#[4,] 258 257 258 258
#[5,] 258 258 258 258
#[6,] 258 258 258 258
##number of rows are consistent
sapply(lst2,function(x) any(sapply(x,nrow)!=9))
# G100 G101 G102 G103 G104 G105 G106 G107 G108 G109 G110 G111 G112
#FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# G113 G114 G115 G116 G117 G118 G119 G120 GG10 GG11 GG12 GG13 GG14
#FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# GG15 GG16 GG17 GG18 GG19 GG20 GG21 GG22 GG23 GG24 GG25 GG26 GG27
#FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
# GG28
#FALSE
names1 <- unique(unlist(lapply(lst2,function(x) unlist(lapply(x,function(y) names(y)[-1])))))
length(names1)
#[1] 257
# lstYear <- lapply(lst2,function(x) lapply(x, function(y)
# y[,1,drop=FALSE])[[1]])
library(plyr)
lapply(seq_along(lst2),function(i) {lstN <- lapply(lst2[[i]],function(x) {datN <- as.data.frame(matrix(NA, nrow=9, ncol=length(names1),dimnames=list(NULL,names1)));datN[,names1] <- x[,-1]; datN }); lstQ1 <- lapply(lstN,function(x) numcolwise(function(y) quantile(y,seq(0,1,by=0.01), na.rm=TRUE))(x)); arr1 <- array(unlist(lstQ1), dim=c(dim(lstQ1[[1]]),length(lstQ1)),dimnames=list(NULL,lapply(lstQ1,names)[[1]])); res <- rowMeans(arr1, dims=2, na.rm=TRUE); colnames(res) <- gsub(" ", "_", colnames(res)); res1 <- data.frame(Percentiles=paste0(seq(0,100, by=1),"%"),res, stringsAsFactors=FALSE); write.csv(res1,paste0(paste(getwd(), "final", paste(names(lst1)[[i]], "Quantile", sep="_"), sep= "/"), ".csv"), row.names=FALSE, quote=FALSE)})
## output files
list.files(recursive = TRUE)[grep("Quantile", list.files(recursive = TRUE))]
#[1] "final/G100_Quantile.csv" "final/G101_Quantile.csv"
#[3] "final/G102_Quantile.csv" "final/G103_Quantile.csv"
#[5] "final/G104_Quantile.csv" "final/G105_Quantile.csv"
#[7] "final/G106_Quantile.csv" "final/G107_Quantile.csv"
#[9] "final/G108_Quantile.csv" "final/G109_Quantile.csv"
#[11] "final/G110_Quantile.csv" "final/G111_Quantile.csv"
#[13] "final/G112_Quantile.csv" "final/G113_Quantile.csv"
#[15] "final/G114_Quantile.csv" "final/G115_Quantile.csv"
#[17] "final/G116_Quantile.csv" "final/G117_Quantile.csv"
#[19] "final/G118_Quantile.csv" "final/G119_Quantile.csv"
#[21] "final/G120_Quantile.csv" "final/GG10_Quantile.csv"
#[23] "final/GG11_Quantile.csv" "final/GG12_Quantile.csv"
#[25] "final/GG13_Quantile.csv" "final/GG14_Quantile.csv"
#[27] "final/GG15_Quantile.csv" "final/GG16_Quantile.csv"
#[29] "final/GG17_Quantile.csv" "final/GG18_Quantile.csv"
#[31] "final/GG19_Quantile.csv" "final/GG20_Quantile.csv"
#[33] "final/GG21_Quantile.csv" "final/GG22_Quantile.csv"
#[35] "final/GG23_Quantile.csv" "final/GG24_Quantile.csv"
#[37] "final/GG25_Quantile.csv" "final/GG26_Quantile.csv"
#[39] "final/GG27_Quantile.csv" "final/GG28_Quantile.csv"
ReadOut1 <- lapply(list.files(recursive = TRUE)[grep("Quantile", list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
sapply(ReadOut1,function(x) dim(x))
# [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#[1,] 101 101 101 101 101 101 101 101 101 101 101 101 101 101
#[2,] 258 258 258 258 258 258 258 258 258 258 258 258 258 258
# [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#[1,] 101 101 101 101 101 101 101 101 101 101 101 101
#[2,] 258 258 258 258 258 258 258 258 258 258 258 258
# [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
#[1,] 101 101 101 101 101 101 101 101 101 101 101 101
#[2,] 258 258 258 258 258 258 258 258 258 258 258 258
# [,39] [,40]
#[1,] 101 101
#[2,] 258 258
ReadOut1[[1]][1:3,1:3]
# Percentiles txav_DJF txav_MAM
#1 0% -12.56619 6.795429
#2 1% -12.45888 6.864886
#3 2% -12.35157 6.934344
### Q2:
dir.create("Indices")
names1 <- lapply(ReadOut1, function(x) names(x))[[1]]
lstNew <- simplify2array(ReadOut1)
nrow(lstNew)
#[1] 258
lapply(2:nrow(lstNew), function(i) {
dat1 <- data.frame(lstNew[1], do.call(cbind, lstNew[i, ]), stringsAsFactors = FALSE)
colnames(dat1) <- c(rownames(lstNew)[1], paste(names(lst1), rep(rownames(lstNew)[i],
length(lst1)), sep = "_"))
write.csv(dat1, paste0(paste(getwd(), "Indices", rownames(lstNew)[i], sep = "/"),
".csv"), row.names = FALSE, quote = FALSE)
})
## Output2:
ReadOut2 <- lapply(list.files(recursive = TRUE)[grep("Indices", list.files(recursive = TRUE))],
function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE))
length(ReadOut2)
#[1] 257
head(ReadOut2[[1]], 2)
#Percentiles G100_pav_ANN G101_pav_ANN G102_pav_ANN G103_pav_ANN G104_pav_ANN
#1 0% 0.978451 0.9517680 0.9383280 0.8519280 0.9438790
#2 1% 0.992648 0.9638816 0.9480754 0.8625262 0.9548512
# G105_pav_ANN G106_pav_ANN G107_pav_ANN G108_pav_ANN G109_pav_ANN G110_pav_ANN
#1 0.9303260 0.7484670 0.9757010 1.049533 0.9841290 0.7778830
#2 0.9417438 0.7594563 0.9868968 1.063668 0.9968095 0.7882509
# G111_pav_ANN G112_pav_ANN G113_pav_ANN G114_pav_ANN G115_pav_ANN G116_pav_ANN
#1 0.737651 0.8813010 0.9155330 0.829001 0.6778760 0.5463310
#2 0.746934 0.8924871 0.9265448 0.838534 0.6880397 0.5527359
# G117_pav_ANN G118_pav_ANN G119_pav_ANN G120_pav_ANN GG10_pav_ANN GG11_pav_ANN
#1 0.7191360 0.7470170 0.7859380 0.7774590 0.6303150 0.5200200
#2 0.7278231 0.7556053 0.7975213 0.7852408 0.6381671 0.5258248
# GG12_pav_ANN GG13_pav_ANN GG14_pav_ANN GG15_pav_ANN GG16_pav_ANN GG17_pav_ANN
#1 0.6672890 0.851834 0.5209710 0.6445290 0.5874320 0.7263650
#2 0.6761913 0.861177 0.5282514 0.6520456 0.5948674 0.7365299
# GG18_pav_ANN GG19_pav_ANN GG20_pav_ANN GG21_pav_ANN GG22_pav_ANN GG23_pav_ANN
#1 0.6642220 0.5385440 0.5043320 0.7484140 0.6436940 0.541165
#2 0.6729234 0.5454527 0.5120815 0.7575216 0.6502167 0.549040
# GG24_pav_ANN GG25_pav_ANN GG26_pav_ANN GG27_pav_ANN GG28_pav_ANN
#1 0.5067010 0.7082260 0.6447260 0.6197480 0.9163480
#2 0.5136588 0.7160864 0.6545266 0.6278891 0.9284303
Also, atttached is the script in case the email mangles the code.
A.K.
On Monday, April 14, 2014 6:26 PM, Zilefac Elvis <zilefacelvis at yahoo.com> wrote:
Hi AK,
I have another request for help.
Attached is a larger file (~27MB) for sample.zip. All files are same as previous except that I am using more sites to do the same thing that you did with sample.zip.
When generalizing Quantilecode.R to many sites, I receive an error when I run:
dir.create("Indices")
names1 <- lapply(ReadOut1, function(x) names(x))[[1]]
lstNew <- simplify2array(ReadOut1)
lapply(2:nrow(lstNew), function(i) {
dat1 <- data.frame(lstNew[1], do.call(cbind, lstNew[i, ]), stringsAsFactors = FALSE)
colnames(dat1) <- c(rownames(lstNew)[1], paste(names(lst1), rep(rownames(lstNew)[i],
length(lst1)), sep = "_"))
write.csv(dat1, paste0(paste(getwd(), "Indices", rownames(lstNew)[i], sep = "/"),
".csv"), row.names = FALSE, quote = FALSE)
})
and I get this:
Error in 2:nrow(lstNew) : argument of length 0
I have tried a few tricks but could not overcome the error message.
Please help!
Atem.
-------------- next part --------------
An embedded and charset-unspecified text was scrubbed...
Name: Quantilecode.txt
URL: <https://stat.ethz.ch/pipermail/r-help/attachments/20140414/2628a0b7/attachment-0002.txt>
More information about the R-help
mailing list