### Q1: working directory: Observed
# Only one file per Site. Assuming this is the case for the full dataset,
# then I guess there is no need to average.

library(plyr)
library(stringr)

# Helpers ----

# Group the CSV files of the current working directory by site id, i.e. the
# part of the file name in front of the first underscore.
list_site_files <- function() {
  # `pattern` is a regular expression: escape the dot and anchor the extension.
  csv_files <- list.files(pattern = "\\.csv$")
  split(csv_files, gsub("_.*", "", csv_files))
}

# Read one site file. The first two lines are treated as a two-row header
# whose cells are pasted together column-wise ("row1 row2"); the last two
# rows of the table are dropped (presumably footer rows -- TODO confirm).
read_site_file <- function(path) {
  lines <- readLines(path)
  dat <- read.table(text = lines, header = FALSE, sep = ",",
                    stringsAsFactors = FALSE, skip = 2)
  colnames(dat) <- Reduce(paste, strsplit(lines[1:2], ","))
  # head(, -2) keeps a data frame even for one-column or very short tables.
  head(dat, -2)
}

# Build the 0%..100% percentile table (101 rows) for the columns `cols` of
# `data`, with a leading `Percentiles` label column.
quantile_table <- function(data, cols) {
  # drop = FALSE: a single selected column must stay a data frame.
  site_data <- data[, cols, drop = FALSE]
  data.frame(
    Percentiles = paste0(seq(0, 100, by = 1), "%"),
    numcolwise(function(y) {
      quantile(y, seq(0, 1, by = 0.01), na.rm = TRUE)
    })(site_data),
    stringsAsFactors = FALSE
  )
}

# Write one CSV per column of `means` (rows = sites, columns = indices) into
# `<working directory>/<out_dir>/`. Each file holds a single row with one
# column per site; the file name is the index name with spaces replaced by
# underscores.
write_index_files <- function(means, site_names, out_dir) {
  for (j in seq_len(ncol(means))) {
    one_index <- t(means[, j, drop = FALSE])
    colnames(one_index) <- site_names
    out_name <- paste0(gsub(" ", "_", rownames(one_index)), ".csv")
    write.csv(one_index, file.path(getwd(), out_dir, out_name),
              row.names = FALSE, quote = FALSE)
  }
  invisible(NULL)
}

# Q1: percentiles per site ----

dir.create("final", showWarnings = FALSE)

lst1 <- list_site_files()
lst2 <- lapply(lst1, function(site_files) lapply(site_files, read_site_file))

# Keep the sites whose tables all report an integer column count. In practice
# every table that was read successfully passes; this is only a guard.
lst3 <- lst2[vapply(lst2, function(site) {
  all(vapply(site, function(tbl) is.integer(ncol(tbl)), logical(1)))
}, logical(1))]

# difference in column number
unlist(lapply(lst3, function(site) vapply(site, ncol, integer(1))),
       use.names = FALSE)
#
#  [1] 157 258 258  98 157 258 256 258 250 258 258 147 157 250 250 256 249 240
# [19] 181 188 256 146 117 258 153 256 255 246 255 256 258 257 145 258 258 255
# [37] 258 157 164 144 265 258 254 258 258 157 258 176 258 256 257 258 258 258
# [55] 248 258 156 258 157 157 258 258 258 258 258 148 258 258 258 258 257 258
# [73] 258 258 157 154 153 258 248 255 257 256 258 258 157 256 256 257 257 250
# [91] 257 139 155 256 256 257 257 256 258 258 257 258 258 258 258 157 157 157
#[109] 258 258 258 258 256 258 157 258 258 256 258

# One table per site (only the first file of each site is used, see the note
# at the top). Every column except the first gets the site id appended as
# "_<site>" so the columns stay unique after joining.
lst4 <- setNames(lapply(seq_along(lst3), function(i) {
  site_tbl <- lst3[[i]][[1]]
  # Use the names of lst3 (not lst1) so the suffix stays aligned with the
  # table even if the filter above removed a site.
  names(site_tbl)[-1] <- paste(names(site_tbl)[-1], names(lst3)[i], sep = "_")
  names(site_tbl) <- str_trim(names(site_tbl))
  site_tbl
}), names(lst3))

df1 <- join_all(lst4, by = "Year")
dim(df1)
# [1]     9 27311

# Column names of df1 grouped by their site suffix (text after the last "_").
lst5 <- split(names(df1)[-1], gsub(".*_", "", names(df1)[-1]))

# Number of columns of each site's percentile table.
vapply(lst5, function(cols) ncol(quantile_table(df1, cols)), integer(1))
#
#G100 G101 G102 G103 G104 G105 G106 G107 G108 G109 G110 G111 G112 G113 G114 G115
# 157  258  258   98  157  258  256  258  250  258  258  147  157  250  250  256
#G116 G117 G118 G119 G120 GG10 GG11 GG12 GG13 GG14 GG15 GG16 GG17 GG18 GG19 GG20
# 249  240  181  188  256  146  117  258  153  256  255  246  255  256  258  257
#GG21 GG22 GG23 GG24 GG25 GG26 GG27 GG28 GG29 GG30 GG31 GG32 GG33 GG34 GG35 GG36
# 145  258  258  255  258  157  164  144  265  258  254  258  258  157  258  176
#GG37 GG38 GG39 GG40 GG41 GG42 GG43 GG44 GG45 GG46 GG47 GG48 GG49 GG50 GG51 GG52
# 258  256  257  258  258  258  248  258  156  258  157  157  258  258  258  258
#GG53 GG54 GG55 GG56 GG57 GG58 GG59 GG60 GG61 GG62 GG63 GG64 GG65 GG66 GG67 GG68
# 258  148  258  258  258  258  257  258  258  258  157  154  153  258  248  255
#GG69 GG70 GG71 GG72 GG73 GG74 GG75 GG76 GG77 GG78 GG79 GG80 GG81 GG82 GG83 GG84
# 257  256  258  258  157  256  256  257  257  250  257  139  155  256  256  257
#GG85 GG86 GG87 GG88 GG89 GG90 GG91 GG92 GG93 GG94 GG95 GG96 GG97 GG98 GG99 GGG1
# 257  256  258  258  257  258  258  258  258  157  157  157  258  258  258  258
#GGG2 GGG3 GGG4 GGG5 GGG6 GGG7 GGG8
# 256  258  157  258  258  256  258

# Write final/<site>_Quantile.csv for every site. The file is named after the
# lst5 group it was computed from, so name and content cannot drift apart.
for (i in seq_along(lst5)) {
  write.csv(quantile_table(df1, lst5[[i]]),
            file.path(getwd(), "final",
                      paste0(names(lst5)[[i]], "_Quantile.csv")),
            row.names = FALSE, quote = FALSE)
}

# Read the quantile files back from the directory they were written to.
quantile_files <- list.files("final", pattern = "Quantile", full.names = TRUE)
ReadOut1 <- lapply(quantile_files, function(x) {
  read.csv(x, header = TRUE, stringsAsFactors = FALSE)
})

vapply(ReadOut1, dim, integer(2))[, 1:3]
#      [,1] [,2] [,3]
# [1,]  101  101  101
# [2,]  157  258  258

lapply(ReadOut1, function(x) x[1:2, 1:3])[1:3]
# [[1]]
#   Percentiles pav.DJF_G100 pav.MAM_G100
# 1          0%            0     0.640500
# 2          1%            0     0.664604
#
# [[2]]
#   Percentiles txav.DJF_G101 txav.MAM_G101
# 1          0%      -13.8756      4.742400
# 2          1%      -13.8140      4.817184
#
# [[3]]
#   Percentiles txav.DJF_G102 txav.MAM_G102
# 1          0%     -15.05000      4.520700
# 2          1%     -14.96833      4.543828

### Q2: Observed data ----

dir.create("Indices", showWarnings = FALSE)

# Index names: take the column names of the files read back above (where
# read.csv has turned spaces into dots), strip the "_<site>" suffix and put
# the spaces back.
names1 <- unlist(lapply(ReadOut1, function(x) names(x)[-1]))
names2 <- gsub("_.*", "", names1)
names3 <- unique(gsub("[.]", " ", names2))

# One row per site, one column per index; NA where a site lacks an index.
res <- do.call(rbind, lapply(seq_along(lst5), function(i) {
  site_means <- colMeans(df1[, lst5[[i]], drop = FALSE], na.rm = TRUE)
  wanted <- paste(names3, names(lst5)[[i]], sep = "_")
  # Look every index up by name. A positional assignment would put values
  # under the wrong index whenever a site's column order differs from the
  # order of names3.
  setNames(unname(site_means[match(wanted, names(site_means))]), names3)
}))
dim(res)
# [1] 119 264

write_index_files(res, names(lst5), "Indices")

## Output2:
index_files <- list.files("Indices", full.names = TRUE)
ReadOut2 <- lapply(index_files, function(x) {
  read.csv(x, header = TRUE, stringsAsFactors = FALSE)
})
length(ReadOut2)
# [1] 264
index_files[1]
# [1] 'Indices/pav_ANN.csv'
res[, "pav ANN", drop = FALSE]
ReadOut2[[1]]

### Sample data ----
# Working directory changed to 'sample' (switch to it before running this
# part; the script itself does not change the working directory).

dir.create("Indices_colMeans", showWarnings = FALSE)

lst1 <- list_site_files()
lst2 <- lapply(lst1, function(site_files) lapply(site_files, read_site_file))

# Per site: column means of every file (first column excluded), then the
# mean of those per-file means across the site's files.
res1 <- do.call(rbind, lapply(seq_along(lst2), function(i) {
  per_file <- lapply(lst2[[i]], function(x) {
    colMeans(x[, -1, drop = FALSE], na.rm = TRUE)
  })
  rowMeans(do.call(cbind, per_file), na.rm = TRUE)
}))

write_index_files(res1, names(lst2), "Indices_colMeans")

## Output2 Sample
sample_index_files <- list.files("Indices_colMeans", full.names = TRUE)
ReadOut2S <- lapply(sample_index_files, function(x) {
  read.csv(x, header = TRUE, stringsAsFactors = FALSE)
})
length(ReadOut2S)
# [1] 257
sample_index_files[1]
# [1] 'Indices_colMeans/pav_ANN.csv'
res1[, "pav ANN", drop = FALSE]
#       pav ANN
# [1,] 1.545620
# [2,] 1.518553
ReadOut2S[[1]]
#      G100     G101
# 1 1.54562 1.518553