# Per-site quantile tables and index means from "<site>_*.csv" files.
#
# The script is meant to be run section by section:
#   Q1     - observed data: per-site percentile tables written to "final/".
#   Q2     - observed data: per-variable site means written to "Indices/".
#   Sample - sample data: per-variable site means written to
#            "Indices_colMeans/" (run from the 'sample' working directory).
# Lines starting with "# [1]" etc. are console output recorded from an
# earlier run and are kept for reference.

# Dependencies ----
library(plyr)     # join_all(), numcolwise()
library(stringr)  # str_trim()

# Helpers ----

# List the CSV files of the working directory grouped by site. The site ID
# is the part of the file name before the first underscore.
list_site_files <- function() {
  # Escaped dot and end anchor: only names that really end in ".csv" match.
  csv_files <- list.files(pattern = "\\.csv$")
  split(csv_files, sub("_.*$", "", csv_files))
}

# Read one site file. The first two lines are header rows; they are pasted
# together element-wise to build the column names. The last two rows of the
# table are dropped (presumably footer/summary rows -- TODO confirm).
read_site_csv <- function(path) {
  lines <- readLines(path)
  header <- lines[1:2]
  dat <- read.table(
    text = lines, header = FALSE, sep = ",",
    stringsAsFactors = FALSE, skip = 2
  )
  colnames(dat) <- Reduce(paste, strsplit(header, ","))
  # head(, -2L) also behaves for tables with fewer than two rows and never
  # collapses a single-column table to a vector.
  head(dat, -2L)
}

# Read every file of every site: a list (per site) of lists (per file) of
# data frames.
read_all_sites <- function(files_by_site) {
  lapply(files_by_site, function(paths) lapply(paths, read_site_csv))
}

# 0%..100% percentile table (101 rows) of all numeric columns of `df`,
# preceded by a "Percentiles" label column.
quantile_table <- function(df) {
  probs <- seq(0, 1, by = 0.01)
  data.frame(
    Percentiles = paste0(seq(0, 100, by = 1), "%"),
    numcolwise(function(y) quantile(y, probs, na.rm = TRUE))(df),
    stringsAsFactors = FALSE
  )
}

# Write one CSV per column of `index_matrix` into `out_dir`. Each file holds
# a single row with one value per site; the file name is the column name
# with spaces replaced by underscores.
write_index_files <- function(index_matrix, site_names, out_dir) {
  for (j in seq_len(ncol(index_matrix))) {
    mat1 <- t(index_matrix[, j, drop = FALSE])
    colnames(mat1) <- site_names
    out_file <- file.path(
      out_dir, paste0(gsub(" ", "_", rownames(mat1)), ".csv")
    )
    write.csv(mat1, out_file, row.names = FALSE, quote = FALSE)
  }
  invisible(index_matrix)
}

# Read every CSV file found directly inside `dir`.
read_output_dir <- function(dir) {
  lapply(
    list.files(dir, pattern = "\\.csv$", full.names = TRUE),
    function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE)
  )
}

### Q1: working directory: Observed ----
# Only one file per site. Assuming this is the case for the full dataset,
# there is no need to average.
dir.create("final", showWarnings = FALSE)

lst1 <- list_site_files()
lst2 <- read_all_sites(lst1)

# different number of rows
sapply(seq_along(lst2), function(i) sapply(lst2[[i]], nrow))
# [1] 9 9 9 8 2 9

# difference in column number
sapply(seq_along(lst2), function(i) sapply(lst2[[i]], ncol))
# [1] 157 258 258  98 157 258

# Take the first (only) file of each site, tag every column except the
# first with the site ID, and trim stray whitespace from the names.
lst3 <- setNames(lapply(seq_along(lst2), function(i) {
  dat <- lst2[[i]][[1]]
  names(dat)[-1] <- paste(names(dat)[-1], names(lst1)[i], sep = "_")
  names(dat) <- str_trim(names(dat))
  dat
}), names(lst1))

df1 <- join_all(lst3, by = "Year")
dim(df1)
# [1]    9 1181

# Column names of df1 grouped by site (the text after the last underscore).
lst4 <- split(names(df1)[-1], sub(".*_", "", names(df1)[-1]))

# Number of columns of each site's percentile table.
sapply(lst4, function(x) ncol(quantile_table(df1[, x, drop = FALSE])))
# G100 G101 G102 G103 G104 G105
#  157  258  258   98  157  258

# Write one "<site>_Quantile.csv" per site. The file is named from lst4,
# the same grouping that selects the columns.
invisible(lapply(names(lst4), function(site) {
  df3 <- quantile_table(df1[, lst4[[site]], drop = FALSE])
  write.csv(
    df3, file.path("final", paste0(site, "_Quantile.csv")),
    row.names = FALSE, quote = FALSE
  )
}))

# Read the quantile files back from "final/" only, so look-alike paths
# elsewhere under the working directory are not picked up.
ReadOut1 <- lapply(
  list.files("final", pattern = "Quantile", full.names = TRUE),
  function(x) read.csv(x, header = TRUE, stringsAsFactors = FALSE)
)
sapply(ReadOut1, dim)
#      [,1] [,2] [,3] [,4] [,5] [,6]
# [1,]  101  101  101  101  101  101
# [2,]  157  258  258   98  157  258

lapply(ReadOut1, function(x) x[1:2, 1:3])[1:3]
# [[1]]
#   Percentiles pav.DJF_G100 pav.MAM_G100
# 1          0%            0     0.640500
# 2          1%            0     0.664604
#
# [[2]]
#   Percentiles txav.DJF_G101 txav.MAM_G101
# 1          0%      -13.8756      4.742400
# 2          1%      -13.8140      4.817184
#
# [[3]]
#   Percentiles txav.DJF_G102 txav.MAM_G102
# 1          0%     -15.05000      4.520700
# 2          1%     -14.96833      4.543828

### Q2: Observed data ----
dir.create("Indices", showWarnings = FALSE)

# Variable names across all sites: strip the site suffix and turn the dots
# that data.frame()/read.csv() put into the names back into spaces.
names1 <- unlist(lapply(ReadOut1, function(x) names(x)[-1]))
names2 <- sub("_.*$", "", names1)
names3 <- unique(gsub(".", " ", names2, fixed = TRUE))

# One row per site, one column per variable; NA where a site lacks the
# variable.
res <- do.call(rbind, lapply(seq_along(lst4), function(i) {
  vec1 <- colMeans(df1[, lst4[[i]], drop = FALSE], na.rm = TRUE)
  vec2 <- setNames(rep(NA_real_, length(names3)), names3)
  # Align by name rather than by position so that a different column order
  # in one site cannot put a mean under the wrong variable.
  idx <- match(names(vec1), paste(names3, names(lst4)[[i]], sep = "_"))
  vec2[idx[!is.na(idx)]] <- vec1[!is.na(idx)]
  vec2
}))

write_index_files(res, names(lst4), "Indices")

## Output2:
ReadOut2 <- read_output_dir("Indices")
length(ReadOut2)
# [1] 257
list.files("Indices", pattern = "\\.csv$", full.names = TRUE)[1]
# [1] 'Indices/pav_ANN.csv'
res[, "pav ANN", drop = FALSE]
#       pav ANN
# [1,] 1.298811
# [2,] 7.642922
# [3,] 6.740011
# [4,]       NA
# [5,] 1.296650
# [6,] 6.887622
ReadOut2[[1]]
#       G100     G101     G102 G103    G104     G105
# 1 1.298811 7.642922 6.740011   NA 1.29665 6.887622

### Sample data ----
# NOTE: the working directory must have been changed to 'sample' before this
# section is run; the script itself does not change it.
dir.create("Indices_colMeans", showWarnings = FALSE)

lst1 <- list_site_files()
lst2 <- read_all_sites(lst1)

# Per site: column means of every file (first column excluded), then
# averaged across that site's files.
res1 <- do.call(rbind, lapply(seq_along(lst2), function(i) {
  rowMeans(
    do.call(cbind, lapply(lst2[[i]], function(x) {
      colMeans(x[, -1, drop = FALSE], na.rm = TRUE)
    })),
    na.rm = TRUE
  )
}))

write_index_files(res1, names(lst2), "Indices_colMeans")

## Output2 Sample
ReadOut2S <- read_output_dir("Indices_colMeans")
length(ReadOut2S)
# [1] 257
list.files("Indices_colMeans", pattern = "\\.csv$", full.names = TRUE)[1]
# [1] 'Indices_colMeans/pav_ANN.csv'
res1[, "pav ANN", drop = FALSE]
#       pav ANN
# [1,] 1.545620
# [2,] 1.518553
ReadOut2S[[1]]
#      G100     G101
# 1 1.54562 1.518553