[R] Can I use group_map to iteratively process a dataframe?

Rui Barradas ru|pb@rr@d@@ @end|ng |rom @@po@pt
Sat Jun 5 15:07:40 CEST 2021


Hello,

This is cross-posted from StackOverflow [1]. Cross-posting is frowned upon 
on R-help, and the SO post is better explained (at least the data 
seem to be more complete). You should have waited for an answer there.


Hope this helps,

Rui Barradas

Às 15:03 de 04/06/21, Madison Bell escreveu:
> I want to iteratively process a master list of comparisons using
> group_walk() as an alternative method to import batches of .csv files.
> I have the code for iteratively importing batch csvs here:
> 
> #Import list of csv files from directory, formatted as:
> 
> |rownm | neg | pos |
> |------|-----|-----|
> |neg   |19   |18   |
> |pos   |5    |141  |
> 
> ```
> library(tidyverse) # for cleaning and shaping data
> library(epiR)
> library(irrCAC)
> 
> all_epi_files <- list.files("congtables", pattern = "*.csv",
>                                 full.names = TRUE)
> 
> #Make export directory
> check_create_dir <- function(the_dir) {
>    if (!dir.exists(the_dir)) {
>      dir.create(the_dir, recursive = TRUE) } #Creates a directory if it doesn't already exist
> }
> 
> the_dir_ex <- "data_generated/epidata" #Name the new desired directory
> 
> check_create_dir(the_dir_ex) #Make the directory if it doesn't already exist
> 
> #Make function for the series of analyses
> epi_analysis <- function(a_csv, the_dir){
>    #Import data as inserted variables
>    dat2 <- read_csv(a_csv)%>%
>      remove_rownames %>%
>      column_to_rownames(var="rownm") %>%
>      as.matrix()
> 
>    #Run tests
>    rval <- epi.tests(dat2, conf.level = 0.95)
>    rkappa<-epi.kappa(dat2)
>    gwet <- gwet.ac1.table(dat2)
>    kappa2 <- kappa2.table(dat2)
> 
>    #Export results
>    hd <- c('sensitivity', 'specificity', 'pfp', 'pfn', 'kappa', 'gwet', 'pabak')
>    ests <- c(round(rval$elements$sensitivity$est, digits = 3),
>              round(rval$elements$specificity$est, digits = 3),
>              round(rval$element$pfp$est, digits = 3),
>              round(rval$element$pfn$est, digits = 3),
>              round(kappa2$coeff.val, digits = 3),
>              round(gwet$coeff.val, digits = 3),
>              round(rkappa$pabak$est, digits = 3))
>    cis <- c(paste(round(rval$elements$sensitivity$lower, digits = 3),
> round(rval$elements$sensitivity$upper, digits = 3), sep = ","),
>             paste(round(rval$elements$specificity$lower, digits = 3),
> round(rval$elements$specificity$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfp$lower, digits = 3),
> round(rval$element$pfp$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfn$lower, digits = 3),
> round(rval$element$pfn$upper, digits = 3), sep = ","),
>             kappa2$coeff.ci,
>             gwet$coeff.ci,
>             paste(round(rkappa$pabak$lower, digits = 3),
> round(rkappa$pabak$lower, digits = 3), sep = ","))
> 
>    df <- data.frame(hd, ests, cis)
> 
>    write.csv(df,
>              file = paste0(the_dir, "/", basename(a_csv)),
>              na = "999.99",
>              row.names = FALSE)
> 
> }
> 
> 
> #Execute functions
> lapply(all_epi_files,
>         FUN = epi_analysis,
>         the_dir = the_dir_ex)
> ```
> 
> But instead I would like to input a dataset that looks like this:
> 
> |Test Assay | Var1 | Var2 |Freq|
> |-----------|------|------|----|
> |Assay1     |neg   |neg   |19  |
> |Assay1     |neg   |pos   |5   |
> |Assay1     |pos   |neg   |8   |
> |Assay1     |pos   |pos   |141 |
> |Assay2     |neg   |neg   |25  |
> |Assay2     |neg   |pos   |6   |
> |Assay2     |pos   |neg   |17  |
> |Assay2     |pos   |pos   |33  |
> |Assay3     |neg   |neg   |99  |
> |Assay3     |neg   |pos   |20  |
> |Assay3     |pos   |neg   |5   |
> |Assay3     |pos   |pos   |105 |
> 
> I want to use the same function epi_analysis and export a csv for each
> Test Assay (in this example Assay1, Assay2, and Assay3). So far I
> have:
> 
> ```
> #Make export directory
> check_create_dir <- function(the_dir) {
>    if (!dir.exists(the_dir)) {
>      dir.create(the_dir, recursive = TRUE) } #Creates a directory if it doesn't already exist
> }
> 
> the_dir_ex <- "data_generated/epidata" #Name the new desired directory
> 
> check_create_dir(the_dir_ex) #Make the directory if it doesn't already exist
> 
> #Make function for the series of analyses
> epi_analysis <- function(.x, the_dir){
>    #Clean data
>    dat2 <- .x  %>%
>      select(c(Var1, Var2, Freq)) %>%
>      pivot_wider(Var1, names_from = Var2, values_from = Freq) %>%
>      remove_rownames %>%
>      column_to_rownames( var = "Var1") %>%
>      as.matrix()
> 
>    #Run tests
>    rval <- epi.tests(dat2, conf.level = 0.95)
>    rkappa<-epi.kappa(dat2)
>    gwet <- gwet.ac1.table(dat2)
>    kappa2 <- kappa2.table(dat2)
> 
>    #Export results
>    hd <- c('sensitivity', 'specificity', 'pfp', 'pfn', 'kappa', 'gwet', 'pabak')
>    ests <- c(round(rval$elements$sensitivity$est, digits = 3),
>              round(rval$elements$specificity$est, digits = 3),
>              round(rval$element$pfp$est, digits = 3),
>              round(rval$element$pfn$est, digits = 3),
>              round(kappa2$coeff.val, digits = 3),
>              round(gwet$coeff.val, digits = 3),
>              round(rkappa$pabak$est, digits = 3))
>    cis <- c(paste(round(rval$elements$sensitivity$lower, digits = 3),
> round(rval$elements$sensitivity$upper, digits = 3), sep = ","),
>             paste(round(rval$elements$specificity$lower, digits = 3),
> round(rval$elements$specificity$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfp$lower, digits = 3),
> round(rval$element$pfp$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfn$lower, digits = 3),
> round(rval$element$pfn$upper, digits = 3), sep = ","),
>             kappa2$coeff.ci,
>             gwet$coeff.ci,
>             paste(round(rkappa$pabak$lower, digits = 3),
> round(rkappa$pabak$lower, digits = 3), sep = ","))
> 
>    df <- data.frame(hd, ests, cis)
> 
>    write.csv(df,
>              file = paste0(the_dir, "/", basename(.x$TestAssay)),
>              na = "999.99",
>              row.names = FALSE)
> 
> }
> 
> data <- read_csv("data_raw/EpiTest.csv") %>%
>    group_by(TestAssay)%>%
>    group_map(~ epi_analysis)
> ```
> 
> But the only output I see is:
> ```
> [[1]]
> function(.x, the_dir){
>    #Clean data
>    dat2 <- .x  %>%
>      select(c(Var1, Var2, Freq)) %>%
>      pivot_wider(Var1, names_from = Var2, values_from = Freq) %>%
>      remove_rownames %>%
>      column_to_rownames( var = "Var1") %>%
>      as.matrix()
> 
>    #Run tests
>    rval <- epi.tests(dat2, conf.level = 0.95)
>    rkappa<-epi.kappa(dat2)
>    gwet <- gwet.ac1.table(dat2)
>    kappa2 <- kappa2.table(dat2)
> 
>    #Export results
>    hd <- c('sensitivity', 'specificity', 'pfp', 'pfn', 'kappa', 'gwet', 'pabak')
>    ests <- c(round(rval$elements$sensitivity$est, digits = 3),
>              round(rval$elements$specificity$est, digits = 3),
>              round(rval$element$pfp$est, digits = 3),
>              round(rval$element$pfn$est, digits = 3),
>              round(kappa2$coeff.val, digits = 3),
>              round(gwet$coeff.val, digits = 3),
>              round(rkappa$pabak$est, digits = 3))
>    cis <- c(paste(round(rval$elements$sensitivity$lower, digits = 3),
> round(rval$elements$sensitivity$upper, digits = 3), sep = ","),
>             paste(round(rval$elements$specificity$lower, digits = 3),
> round(rval$elements$specificity$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfp$lower, digits = 3),
> round(rval$element$pfp$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfn$lower, digits = 3),
> round(rval$element$pfn$upper, digits = 3), sep = ","),
>             kappa2$coeff.ci,
>             gwet$coeff.ci,
>             paste(round(rkappa$pabak$lower, digits = 3),
> round(rkappa$pabak$lower, digits = 3), sep = ","))
> 
>    df <- data.frame(hd, ests, cis)
> 
>    write.csv(df,
>              file = paste0(the_dir, "/", basename(.x$TestAssay)),
>              na = "999.99",
>              row.names = FALSE)
> 
> }
> 
> [[2]]
> function(.x, the_dir){
>    #Clean data
>    dat2 <- .x  %>%
>      select(c(Var1, Var2, Freq)) %>%
>      pivot_wider(Var1, names_from = Var2, values_from = Freq) %>%
>      remove_rownames %>%
>      column_to_rownames( var = "Var1") %>%
>      as.matrix()
> 
>    #Run tests
>    rval <- epi.tests(dat2, conf.level = 0.95)
>    rkappa<-epi.kappa(dat2)
>    gwet <- gwet.ac1.table(dat2)
>    kappa2 <- kappa2.table(dat2)
> 
>    #Export results
>    hd <- c('sensitivity', 'specificity', 'pfp', 'pfn', 'kappa', 'gwet', 'pabak')
>    ests <- c(round(rval$elements$sensitivity$est, digits = 3),
>              round(rval$elements$specificity$est, digits = 3),
>              round(rval$element$pfp$est, digits = 3),
>              round(rval$element$pfn$est, digits = 3),
>              round(kappa2$coeff.val, digits = 3),
>              round(gwet$coeff.val, digits = 3),
>              round(rkappa$pabak$est, digits = 3))
>    cis <- c(paste(round(rval$elements$sensitivity$lower, digits = 3),
> round(rval$elements$sensitivity$upper, digits = 3), sep = ","),
>             paste(round(rval$elements$specificity$lower, digits = 3),
> round(rval$elements$specificity$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfp$lower, digits = 3),
> round(rval$element$pfp$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfn$lower, digits = 3),
> round(rval$element$pfn$upper, digits = 3), sep = ","),
>             kappa2$coeff.ci,
>             gwet$coeff.ci,
>             paste(round(rkappa$pabak$lower, digits = 3),
> round(rkappa$pabak$lower, digits = 3), sep = ","))
> 
>    df <- data.frame(hd, ests, cis)
> 
>    write.csv(df,
>              file = paste0(the_dir, "/", basename(.x$TestAssay)),
>              na = "999.99",
>              row.names = FALSE)
> 
> }
> 
> [[3]]
> function(.x, the_dir){
>    #Clean data
>    dat2 <- .x  %>%
>      select(c(Var1, Var2, Freq)) %>%
>      pivot_wider(Var1, names_from = Var2, values_from = Freq) %>%
>      remove_rownames %>%
>      column_to_rownames( var = "Var1") %>%
>      as.matrix()
> 
>    #Run tests
>    rval <- epi.tests(dat2, conf.level = 0.95)
>    rkappa<-epi.kappa(dat2)
>    gwet <- gwet.ac1.table(dat2)
>    kappa2 <- kappa2.table(dat2)
> 
>    #Export results
>    hd <- c('sensitivity', 'specificity', 'pfp', 'pfn', 'kappa', 'gwet', 'pabak')
>    ests <- c(round(rval$elements$sensitivity$est, digits = 3),
>              round(rval$elements$specificity$est, digits = 3),
>              round(rval$element$pfp$est, digits = 3),
>              round(rval$element$pfn$est, digits = 3),
>              round(kappa2$coeff.val, digits = 3),
>              round(gwet$coeff.val, digits = 3),
>              round(rkappa$pabak$est, digits = 3))
>    cis <- c(paste(round(rval$elements$sensitivity$lower, digits = 3),
> round(rval$elements$sensitivity$upper, digits = 3), sep = ","),
>             paste(round(rval$elements$specificity$lower, digits = 3),
> round(rval$elements$specificity$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfp$lower, digits = 3),
> round(rval$element$pfp$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfn$lower, digits = 3),
> round(rval$element$pfn$upper, digits = 3), sep = ","),
>             kappa2$coeff.ci,
>             gwet$coeff.ci,
>             paste(round(rkappa$pabak$lower, digits = 3),
> round(rkappa$pabak$lower, digits = 3), sep = ","))
> 
>    df <- data.frame(hd, ests, cis)
> 
>    write.csv(df,
>              file = paste0(the_dir, "/", basename(.x$TestAssay)),
>              na = "999.99",
>              row.names = FALSE)
> 
> }
> 
> [[4]]
> function(.x, the_dir){
>    #Clean data
>    dat2 <- .x  %>%
>      select(c(Var1, Var2, Freq)) %>%
>      pivot_wider(Var1, names_from = Var2, values_from = Freq) %>%
>      remove_rownames %>%
>      column_to_rownames( var = "Var1") %>%
>      as.matrix()
> 
>    #Run tests
>    rval <- epi.tests(dat2, conf.level = 0.95)
>    rkappa<-epi.kappa(dat2)
>    gwet <- gwet.ac1.table(dat2)
>    kappa2 <- kappa2.table(dat2)
> 
>    #Export results
>    hd <- c('sensitivity', 'specificity', 'pfp', 'pfn', 'kappa', 'gwet', 'pabak')
>    ests <- c(round(rval$elements$sensitivity$est, digits = 3),
>              round(rval$elements$specificity$est, digits = 3),
>              round(rval$element$pfp$est, digits = 3),
>              round(rval$element$pfn$est, digits = 3),
>              round(kappa2$coeff.val, digits = 3),
>              round(gwet$coeff.val, digits = 3),
>              round(rkappa$pabak$est, digits = 3))
>    cis <- c(paste(round(rval$elements$sensitivity$lower, digits = 3),
> round(rval$elements$sensitivity$upper, digits = 3), sep = ","),
>             paste(round(rval$elements$specificity$lower, digits = 3),
> round(rval$elements$specificity$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfp$lower, digits = 3),
> round(rval$element$pfp$upper, digits = 3), sep = ","),
>             paste(round(rval$element$pfn$lower, digits = 3),
> round(rval$element$pfn$upper, digits = 3), sep = ","),
>             kappa2$coeff.ci,
>             gwet$coeff.ci,
>             paste(round(rkappa$pabak$lower, digits = 3),
> round(rkappa$pabak$lower, digits = 3), sep = ","))
> 
>    df <- data.frame(hd, ests, cis)
> 
>    write.csv(df,
>              file = paste0(the_dir, "/", basename(.x$TestAssay)),
>              na = "999.99",
>              row.names = FALSE)
> 
> }
> ```
> 
> and there are no csvs in my epidata folder. Any
> suggestions/corrections welcomed. I haven't used group_map() before,
> but I am keen to use it.
> 
> ______________________________________________
> R-help using r-project.org mailing list -- To UNSUBSCRIBE and more, see
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



More information about the R-help mailing list