## ----init, include = FALSE----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
set.seed(2023)
library(tidyverse, quietly = TRUE)
library(cowplot)
library(RColorBrewer)
devtools::load_all() 
today <- format(Sys.Date(), "%d%B%Y")

## ----file_load, eval=FALSE----------------------------------------------------
#  my_pk <- readRDS("data-raw/all_cvt_pk.rds")

## ----pk_obj_fromSQL, eval=FALSE-----------------------------------------------
#  
#  ### Minimal PK Object ###----
#  # Minimum pk object, add options later
#  # Note that these mappings are now a default mappings, just being verbose here.
#  # If there are any standard deviations that are zero or NA, but N_Subjects > 1,
#  # Set the n_subjects to 6 or the current value if lower than 6.
#  # (In Showa the mode is 6, but this condition is present in CVT_Legacy as well)
#  minimal_pk <- pk(data = cvt %>%
#                     mutate(n_subjects_normalized = if_else(
#                       n_subjects_normalized > 1 & is.na(invivPK_conc_sd),
#                       min(n_subjects_normalized, 6), n_subjects_normalized)),
#                   mapping = ggplot2::aes(
#                     Chemical = analyte_dtxsid,
#                     Chemical_Name = analyte_name_original,
#                     DTXSID = analyte_dtxsid,
#                     CASRN = analyte_casrn,
#                     Species = species,
#                     Reference = fk_extraction_document_id,
#                     Media = conc_medium_normalized,
#                     Route = administration_route_normalized,
#                     Dose = invivPK_dose_level,
#                     Dose.Units = "mg/kg",
#                     Subject_ID = fk_subject_id,
#                     Series_ID = fk_series_id,
#                     Study_ID = fk_study_id,
#                     ConcTime_ID = conc_time_id,
#                     N_Subjects = n_subjects_normalized,
#                     Weight = weight_kg,
#                     Weight.Units = "kg",
#                     Time = time_hr,
#                     Time.Units = "hours",
#                     Value = invivPK_conc,
#                     Value.Units = "mg/L",
#                     Value_SD = invivPK_conc_sd,
#                     LOQ = invivPK_loq
#                   ))

## ----outlining_all_options, eval=FALSE----------------------------------------
#  # Types of Choices:
#  ## Dose Normalized
#  ## Log10 transform
#  ## Scale time
#  # List of options
#  fitopts <- expand.grid(error_model = c("pooled",
#                                         "hierarchical"),
#                         dose_norm = c(TRUE, FALSE),
#                         log10_trans = c(TRUE, FALSE),
#                         time_scale = c(
#                           TRUE,
#                           FALSE),
#                         stringsAsFactors = FALSE)

## ----fitting_choices, eval=FALSE----------------------------------------------
#  # Make function similar to that in test_vignette authored by Caroline Ring
#  fit_data <- function(this_error_model,
#                       this_dose_norm,
#                       this_log10_trans,
#                       this_time_scale,
#                       n_cores = 12,
#                       file_dir = Sys.getenv("OD_DIR")){
#    retval <- -9
#    dose_indic <- as.numeric(this_dose_norm)
#    log10_indic <- as.numeric(this_log10_trans)
#    time_indic <- as.numeric(this_time_scale)
#    errmodel_indic <- substr(this_error_model, 1, 1)
#  
#    file_str <- paste0(today,
#                       "mypk_fit_",
#                       dose_indic,
#                       log10_indic,
#                       time_indic,
#                       errmodel_indic,
#                       ".Rds")
#  
#    cat("Fitting: ", file_str, "\n")
#  
#    tryCatch(
#      expr = {
#        this_pk <-   minimal_pk +
#          facet_data(vars(Chemical, Species)) +
#          settings_preprocess(keep_data_original = FALSE,
#                              suppress.messages = TRUE) +
#          scale_conc(dose_norm = this_dose_norm, log10_trans = this_log10_trans) +
#          settings_optimx(method = c(
#            "L-BFGS-B",
#            "bobyqa"
#            ))
#  
#        if(this_error_model %in% "pooled"){
#          this_pk <- this_pk +
#            stat_error_model(error_group = vars(Chemical, Species))
#        } else if(this_error_model %in% c("hierarchical", "joint")){
#          this_pk <- this_pk +
#            stat_error_model(error_group = vars(Chemical, Species, Reference))
#        } else{
#          stop("this_error_model must be either 'pooled' or 'hierarchical'/'joint'")
#        }
#  
#        if(this_time_scale %in% TRUE){
#          this_pk <- this_pk +
#            scale_time(new_units = "auto")
#        }
#  
#        #do the fit
#        this_time <- system.time(this_fit <- do_fit(this_pk, n_cores = n_cores))
#  
#  
#  
#        #save the result
#        saveRDS(this_fit,
#                file = paste0(file_dir,
#                       file_str))
#  
#        cli::cli_alert_success(text = "Fitting success!")
#        print(this_time)
#        rm(this_fit)
#  
#        retval <- this_time[[3]]
#      },
#      error = function(e){
#        cli::cli_alert_danger(text = paste("Analysis",
#                                           file_str,
#                                           "failed with error",
#                                           e))
#        retval <- -1
#      }
#    )
#  
#    return(retval)
#  }

## ----running_all_options, eval=FALSE------------------------------------------
#  
#  system.time(tmp_out <- mapply(fit_data,
#                                this_error_model = fitopts$error_model,
#                                this_dose_norm = fitopts$dose_norm,
#                                this_log10_trans = fitopts$log10_trans,
#                                this_time_scale = fitopts$time_scale,
#                                n_cores = 13))
#  
#  mean(tmp_out)/60 # Average runtime (in minutes) per fitting option
#  median(tmp_out)/60
#  sum(tmp_out) # Looks like there's 10 extra seconds of overhead for non-fitting methods

## ----Evaluation-helpers, eval=FALSE-------------------------------------------
#  # Evaluation
#  # Winning Model Tally
#  winmodel_comparison <- function(this_error_model,
#                               this_dose_norm,
#                               this_log10_trans,
#                               this_time_scale) {
#    dose_indic <- as.numeric(this_dose_norm)
#    log10_indic <- as.numeric(this_log10_trans)
#    time_indic <- as.numeric(this_time_scale)
#    errmodel_indic <- substr(this_error_model, 1, 1)
#  
#    file_str <- paste0(Sys.getenv("OD_DIR"),
#                       today,
#                       "mypk_fit_",
#                       dose_indic,
#                       log10_indic,
#                       time_indic,
#                       errmodel_indic,
#                       ".Rds")
#    pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#  
#    current_rds <- readRDS(file = file_str)
#  
#    pf <- current_rds$fit %>%
#      distinct(Chemical, Species, model, method, convcode) %>%
#      group_by(method, .drop = FALSE) %>%
#      count(convcode) %>%
#      pivot_wider(names_from = convcode,
#                  names_prefix = "convcode.",
#                  values_from = n)
#  
#  
#  
#    # Wide winning model
#    suppressMessages({
#      winning_model <- get_winning_model(obj = current_rds)
#      wide_winning_model <- winning_model %>%
#        group_by(method, model) %>%
#        count() %>%
#        pivot_wider(names_from = model, values_from = n) %>%
#        left_join(pf)
#    })
#    return(wide_winning_model)
#  }
#  
#  # RMSLE for Cmax and AUC (with some tallys for extreme values)
#  rmsles_Cmax_AUC <- function(
#      this_error_model,
#      this_dose_norm,
#      this_log10_trans,
#      this_time_scale) {
#    dose_indic <- as.numeric(this_dose_norm)
#    log10_indic <- as.numeric(this_log10_trans)
#    time_indic <- as.numeric(this_time_scale)
#    errmodel_indic <- substr(this_error_model, 1, 1)
#  
#    file_str <- paste0(Sys.getenv("OD_DIR"),
#                       today,
#                       "mypk_fit_",
#                       dose_indic,
#                       log10_indic,
#                       time_indic,
#                       errmodel_indic,
#                       ".Rds")
#    pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#  
#    message(paste0("Evaluation for: ", pk_name))
#    current_rds <- readRDS(file = file_str)
#    # Cmax and AUC_inf RMSEs, eval_tkstats already filters winning models
#    RMSLE_eval <- suppressMessages(eval_tkstats(obj = current_rds,
#                                               dose_norm = FALSE,
#                                               finite_only = TRUE) %>%
#                                    mutate(Options = pk_name) %>%
#                                    dplyr::select(
#                                      Options,
#                                      Chemical,
#                                      Species,
#                                      Route,
#                                      Media,
#                                      Reference,
#                                      method,
#                                      model,
#                                      starts_with("Cmax"),
#                                      starts_with("AUC_")
#                                    ))
#    message(paste0("How many finite, non-zero comparisons had ",
#                   "AUC_infinity.tkstats over/under 20X of AUC_infinity.nca?"))
#    print(
#      RMSLE_eval %>%
#        filter(
#          AUC_infinity.nca > 0,
#          AUC_infinity.tkstats > 0) %>%
#        group_by(Options, method) %>%
#        summarize(
#          `AUC_folderror>20X` = sum(
#            log10(AUC_infinity.tkstats/AUC_infinity.nca) > log10(20)
#          )
#        )
#    )
#    message("How many comparisons had AUC_infinity equal to zero?")
#    print(
#      RMSLE_eval %>%
#        filter(
#          AUC_infinity.nca > 0,
#          AUC_infinity.tkstats > 0) %>%
#        group_by(Options, method) %>%
#        summarize(NCA_zeroAUC = sum(AUC_infinity.nca == 0),
#                  tkstats_zeroAUC =sum(AUC_infinity.tkstats == 0))
#    )
#  
#    message("Removing those observations...")
#    RMSLE_eval <- RMSLE_eval %>%
#      filter(
#        AUC_infinity.nca > 0,
#        AUC_infinity.tkstats > 0,
#        log10(AUC_infinity.tkstats/AUC_infinity.nca) <= log10(20)
#      ) %>%
#      rowwise() %>%
#      mutate(
#        SLE_Cmax = (log10(Cmax.tkstats) - log10(Cmax.nca)) ^ 2,
#        SLE_AUC_inf = (log10(AUC_infinity.tkstats) - log10(AUC_infinity.nca)) ^ 2
#      ) %>%
#      filter(!is.na(SLE_Cmax), !is.na(SLE_AUC_inf)) %>%
#      group_by(Options, method) %>%
#      summarize(
#        RMSLE_Cmax = sqrt(mean(SLE_Cmax, na.rm = FALSE)) %>% signif(digits = 4),
#        RMSLE_AUC = sqrt(mean(SLE_AUC_inf, na.rm = FALSE)) %>% signif(digits = 4)
#      )
#  
#    print(RMSLE_eval)
#  
#    return(RMSLE_eval)
#  }
#  
#  # Goodness-of-fit table with R-squared, RMSLE and AIC
#  get_gof <- function(this_error_model,
#                               this_dose_norm,
#                               this_log10_trans,
#                               this_time_scale) {
#    dose_indic <- as.numeric(this_dose_norm)
#    log10_indic <- as.numeric(this_log10_trans)
#    time_indic <- as.numeric(this_time_scale)
#    errmodel_indic <- substr(this_error_model, 1, 1)
#  
#    file_str <- paste0(Sys.getenv("OD_DIR"),
#                       today,
#                       "mypk_fit_",
#                       dose_indic,
#                       log10_indic,
#                       time_indic,
#                       errmodel_indic,
#                       ".Rds")
#    pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#  
#    current_rds <- readRDS(file = file_str)
#    winning_model <- suppressMessages(get_winning_model(obj = current_rds))
#    suppressMessages({
#      this_rsq <- rsq(current_rds,
#                      use_scale_conc = FALSE,
#                      rsq_group = ggplot2::vars(Chemical, Species)) %>%
#        semi_join(winning_model)
#      this_AIC <- AIC(current_rds) %>%
#        semi_join(winning_model)
#  
#      this_rmsle <- rmse.pk(current_rds,
#                        rmse_group = vars(Chemical, Species),
#                        use_scale_conc = list(dose_norm = FALSE,
#                                              log10_trans = TRUE)) %>%
#        semi_join(winning_model)
#    })
#  
#    # Since they are all joined by winmodel
#    # all three must have same NROW
#    message(
#      paste0("For ", pk_name, "... outputs below must have same number of rows")
#    )
#  
#    message(paste0("rsq: ", NROW(this_rsq)))
#    message(paste0("aic: ", NROW(this_AIC)))
#    message(paste0("rmsle: ", NROW(this_rmsle)))
#  
#    gof_df <- this_rsq %>%
#      inner_join(this_AIC, by = join_by(Chemical, Species, method, model)) %>%
#      inner_join(this_rmsle, by = join_by(Chemical, Species, method, model)) %>%
#      mutate(Options = pk_name, .before = Chemical)
#  
#  
#  
#    return(gof_df)
#  }
#  
#  
#  # All predictions
#  get_all_preds <- function(this_error_model,
#                               this_dose_norm,
#                               this_log10_trans,
#                               this_time_scale) {
#    dose_indic <- as.numeric(this_dose_norm)
#    log10_indic <- as.numeric(this_log10_trans)
#    time_indic <- as.numeric(this_time_scale)
#    errmodel_indic <- substr(this_error_model, 1, 1)
#  
#    file_str <- paste0(Sys.getenv("OD_DIR"),
#                       today,
#                       "mypk_fit_",
#                       dose_indic,
#                       log10_indic,
#                       time_indic,
#                       errmodel_indic,
#                       ".Rds")
#    pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#  
#  
#    current_rds <- readRDS(file = file_str)
#  
#    # Wide winning model
#    suppressMessages({
#      winning_model <- get_winning_model(obj = current_rds) %>%
#        select(-c(near_flat, preds_below_loq))
#      this_preds <- predict(current_rds,
#                            use_scale_conc = FALSE) %>%
#        semi_join(winning_model) %>%
#        mutate(Options = pk_name, .before = Chemical)
#    })
#    return(this_preds)
#  }
#  
#  # Factor of two model error
#  tf_tests <- function(this_error_model,
#                               this_dose_norm,
#                               this_log10_trans,
#                               this_time_scale) {
#    dose_indic <- as.numeric(this_dose_norm)
#    log10_indic <- as.numeric(this_log10_trans)
#    time_indic <- as.numeric(this_time_scale)
#    errmodel_indic <- substr(this_error_model, 1, 1)
#  
#    file_str <- paste0(Sys.getenv("OD_DIR"),
#                       today,
#                       "mypk_fit_",
#                       dose_indic,
#                       log10_indic,
#                       time_indic,
#                       errmodel_indic,
#                       ".Rds")
#    pk_name <- paste0(dose_indic, log10_indic, time_indic, errmodel_indic)
#    current_rds <- readRDS(file = file_str)
#    print(paste0("Now analyzing: ", pk_name))
#  
#    out <- twofold_test(current_rds, suppress_messages = TRUE)
#  
#    out <- out$model_error_summary %>%
#      mutate(Options = pk_name)
#  }
#  

## ----do_evalutations, eval=FALSE----------------------------------------------
#  system.time(wm_comp <- fitopts %>%
#                dplyr::rowwise() %>%
#                dplyr::mutate(winmodel_comp = list(
#                  winmodel_comparison(this_error_model = error_model,
#                                   this_dose_norm = dose_norm,
#                                   this_log10_trans = log10_trans,
#                                   this_time_scale = time_scale))) %>%
#                tidyr::unnest(winmodel_comp))
#  
#  
#  system.time(cmax_auc_rmsle_tbl <- fitopts %>%
#                dplyr::rowwise() %>%
#                dplyr::mutate(nca_comp = list(
#                  rmsles_Cmax_AUC(this_error_model = error_model,
#                                   this_dose_norm = dose_norm,
#                                   this_log10_trans = log10_trans,
#                                   this_time_scale = time_scale))) %>%
#                tidyr::unnest(nca_comp))
#  
#  system.time(gof_tbl <- fitopts %>%
#                dplyr::rowwise() %>%
#                dplyr::mutate(gof_win = list(
#                  get_gof(this_error_model = error_model,
#                                   this_dose_norm = dose_norm,
#                                   this_log10_trans = log10_trans,
#                                   this_time_scale = time_scale))) %>%
#                tidyr::unnest(gof_win))
#  
#  
#  system.time(all_tf_tests <- fitopts %>%
#                dplyr::rowwise() %>%
#                dplyr::mutate(TF = list(
#                  tf_tests(error_model,
#                           this_dose_norm = dose_norm,
#                           this_log10_trans = log10_trans,
#                           this_time_scale = time_scale))) %>%
#                tidyr::unnest(TF))
#  
#  system.time(all_preds <- fitopts %>%
#                dplyr::rowwise() %>%
#                dplyr::mutate(these_preds = list(
#                  get_all_preds(this_error_model = error_model,
#                                   this_dose_norm = dose_norm,
#                                   this_log10_trans = log10_trans,
#                                   this_time_scale = time_scale))) %>%
#                tidyr::unnest(these_preds))
#  
#  
#  
#  gc()

## ----Evaluating choices, eval=FALSE-------------------------------------------
#  # Writing to file
#  
#  writexl::write_xlsx(x = list(
#    Fit_Counts = wm_comp,
#    AUC_and_Cmax_RMSE_summary = cmax_auc_rmsle_tbl,
#    Prediction_Evaluations = gof_tbl,
#    Twofold_Tests = all_tf_tests),
#    path = paste0(Sys.getenv("FIG_DIR"),
#                  "Manu_Files/",
#                  today,
#                  "_Supp_Table1",
#                  ".xlsx"))
#  gof_tbl <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
#                                                     "Manu_Files/",
#                                                     today,
#                                                     "_Supp_Table1.xlsx"),
#                                       sheet = "Prediction_Evaluations") %>%
#    mutate(RMSLE = as.numeric(RMSLE))
#  wm_comp <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
#                                                     "Manu_Files/",
#                                                     today,
#                                                     "_Supp_Table1.xlsx"),
#                                       sheet = "Fit_Counts")
#  cmax_auc_rmsle_tbl <- readxl::read_xlsx(path = paste0(Sys.getenv("FIG_DIR"),
#                                                     "Manu_Files/",
#                                                     today,
#                                                     "_Supp_Table1.xlsx"),
#                                       sheet = "AUC_and_Cmax_RMSE_summary")
#  gof_tbl %>%
#    group_by(Options, method) %>%
#    count(RMSLE < 1) %>%
#    filter(`RMSLE < 1`) %>%
#    arrange(desc(n)) %>%
#    print(n = 8)
#  # bobyqa-optimized options 010h, 110h, 110p
#  
#  
#  wm_comp %>% filter(method %in% "bobyqa",
#                     !time_scale, log10_trans)
#  # Off these, there is one more data group that is model_flat
#  # I will remove this so it doesn't affect downstream analyses
#  gof_tbl %>% filter(Options == "110p", model == "model_flat",
#                     method == "bobyqa") %>% distinct(Chemical, Species)
#  gof_tbl %>% filter(Options == "110h", model == "model_flat",
#                     method == "bobyqa") %>% distinct(Chemical, Species)
#  gof_tbl %>% filter(Options == "010h", model == "model_flat",
#                     method == "bobyqa") %>% distinct(Chemical, Species)
#  gof_tbl %>% filter(Options == "010p", model == "model_flat",
#                     method == "bobyqa") %>% distinct(Chemical, Species)
#  
#  
#  gof_tbl %>% filter(Options == "010h",
#                     method == "bobyqa",
#                     Chemical %in% "DTXSID1021116") %>%
#    distinct(Chemical, Species)
#  # Filter out DTXSID1021116
#  
#  gof_tbl %>%
#    anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
#    filter(!(model %in% "model_flat")) %>%
#    group_by(Options, method) %>%
#    count(RMSLE < 0.7) %>%
#    filter(`RMSLE < 0.7`) %>%
#    arrange(desc(n)) %>%
#    print(n = 8)
#  # 110p seems to win out here.
#  
#  gof_tbl %>%
#    filter(method %in% "bobyqa",
#           Options %in% c("110p", "010h", "110h")
#           ) %>%
#    anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
#    group_by(Options) %>%
#    summarize(medianR2 = median(Rsq, na.rm = TRUE))
#  
#  gof_tbl %>%
#    filter(method %in% "bobyqa",
#           Options %in% c("110p", "010h", "110h")) %>%
#    anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
#    group_by(Options) %>%
#    summarize(medianR2 = mean(Rsq, na.rm = TRUE))
#  # Here we see that the highest median Rsq is for 110p
#  # I use median because the distribution is skewed towards 1.
#  gof_tbl %>%
#    filter(method %in% "bobyqa",
#           Options %in% c("110p", "010h")) %>%
#    anti_join(data.frame(Chemical = "DTXSID1021116", Species = "rat")) %>%
#    distinct(Chemical, Species, Options, Rsq) %>%
#    pivot_wider(names_from = Options, values_from = Rsq) %>%
#    ggplot(aes(x = `110p`, y = `010h`)) + geom_point()
#  #110h is most even between models
#  
#  
#  cmax_auc_rmsle_tbl %>%
#    filter(Options %in% c("010h", "110p", "110h"),
#           method %in% "bobyqa")
#  # Here it is clear that 110p results in the
#  
#  all_tf_tests %>%
#    filter(Options %in% c("110p", "110h", "010h"),
#           model %in% "All non-flat",
#           method %in% "bobyqa") %>%
#    select(Options, model, starts_with("percent_"))
#  # Relatively similar here
#  
#  
#  cmaxAUCpl <- cmax_auc_rmsle_tbl %>%
#    filter(RMSLE_AUC < 0.8, RMSLE_Cmax < 0.5) %>%
#    ggplot(aes(x = RMSLE_AUC, y = RMSLE_Cmax,
#               shape = method, fill = Options)) +
#    scale_shape_manual(values = c(21,22))+
#    scale_color_brewer(palette = "Paired")+
#    geom_point(color = "black", size = 2, stroke = 0.5)  +
#    guides(fill = guide_legend(override.aes = list(shape = 21))) +
#    # coord_equal(xlim = c(0.3, 0.45), ylim = c(0.3, 0.4), expand = TRUE) +
#    labs(x = "AUC RMSLE", y = "Cmax RMSLE") +
#    theme_bw() +
#    theme(panel.border = element_rect(fill = NA,
#                                      color = "black",
#                                      linewidth = 2))
#  cmaxAUCpl
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "CmaxAUC_RMSLE_plotFinalists.png"),
#         bg = "white",
#         height = 3.25,
#         width = 3.5,
#         units = "in",
#         device = "png",
#         dpi = 300)
#  
#  
#  # Plot Rsq
#  violin_rmsle <- gof_tbl %>%
#    filter(!(model %in% "model_flat"),
#           Options %in% c("010h", "010p",
#                          "111h", "111p",
#                          "110h", "110p"),
#           RMSLE <= 10) %>%
#    ggplot() +
#    geom_violin(
#      aes(x = "",
#          y = RMSLE,
#          color = interaction(error_model, method)),
#      draw_quantiles = c(0.25,0.5,0.75)) +
#    facet_grid(cols = vars(Options)) +
#    scale_color_brewer(palette = "Paired") +
#    labs(title = "RMSLE",
#         color = "Error Model.Optimizer",
#         subtitle = "(values 10 or under)",
#         x = "") +
#    coord_cartesian(ylim = c(0, 10)) +
#    theme_bw() +
#    theme(legend.position = "none",
#          plot.title = element_text(hjust = 0.5),
#          plot.subtitle = element_text(hjust = 0.5),
#          axis.title.x = element_blank(),
#          axis.ticks.x = element_blank(),
#          axis.text.x = element_blank()
#          )
#  
#  violin_rsq <- gof_tbl %>%
#    filter(!(model %in% "model_flat"),
#           Options %in% c("010h", "010p",
#                          "111h", "111p",
#                          "110h", "110p")) %>%
#    ggplot() +
#    geom_violin(aes(x = "",
#                    y = Rsq,
#                    color = interaction(error_model, method)),
#                draw_quantiles = c(0.25,0.5,0.75)) +
#    facet_grid(cols = vars(Options)) +
#    scale_color_brewer(palette = "Paired") +
#    labs(title = "R-squared",
#         color = "Error Model.Optimizer",
#         x = "") +
#    coord_cartesian(ylim = c(0, 1)) +
#    theme_bw() +
#    guides(color = guide_legend(nrow = 2, byrow = TRUE)) +
#    theme(legend.position = "bottom",
#          plot.title = element_text(hjust = 0.5),
#          axis.title.x = element_blank(),
#          axis.ticks.x = element_blank(),
#          axis.text.x = element_blank()
#          )
#  
#  cowplot::plot_grid(plotlist = list(violin_rmsle, violin_rsq),
#                     nrow = 2, align = "v")
#  ggsave(
#    filename = paste0(
#      Sys.getenv("FIG_DIR"),
#      today,
#      "_RMSLE_Rsquared_ViolinPlots_selectOptions.png"
#      ),
#    height = 5,
#    width = 6,
#    units = "in"
#    )
#  
#  gof_tbl %>%
#    filter(!(model %in% "model_flat"),
#           Options %in% c("010h", "010p",
#                          "111h", "111p",
#                          "110h", "110p")) %>%
#    ggplot() +
#    geom_point(aes(x = Rsq, y = RMSLE,
#                   color = interaction(method, model)),
#               shape = 1) +
#    facet_wrap(~Options) +
#    scale_color_brewer(palette = "Paired") +
#    theme_bw() +
#    coord_cartesian(
#      xlim = c(0, 1),
#      expand = TRUE
#      ) +
#    theme(legend.position = "bottom",
#          plot.title = element_text(hjust = 0.5))
#  
#  ggsave(
#    filename = paste0(
#      Sys.getenv("FIG_DIR"),
#      today,
#      "_RMSLE_Rsquared_ScatterPlot.png"
#      ),
#    height = 7,
#    width = 10,
#    units = "in"
#    )
#  
#  rmsle_hist <- gof_tbl %>% filter(model != "model_flat",
#                     Options %in% c("100p", "010h", "110p", "110h"),
#                     method == "bobyqa") %>%
#    ggplot(aes(x = RMSLE)) +
#    geom_histogram(fill = "grey5",
#                   binwidth = 0.1) +
#    scale_x_log10(label = scales::label_math()) +
#    geom_vline(xintercept = 1, color = "red3", linetype = "dotted") +
#    facet_grid(rows = vars(Options),
#               cols = vars(method)) +
#    theme_bw()
#  
#  rmsle_hist
#  
#  ggsave(
#    filename = paste0(
#      Sys.getenv("FIG_DIR"),
#      today,
#      "_RMSLE_histogram_topChoices.png"
#      ),
#    height = 4,
#    width = 4,
#    units = "in"
#    )
#  
#  cowplot::plot_grid(
#    plotlist = list(rmsle_hist,
#                    cmaxAUCpl +
#                      guides(fill = guide_legend(ncol = 2,
#                                                 override.aes = list(shape = 21)),
#                             shape = guide_legend(nrow = 2)) +
#                      theme(legend.position = "bottom",
#                            legend.title.position = "top")
#    ),
#    rel_widths = c(1, 1.75),
#    labels = "AUTO")
#  ggsave(
#    filename = paste0(
#      Sys.getenv("FIG_DIR"),
#      today,
#      "_RMSLE_histogram_CmaxAUC.png"
#      ),
#    height = 4.5,
#    width = 6,
#    units = "in"
#    )
#  
#  
#  all_preds_prep <- all_preds %>%
#    filter(Conc_est >= LOQ,
#           Options %in% c("110p", "110h", "010h"),
#           method == "bobyqa") %>%
#    mutate(ID = as.factor(paste(Chemical, Species,
#                                method)),
#           .keep = "unused") %>%
#    select(ID, Options, dose_norm, log10_trans, time_scale,
#           Route, Media, Dose, Conc, Conc_est, Time, Reference)
#  
#  split_preds <- split(all_preds_prep, all_preds_prep$ID)
#  
#  sp_plots <- lapply(names(split_preds), function(i) {
#    ggplot(data = split_preds[[i]],
#           mapping = aes(x = Conc, y = Conc_est,
#                         color = Options, shape = Reference)) +
#      geom_point(size = 2) +
#      geom_abline(slope = 1) +
#      facet_grid(dose_norm~Route+Media+Dose,
#                 scales = "free_y",
#                 labeller = labeller(dose_norm = label_both)) +
#      scale_y_log10() + scale_x_log10() +
#      scale_color_brewer(palette = "Accent") +
#      theme_bw() +
#      labs(title = i) +
#      theme(legend.position = "bottom",
#            plot.title = element_text(hjust = 0.5, face = "bold"))
#  }
#  )
#  
#  # Very BIG FILE
#  pdf(file = paste0(
#    Sys.getenv("FIG_DIR"),
#    today,
#    "log10_GOFplots_select.pdf"),
#    onefile = TRUE, height = 8, width = 8)
#  for (x in seq_along(sp_plots)) {
#    print(sp_plots[[x]])
#  }
#  dev.off()

## ----load_multirefs, eval=FALSE-----------------------------------------------
#  # Reference Multiples
#  multiRef_ChemSpec <- all_my_data %>%
#    ungroup() %>%
#    distinct(Chemical, Species, Reference) %>%
#    group_by(Chemical, Species) %>%
#    count() %>%
#    arrange(desc(n)) %>%
#    filter(n > 1) %>%
#    dplyr::select(!n)
#  multiRef_ChemSpec %>% nrow() # Surprisingly only 15 Chem + Species have multiple references
#  multiRef_ChemSpec

## ----NCA_error_eval, eval=FALSE-----------------------------------------------
#  
#  # Which Chemical Species Route Media Dose groups have the randomness?
#  my_pk$data %>%
#    group_by(Chemical, Species, Reference, Route, Media, Dose, Time) %>%
#    summarize(Count = n()) %>%
#    group_by(Chemical, Species, Reference, Route, Media, Dose) %>%
#    filter(any(Count == 1) && any(Count > 1)) %>%
#    distinct(Chemical, Species, Reference, Route, Media, Dose) -> problem_set
#  
#  # Evaluate 100 times
#  n_evals <- 100
#  evalNCA_df <- NULL
#  my_pk_new <- my_pk +
#    settings_preprocess(suppress.message = TRUE)
#  
#  for (i in 1:100) {
#    seed_num <- 100 + i
#    set.seed(seed_num)
#    message(paste("Current seed:", seed_num))
#    my_pk_new <- do_preprocess(my_pk_new)
#    my_pk_new <- do_data_info(my_pk_new)
#  
#    this_nca <- my_pk_new$data_info$nca %>%
#      select(-param_sd_z) %>%
#      mutate(seed = as.factor(seed_num))
#    evalNCA_df <- bind_rows(evalNCA_df, this_nca)
#  }
#  
#  names(evalNCA_df)
#  
#  evalNCA_df %>%
#    group_by(Chemical, Species, Route, Media, Dose, param_name) %>%
#    summarize(Average = mean(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
#              Median = median(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
#              StDev = sd(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
#              Min = min(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
#              Max = max(param_value[which(!is.infinite(param_value))], na.rm = TRUE),
#              Unique_Values = length(unique(signif(param_value, 4))),
#              Inf_Count = sum(is.infinite(param_value))) %>%
#    filter(
#      # Route %in% "iv",
#      param_name %in% "Cmax"
#      ) %>%
#    View()
#  
#  
#  evalNCA_df %>%
#    filter(param_name %in% c("tmax"),
#           Route %in% "iv",
#           is.finite(param_value),
#           Species %in% c("rat", "human", "hamster"))  %>%
#    ggplot(aes(x = param_value, y = Chemical)) +
#    geom_point() +
#    facet_grid(rows = vars(Route)) +
#    theme_bw() +
#    theme(axis.text.y = element_text(size = 5),
#          axis.ticks.y = element_blank(),
#          panel.grid.major = element_line(color = "grey70",
#                                            linewidth = 0.4,
#                                            linetype = "solid"),
#          panel.grid.major.y = element_blank(),
#          panel.grid.minor = element_blank())
#  
#  
#  
#  
#  set.seed(2023)

## ----Final_Options, eval=FALSE------------------------------------------------
#  my_pk <- minimal_pk +
#    facet_data(vars(Chemical, Species)) +
#    settings_preprocess(keep_data_original = TRUE,
#                        suppress.messages = FALSE) +
#    settings_optimx(method = c("bobyqa")) +
#    scale_conc(dose_norm = TRUE, log10_trans = TRUE) +
#    scale_time(new_units = "auto") +
#    stat_error_model(error_group = vars(Chemical, Species))
#  
#  my_pk <- do_preprocess(my_pk)
#  cvt_DTXSIDs <- unique(my_pk$data$Chemical)
#  length(cvt_DTXSIDs)
#  
#  my_pk <- do_prefit(my_pk)
#  
#  
#  system.time(my_pk <- do_fit(my_pk,
#                              n_cores = 14))
#  
#  # saveRDS(my_pk, "data-raw/all_cvt_pk.rds")

## ----data_aggregation, eval=FALSE---------------------------------------------
#  # Need the following data for any of the subsequent plots
#  
#  # after fitting pk object for all cvt objects or reading the fitted pk object in `setup`
#  winmodel <- get_winning_model(my_pk)
#  my_preds <- semi_join(predict(my_pk, use_scale_conc = FALSE), winmodel)
#  my_residuals <- residuals(my_pk, use_scale_conc = FALSE) %>%
#    semi_join(winmodel)
#  my_tkstats <- eval_tkstats(my_pk) %>% semi_join(winmodel)
#  my_nca <- get_nca(my_pk)
#  all_my_data <- get_data(my_pk)
#  
#  formatted_coefs <- my_pk$fit %>%
#    filter(str_detect(param_name, "sigma_", negate = TRUE)) %>%
#    select(Chemical, Species, model, model, param_name, estimate,
#           param_name, estimate, convcode) %>%
#    mutate(param_name = paste(param_name, "tkstats", sep = ".")) |>
#    pivot_wider(names_from = param_name,
#                                       values_from = estimate)
#  
#  my_tf <- twofold_test(my_pk)
#  
#  NROW(all_my_data) > NROW(my_preds) # THis must be true... or else try distinct(my_preds)
#  
#  # Writing file to xlsx
#  writexl::write_xlsx(x = list(predictions = my_preds,
#                               tkstats = my_tkstats),
#                      path = paste0(Sys.getenv("FIG_DIR"),
#                                    "Manu_Files/",
#                                    today,
#                                    "_Supp_Table2",
#                                    ".xlsx"))
#  
#  fe_df <- fold_error(my_pk) %>% semi_join(winmodel)
#  # May need to change the use_scale_conc defaults
#  r2_df <- rsq(my_pk, use_scale_conc = FALSE) %>%
#    semi_join(winmodel)
#  myAIC <- AIC(my_pk) %>%
#    semi_join(winmodel)
#  myRMSLE <- rmse(my_pk,
#                 use_scale_conc = list(dose_norm = FALSE, log10_trans = TRUE)) %>%
#    semi_join(winmodel)
#  
#  myRMSE <- rmse(my_pk,
#                 rmse_group = ggplot2::vars(Chemical, Species, Route, Media, Dose)) %>%
#    semi_join(winmodel)
#  
#  chem_name_translate <- all_my_data %>%
#    dplyr::select(Chemical, Chemical_Name) %>%
#    dplyr::filter(!(Chemical_Name %in% c("2,4-d",
#                                       "methylene chloride",
#                                       "benzo[a]pyrene"))) %>%
#    dplyr::distinct()
#  

## ----fig3A, eval=FALSE--------------------------------------------------------
#  my_tf$indiv_data_test %>% select(Route, starts_with("percent_"))
#  
#  
#  pl3A <- my_tf$individual_data %>%
#    # Plotting
#    ggplot(aes(x = foldConc)) +
#    geom_histogram(binwidth = 0.2,
#                   fill = "grey5",
#                   color = "grey5",
#                   linewidth = 0.4) +
#    coord_cartesian(xlim =  c(0, 4),
#                    ylim = c(0, 4000)) +
#    scale_y_continuous(expand = c(0, 0),
#                       breaks = c(0, 1000, 2000, 3000),
#                       labels = c("0", "1", "2", "3")) +
#    expand_limits(y = 0) +
#    geom_vline(xintercept = c(0.5, 2), color = "red3", linetype = "dashed") +
#    theme_bw() +
#    labs(x = "Mean-normalized\nconcentration",
#                  y = "Observations\n(thousands)") +
#    theme(text = element_text(size = 10),
#          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
#          panel.background = element_blank(),
#          panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4),
#          axis.title = element_text(face = "bold"),
#          axis.line = element_blank(),
#          plot.background = element_blank(),
#          axis.ticks.y = element_blank())
#  
#  pl3A
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "NormalizedConcentrations",".png"),
#         plot = pl3A,
#         bg = "white",
#         height = 2.2,
#         width = 2.5,
#         units = "in",
#         device = "png",
#         dpi = 300)
#  
#  # Not included in manuscript
#  my_tf$data_descriptors %>%
#    select(Chemical, Species, Reference, Route, Media, Time, data_descr) %>%
#    distinct() %>%
#    ggplot(aes(fill = data_descr, x = "")) +
#    geom_bar(position = "stack",
#             color = "black") +
#    labs(x = "", y = "Occurence by Chemical + Species group") +
#    scale_y_continuous(expand = c(0, NA)) +
#    theme_minimal()
#  

## ----fig4A-2-alt, eval=FALSE--------------------------------------------------
#  # With the mapped column names
#  my_tf$summarized_data_test
#  
#  pl3A_alt <- my_tf$summarized_data_test %>%
#    filter(Route == "All") %>%
#    select(Route, within, outside) %>%
#    pivot_longer(cols = c(within, outside),
#                 names_to = "status",
#                 values_to = "value") %>%
#    ggplot(aes(x = status, fill = status, y = value)) +
#    geom_col(position = position_stack(),
#             color = "black", linewidth = 1) +
#    coord_cartesian(ylim = c(0, 2000)) +
#    scale_y_continuous(expand = c(0, NA),
#                       breaks = c(0, 1000, 2000),
#                       labels = c("0", "1", "2")) +
#    scale_fill_manual(values = c("black", "white")) +
#    theme_classic() +
#    labs(x = "95% of observations\nwithin factor of 2",
#                  y = "Observations\n(thousands)") +
#    theme(text = element_text(size = 10),
#          panel.border = element_rect(color = "black", linewidth = 1, fill = NA),
#          panel.background = element_blank(),
#          panel.grid.major.y = element_line(color = "grey90", linewidth = 0.4),
#          axis.title = element_text(face = "bold"),
#          axis.line = element_blank(),
#          plot.background = element_blank(),
#          axis.ticks.y = element_blank(),
#          legend.position = "none")
#  
#  pl3A_alt # Not included in final manuscript
#  

## ----fig4B-cowplot, eval=FALSE------------------------------------------------
#  pl3B <- my_tf$individual_data %>%
#    inner_join(my_tkstats %>%
#                dplyr::select(Chemical, Species,
#                              Route, Media,
#                              tmax = tmax.nca) %>%
#                filter(!is.na(tmax))) %>%
#    group_by(Chemical,
#             Species,
#             Reference,
#             Route,
#             Media,
#             Dose) %>%
#    mutate(
#           normTime = ifelse(
#             Route == "iv",
#             1 + Time/max(Time),
#             ifelse(
#               Time > tmax,
#               ((Time - tmax)/max(Time)) + 1,
#               0.5 + Time/(2*tmax)
#             )
#           )) %>%
#    ggplot(aes(
#      x = normTime,
#      y = foldConc
#    )) +
#    geom_point(alpha = 0.1, size = 0.7) +
#    geom_vline(xintercept = 1, color = "blue",
#               linewidth = 0.8, linetype = "dashed") +
#    geom_vline(xintercept = 2, color = "green4",
#               linewidth = 0.8, linetype = "dashed") +
#    geom_hline(yintercept = c(0.5, 2), color = "red3",
#               linewidth = 0.8, linetype = "dashed") +
#    facet_grid(cols = vars(Route),
#               scales = "free_x") +
#    labs(x = "ADME-Normalized Time",
#         y = "Mean-Normalized\nConcentration") +
#    theme(text = element_text(size = 10),
#          panel.border = element_rect(color = "black", fill = NA, size = 1),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold"),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          axis.text.x = element_blank(),
#          axis.title = element_text(face = "bold"),
#          panel.spacing = unit(0.125, units = "in"),
#          plot.background = element_blank(),
#          legend.position = "none")
#  
#  pl3B
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "ADME_NormTime-NormConcs_onlyDetects.png"),
#         height = 3,
#         bg = "white",
#         width = 5,
#         units = "in",
#         dpi = 300)
#  
#  
#  # Make the Combination Plot----
#  
#  plot_grid(pl3A, pl3B, labels = c("A", "B"),
#                       align = "h", axis = "bt", rel_widths = c(1, 1.7))
#  
#  
#  figure3 <- plot_grid(pl3A, pl3B, labels = c("A", "B"),
#                       align = "h", axis = "bt", rel_widths = c(1, 1.7))
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "Figure3_onlyDetects.png"),
#         plot = figure3,
#         bg = "white",
#         height = 3,
#         width = 6.5,
#         units = "in",
#         dpi = 300)
#  

## ----sFig2-cvt_analysis, eval=FALSE-------------------------------------------
#  data_range <- all_my_data %>%
#    filter(Detect, !exclude) %>%
#    group_by(Chemical, Species, Reference, Media, Route, Dose, Dose.Units) %>%
#    summarize(maxT = max(Time)/24,
#           concRange = log10(max(Conc)/min(Conc))) %>%
#    ungroup()
#  
#  
#  sf_2A <- data_range %>%
#    ggplot(aes(x = maxT)) +
#    geom_histogram(fill = "grey5",
#                   bins = 40,
#                   color = "grey5") +
#    scale_x_log10(labels = scales::number,
#                  breaks = c(0.1, 1, 7, 30, 365)) +
#    # scale_y_continuous(expand = c(0, 0),
#    #                    limits = c(0, 60),
#    #                    breaks = c(0, 10, 20, 30, 40, 50)) +
#    labs(x = "Day of final Timepoint",
#         y = "Count") +
#    theme(aspect.ratio = 1,
#          text = element_text(size = 10),
#          panel.border = element_rect(color = "black", fill = NA, size = 1),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          axis.ticks = element_blank(),
#          axis.line = element_blank())
#  
#  sf_2A
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "SFig2A_TimePoints.png"),
#         plot = sf_2C,
#         height = 4,
#         width = 4,
#         units = "in",
#         dpi = 300)
#  
#  
#  sf_2B <- data_range %>%
#    ggplot(aes(x = concRange)) +
#    geom_histogram(fill = "grey5",
#                   bins = 40,
#                   color = "grey5") +
#    scale_y_continuous(expand = c(0, 0),
#                       limits = c(0, 50),
#                       breaks = c(0, 25, 50)) +
#    labs(x = "log10(Concentration Range)",
#         y = "Count") +
#    theme(aspect.ratio = 1,
#          panel.border = element_rect(color = "black", fill = NA, size = 1),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold"),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          axis.text = element_text(size = 10),
#          axis.title = element_text(size = 11))
#  
#  sf_2B
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "SFig2B_ConcRange.png"),
#         plot = sf_2B,
#         height = 4,
#         width = 4,
#         units = "in",
#         dpi = 300)
#  
#  sf_2AB <- plot_grid(sf_2A, sf_2B,
#                      labels = c("A", "B"),
#                      nrow = 1)
#  
#  
#  sf_2AB
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "SFig2_ConcTimeRanges.png"),
#         height = 3.2,
#         width = 6,
#         bg = "white",
#         units = "in",
#         dpi = 300)
#  

## ----sFig4-model_performance, eval=FALSE--------------------------------------
#  my_tf$model_error_summary %>%
#    select(model, method, starts_with("percent_"))
#  
#  my_tf$model_error_all %>%
#    filter(model %in% c("model_1comp", "model_2comp")) %>%
#    ggplot(aes(x = Fold_Error)) +
#    geom_histogram(
#                   fill = "grey5") +
#    scale_x_log10(limits = c(1E-3, 1E3), labels = scales::label_math(format = log10)) +
#    expand_limits(y = 0) +
#    geom_vline(xintercept = c(0.5, 2), color = "red3", linetype = "dashed") +
#    facet_grid(cols = vars(model),
#               rows = vars(Route)) +
#    labs(x = "Predicted/Observed",
#         y = "Number of Observations") +
#    theme_classic() +
#    theme(aspect.ratio = 1,
#          panel.border = element_rect(color = "black", fill = NA),
#          panel.grid.major = element_line(color = "grey95", linewidth = 0.4))
#  
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "PredictedObserved_compartmentModels.png"),
#         height = 5, width = 5)
#  
#  

## ----fig4-modelPerform-v-dataVar, eval=FALSE----------------------------------
#  my_tf$indiv_data_test_fold_errors %>%
#    select(model, starts_with("percent_")) %>%
#    glimpse()
#  
#  pl_4A <- my_tf$indiv_data_fold_errors %>%
#    # For Plotting
#    ggplot(aes(
#      y = Fold_Error,
#      x = foldConc
#    )) +
#    geom_bin2d(bins = 40, color = NA) +
#    geom_hline(yintercept = c(0.5, 2), linetype = "dashed") +
#    geom_vline(xintercept = c(0.5, 2), linetype = "dashed") +
#    facet_grid(cols = vars(model)) +
#    scale_x_log10(labels = scales::label_math(format = log10),
#                  limits = c(0.0001, 1000)) +
#    scale_y_log10(labels = scales::label_math(format = log10),
#                  limits = c(0.0001, 10000)) +
#    scale_fill_viridis_c(option = "cividis",
#                         limits = c(1, 100),
#                         oob = scales::oob_squish) +
#    labs(y = "Model Error",
#         x = "Data Variability",
#         fill = "Count") +
#    theme(aspect.ratio = 1,
#          panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold"),
#          axis.ticks = element_blank(),
#          axis.line = element_blank())
#  
#  pl_4A
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "ModelPerformance_vDataVariability.png"),
#         height = 2,
#         width = 4.2,
#         device = "png",
#         dpi = 300,
#         units = "in")
#  
#  my_table <- my_tf$indiv_data_test_fold_errors %>%
#    ungroup() %>%
#    select(starts_with("percent_")) %>% t() %>%
#    round(digits = 2) %>%
#    as.data.frame() %>%
#    rownames_to_column(var = "percentages") %>%
#    mutate(percentages = case_when(
#      percentages == "percent_both_within" ~ "Both within a factor of 2",
#      percentages == "percent_both_outside" ~ "Both outside a factor of 2",
#      percentages == "percent_model_outside" ~ "Model too variable",
#      percentages == "percent_data_outside" ~ "Data too variable"
#    ))
#  
#  
#  
#  names(my_table) <- c(" ","1-compartment (%)", "2-compartment (%)", "Overall (%)")
#  
#  library(flextable)
#  library(officer)
#  
#  flextable(my_table)%>%
#    border_inner() %>%
#    fontsize(part = "all", size = 11) %>%
#    bold(part = "all", j = 4) %>% autofit()
#  
#  table_plot <- gen_grob(flextable(my_table) %>%
#                           border_inner() %>%
#                           fontsize(part = "all", size = 10) %>%
#                           bold(part = "all", j = 4) %>%
#                           autofit())
#  
#  
#  dual_plot <- plot_grid(pl_4A, table_plot, ncol = 1, align = "v",
#            rel_heights = c(1, 0.5))
#  
#  dual_plot
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "ModelPerformance_vDataVariability_wTable.png"),
#         plot = dual_plot,
#         height = 4.5,
#         width = 6,
#         device = "png",
#         bg = "white",
#         dpi = 300,
#         units = "in")

## ----fig5Alt, eval=FALSE------------------------------------------------------
#  # Ultimately this will be removed from manuscript
#  my_tf$summarized_data_model_error %>%
#    select(model, starts_with("percent_"))

## ----fig5-goodness-of-fit, eval=FALSE-----------------------------------------
#  combined_gof_df <- my_tf$model_error_all %>%
#    group_by(Chemical, Species, model, method) %>%
#    summarize(within_2fold = sum(between(Fold_Error, 0.5, 2))/n()) %>%
#    left_join(r2_df) %>%
#    left_join(myAIC) %>%
#    left_join(myRMSLE) %>%
#    semi_join(winmodel)
#  
#  
#  mypl <- ggplot(data = combined_gof_df,
#                 aes(
#                   x = within_2fold,
#                   y = Rsq,
#                   color = model
#                 )) +
#    geom_point() +
#    geom_abline(slope = 1, color = "red4", linetype = "longdash") +
#    scale_color_manual(values = c("#0398FC", "#D68E09", "black")) +
#      coord_cartesian(xlim = c(0,1), ylim = c(0,1)) +
#    labs(x = "Fraction of predictions within 2-fold",
#         y = "R-squared value") +
#    theme(aspect.ratio = 1,
#          panel.border = element_rect(color = "black", fill = NA, size = 1),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          legend.position = "none",
#          legend.title = element_blank(),
#          legend.key = element_blank())
#  
#  panelA_plot <- ggExtra::ggMarginal(mypl, groupFill = TRUE,
#                      type = "histogram",
#                      xparams = list(binwidth = 0.1),
#                      yparams = list(binwidth = 0.1))
#  panelA_plot
#  ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "R2_within2fold_comparison.png"),
#         plot = ggExtra::ggMarginal(mypl, groupFill = TRUE,
#                      type = "histogram",
#                      xparams = list(binwidth = 0.05),
#                      yparams = list(binwidth = 0.05)),
#         height = 5,
#         width = 5,
#         units = "in")
#  
#  panelB_plot <- ggplot(data = myRMSE,
#                 aes(
#                   x = log10(RMSE),
#                   color = model,
#                   fill = model
#                 )) +
#    geom_histogram(bins = 50) +
#    labs(y = "Count") +
#    scale_color_manual(values = c("#0398FC", "#D68E09", "grey10")) +
#    scale_fill_manual(values = c("#0398FC", "#D68E09", "grey10")) +
#    facet_grid(rows = vars(model),
#               cols = vars(Route)) +
#    theme(text = element_text(size = 10),
#          aspect.ratio = 0.6,
#      panel.border = element_rect(color = "black", fill = NA, size = 1),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          strip.background = element_blank(),
#      panel.spacing.y = unit(0.125, units = "in"),
#          legend.position = "bottom",
#          legend.title = element_blank())
#  panelB_plot
#  
#  ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "RMSEs_winningmodel_RouteFacet.png"),
#         height = 4,
#         width = 6,
#         units = "in")
#  
#  legend <- get_legend(panelB_plot)
#  
#  plot_grid(plot_grid(panelA_plot,
#                     panelB_plot + theme(legend.position = "none"), labels = c("A", "B"),
#                     axis = "tb", align = "h", rel_heights = c(1,1)), legend,
#            ncol = 1,
#            rel_heights = c(1, 0.1))
#  ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "CombinedPlot_Rsquared_RMSE.png"),
#         height = 4,
#         width = 6.5,
#         bg = "white",
#         units = "in")

## ----goodness-of-fit_exampleFits, eval = FALSE--------------------------------
#  pl <- plot(my_pk, best_fit = TRUE,
#             use_scale_conc = TRUE,
#             drop_nonDetect = FALSE)
#  
#  cvt %>% filter(curation_set_tag == "CVT_Showa") %>%
#    pull(analyte_dtxsid) -> Showa_chems
#  
#  combined_gof_df %>% filter(!(Chemical %in% Showa_chems)) %>% View()
#  
#  ex_fits <- pl %>%
#    filter(Chemical %in% c("DTXSID2020139",
#                           "DTXSID4023917",
#                           "DTXSID3061635",
#                           "DTXSID1034187"),
#           Species %in% "rat") %>%
#    pull(final_plot)
#  
#  
#  cowplot::plot_grid(plotlist = list(ex_fits[[2]] +
#                                       scale_color_manual(values = c("#a6bddb",
#                                                                     "#74a9cf",
#                                                                     "#41bbc4",
#                                                                     "#2b8cbe",
#                                                                     "#045a8d")) +
#                                       theme(legend.position = "none") +
#                                       xlim(0,30),
#                                     ex_fits[[3]] +
#                                       scale_color_manual(values = c("black",
#                                                                     "grey40")) +
#                                       theme(legend.position = "none"),
#                                     ex_fits[[1]] +
#                                       scale_color_manual(values = c("magenta3")) +
#                                       theme(legend.position = "none"),
#                                     ex_fits[[4]] +
#                                       scale_color_manual(values = c("#006837")) +
#                                       theme(legend.position = "none")),
#                     scale = 0.85)
#  
#  ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "exampleFits_Rsq_within2fold.png"),
#         height = 12,
#         width = 12)
#  

## ----sFig7-sigmaDistribution, eval=FALSE--------------------------------------
#  
#  my_pk$fit %>%
#    select(Chemical, Species, model, method, param_name, estimate) %>%
#    filter(str_detect(param_name, "sigma_")) %>%
#    semi_join(winmodel) %>%
#    left_join(my_pk$prefit$stat_error_model$sigma_DF) %>%
#    inner_join(get_data_summary(my_pk) %>%
#                 distinct(Chemical, Species, n_obs)) -> my_coefs
#  
#  
#  my_coefs %>%
#    mutate(model =
#             case_when(
#               model == "model_1comp" ~ "1-compartment",
#               model == "model_2comp" ~ "2-compartment",
#               model == "model_flat"  ~ "Null"
#             )) %>%
#    ggplot() +
#    geom_histogram(aes(x = estimate/n_obs),
#                   bins = 15,
#                   color = NA, fill = "grey5") +
#    # geom_histogram(aes(x = start/n_obs), fill = NA, color = "grey20", bins = 15) +
#    scale_x_log10(labels = scales::label_math(format = log10)) +
#    facet_grid(cols = vars(model)) +
#    labs(x = "Size-normalized Sigma Value",
#         y = "Number of Observations") +
#    theme(panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold"),
#          plot.title = element_text(hjust = 0.5),
#          axis.ticks = element_blank(),
#          axis.line = element_blank())
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "NormalizedSigma_Distributions.png"),
#         bg = "white",
#         height = 3, width = 5)
#  

## ----Lombardo_Comparison, eval=FALSE------------------------------------------
#  cvt_DTXSIDs <- unique(my_pk$data$Chemical)
#  # First things first, let's load the data from Lombardo et al.
#  lombardo <- readxl::read_xlsx(
#    paste0(Sys.getenv("FIG_DIR"),
#           "Lombardo2018-Supplemental_82966_revised_corrected.xlsx"),
#    skip = 8)
#  
#  
#  l_batch_search <- readxl::read_xlsx(
#    paste0(Sys.getenv("FIG_DIR"),
#           "LombardoBatchSearch_Name.xlsx"),
#    sheet = "Main Data"
#  )
#  
#  
#  l_batch_search <- l_batch_search[c("INPUT", "CASRN","DTXSID")] %>%
#    distinct() %>%
#    right_join(lombardo[c("Name", "CAS #")], by = c("CASRN" = "CAS #", "INPUT" = "Name"))
#  
#  
#  
#  l_noNamesCASRN <- readxl::read_xlsx(
#    paste0(Sys.getenv("FIG_DIR"),
#           "LombardoBatchSearch_CASRNnoName.xlsx"),
#    sheet = "Main Data"
#  ) %>%
#    dplyr::select(CASRN = INPUT, DTXSID)
#  
#  l_curatedDTX <- read_tsv(paste0(Sys.getenv("FIG_DIR"),
#                           "CuratedIDs_lombardo.txt")) %>%
#    rename(INPUT = Name)
#  
#  
#  l_batch_search <- l_batch_search %>%
#    left_join(l_noNamesCASRN, by = "CASRN") %>%
#    mutate(DTXSID = coalesce(DTXSID.x, DTXSID.y)) %>%
#    dplyr::select(-DTXSID.x, -DTXSID.y)
#  # Now all but two of the IDs are unidentified
#  
#  lombardo <- lombardo %>%
#    right_join(l_batch_search, by = c("Name" = "INPUT"))
#  
#  
#  ####
#  lombard_abbr <- lombardo %>% dplyr::select(DTXSID,
#                                             hVss = `human VDss (L/kg)`,
#                                             hCltot = `human CL (mL/min/kg)`,
#                                             halflife = `terminal  t1/2 (h)` ) %>%
#    filter(DTXSID %in% cvt_DTXSIDs) %>%
#    left_join(chem_name_translate, by = c("DTXSID" = "Chemical"))
#  
#  # 15 chemicals in common with values for both
#  lombardo_comparison <- my_tkstats %>%
#    dplyr::select(DTXSID = Chemical, Species,
#                  model, Vss.tkstats, CLtot.tkstats) %>%
#    dplyr::filter(model != "model_flat") %>%
#    distinct() %>%
#    inner_join(lombard_abbr, by = "DTXSID") %>%
#    # filter(!is.na(Vss.tkstats)) %>%
#    arrange(halflife)
#  
#  comparable_ids <- lombardo_comparison %>% pull(DTXSID)
#  
#  lombard_abbr <- lombard_abbr %>%
#    mutate(Species = "human", model = "Lombardo et al.") %>%
#    rename(Vss = hVss, CLtot = hCltot) %>%
#    mutate(CLtot = CLtot*60/1000) %>% # Lombardo reports this as mL/min/kg, need L/h/kg
#    filter(DTXSID %in% comparable_ids) %>%
#    mutate(Chemical_Name = reorder(Chemical_Name, halflife))
#  
#  lombardo_comparison <- my_tkstats %>%
#    dplyr::select(DTXSID = Chemical, Species,
#                  model, Vss.tkstats, CLtot.tkstats, halflife.tkstats) %>%
#    filter(DTXSID %in% comparable_ids) %>%
#    left_join(chem_name_translate, by = join_by(DTXSID == Chemical)) %>%
#    distinct() %>%
#    rename(Vss = Vss.tkstats, CLtot = CLtot.tkstats, halflife = halflife.tkstats) %>%
#    bind_rows(lombard_abbr) %>%
#    mutate(model = ifelse(str_detect(model, "^model"), "invivoPKfit", model),
#           Chemical_Name = factor(Chemical_Name,
#                                  levels = levels(lombard_abbr$Chemical_Name)))
#  
#  lombardo_comparison %>%
#    pivot_longer(cols = c("Vss", "CLtot", "halflife")) %>%
#    mutate(name = factor(name, levels = c("halflife", "Vss", "CLtot"))) %>%
#    ggplot(mapping = aes(
#      x = value,
#      y = Chemical_Name
#    )) +
#    geom_point(aes(color = model,
#                   shape = Species),
#               position = position_dodge(0.7),
#               size = 5/.pt, stroke = 2.5/.pt) +
#    facet_grid(cols = vars(name),
#               scales = "free_x") +
#    scale_x_log10( labels = scales::label_math(format = log10)) +
#    scale_shape_manual(values = c(21, 22, 24),
#                       breaks = c("human", "dog", "rat")) +
#    scale_color_manual(values = c("black", "#1064c9")) +
#    guides(color = guide_legend(override.aes = list(shape = 21),
#                                nrow = 2,
#                                title.position = "top",
#                                title.hjust = 0.5),
#           shape = guide_legend(nrow = 1,
#                                title.position = "top",
#                                title.hjust = 0.5)) +
#    theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold", size = 12),
#          text = element_text(size = 10),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          axis.text = element_text(face = "bold", size = 6),
#          axis.title.y = element_blank(),
#          axis.title.x = element_blank(),
#          legend.position = "bottom",
#          legend.key = element_blank(),
#          legend.title = element_text(face = "bold"),
#          legend.text = element_text())
#  
#  ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "lombardoComparison_named.png"),
#         height = 7,
#         width = 6.5,
#         device = "png", dpi = 300)
#  

## ----otherMeta-analysis, eval=FALSE-------------------------------------------
#  
#  # Histograms
#  lombardo_hist <- data.frame(Chemical = c(lombardo$DTXSID,
#                                my_tkstats$Chemical),
#                              Vss = c(lombardo$`human VDss (L/kg)`,
#                                my_tkstats$Vss.tkstats),
#                              halflife = c(lombardo$`terminal  t1/2 (h)`,
#                                my_tkstats$halflife.tkstats),
#                              Source = c(rep_len("lombardo",
#                                      length.out = nrow(lombardo)),
#                                rep_len("invivoPKfit",
#                                        length.out = nrow(my_tkstats))))
#  
#  
#  sfig7A <- lombardo_hist %>%
#    ggplot(aes(x = log2(Vss), color = Source,
#               group = Source)) +
#    geom_freqpoly(aes(y = after_stat(ndensity)),
#                  linewidth = 2/.pt,
#                  bins = 20) +
#    labs(y = "Scaled Frequency",
#         x = "Vss") +
#    scale_x_continuous(labels = scales::label_math(expr = 2^.x)) +
#    scale_color_manual(values = c("black", "#1064c9")) +
#    guides(color = guide_legend(nrow = 2, linewidth = 1)) +
#    theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold", size = 12),
#          text = element_text(size = 10),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          axis.text = element_text(face = "bold"),
#          legend.position = "bottom",
#          legend.key = element_blank(),
#          legend.title = element_text(face = "bold"),
#          legend.text = element_text())
#  
#  sfig7A
#  other_meta <- read_csv(
#    file = paste0(Sys.getenv("FIG_DIR"), "SupTableInVivoDatandPreds.csv")) %>%
#    dplyr::select(DTXSID, fbioh, fbior) %>%
#    filter(DTXSID %in% my_tkstats$Chemical)
#  
#  fgutabs <- my_pk$fit %>% filter(param_name %in% "Fgutabs", convcode == 0) %>%
#    distinct(Chemical, Species, model, method, estimate) %>%
#    semi_join(winmodel) %>%
#    filter(model != "model_flat") %>%
#    rename(Fgutabs.tkstats = "estimate")
#  
#  
#  sfig7B <- other_meta %>% left_join(fgutabs, by = c("DTXSID" = "Chemical")) %>%
#    filter(Species %in% c("rat")) %>%
#    filter(!is.na(Fgutabs.tkstats),
#           !is.na(fbior)) %>%
#    distinct(DTXSID, Species,
#             Fgutabs.tkstats, fbior) %>%
#    pivot_longer(cols = c(Fgutabs.tkstats, fbior),
#                 names_to = "Source",
#                 values_to = "Fgutabs") %>%
#      mutate(Source = ifelse(str_detect(Source, "tkstats"), "invivoPKfit",
#                             "Honda et al.")) %>%
#    left_join(chem_name_translate, by = join_by(DTXSID == Chemical)) %>%
#    mutate(Chemical_Name) %>%
#    ggplot(mapping = aes(
#      x = Fgutabs,
#      y = reorder(Chemical_Name, Fgutabs)
#    )) +
#    geom_point(aes(color = Source),
#               shape = 24,
#               position = position_dodge(0.7),
#               size = 4/.pt, stroke = 1.5/.pt) +
#    scale_color_manual(values = c("black", "green4"),
#                       breaks = c("invivoPKfit", "Honda et al.")) +
#    labs(x = "Oral Bioavailability") +
#    guides(color = guide_legend(nrow = 2)) +
#    theme(panel.border = element_rect(color = "black", fill = NA, linewidth = 1.5),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          strip.background = element_rect(fill = "white"),
#          strip.text = element_text(face = "bold", size = 12),
#          text = element_text(size = 10),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          axis.text = element_text(face = "bold"),
#          axis.title.y = element_blank(),
#          legend.position = "bottom",
#          legend.key = element_blank(),
#          legend.title = element_text(face = "bold"),
#          legend.text = element_text())
#  
#  
#  plot_grid(sfig7A, sfig7B,
#            align = "v",
#            ncol = 1,
#            axis = "lr",
#            rel_heights = c(1,1.25),
#            labels = c("A", "B"))
#  ggsave(filename = paste0(Sys.getenv("FIG_DIR"),
#                           today,
#                           "metaAnalysis_Comparisons_SuppFigure7_named.png"),
#         height = 5.5,
#         width = 4,
#         device = "png", dpi = 300)
#  

## ----do_fit-benchmarking, eval=FALSE------------------------------------------
#  # Randomize the order of chemical species
#  chem_spec <- cvt %>%
#    dplyr::distinct(analyte_dtxsid, species) %>%
#    dplyr::slice_sample(prop = 1)
#  
#  test_group_size <- seq(25, 105, by = 10)
#  fit_options <- expand.grid(error_model = c("pooled",
#                                             "hierarchical"),
#                             dose_norm = FALSE,
#                             log10_trans = TRUE,
#                             time_scale = c(
#                               TRUE,
#                               FALSE),
#                             stringsAsFactors = FALSE)
#  
#  fit_options <- tidyr::expand_grid(fit_options, test_group_size)
#  
#  fit_options <- fit_options %>% rowwise() %>%
#    mutate(this_data = list({
#      tmp <- chem_spec %>% slice_head(n = test_group_size) %>%
#        left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
#        pk() +
#          scale_time(new_units = ifelse(!time_scale,
#                                        "identity",
#                                        "auto")) +
#          scale_conc(dose_norm = dose_norm, log10_trans = log10_trans) +
#          settings_preprocess(suppress.messages = TRUE)
#      do_prefit(tmp)
#    }
#    ),
#    data_size = chem_spec %>% slice_head(n = test_group_size) %>%
#      left_join(cvt, by = join_by(analyte_dtxsid, species)) %>%
#      NROW())
#  
#  # Organization of the benchmarking
#  # For each fit_option, return the four values of system.time() that we want
#  df_out <- fit_options %>%
#    rowwise() %>%
#    mutate(tim_1core = system.time(do_fit(this_data))[["elapsed"]],
#           tim_4core = system.time(do_fit(this_data, n_cores = 4))[["elapsed"]],
#           tim_7core = system.time(do_fit(this_data, n_cores = 7))[["elapsed"]],
#           tim_12core = system.time(do_fit(this_data, n_cores = 12))[["elapsed"]]
#           )
#  
#  df_out <- df_out %>% select(-this_data)
#  gc()
#  
#  full_long <- df_out %>%
#    pivot_longer(cols = c(tim_1core:tim_12core),
#                          names_to = "N_cores",
#                          values_to = "Time_s") %>%
#    group_by(N_cores, test_group_size, data_size) %>%
#    summarize(mean_time = mean(Time_s, na.rm = TRUE)/60,
#              max_time = max(Time_s, na.rm = TRUE)/60,
#              min_time = min(Time_s, na.rm = TRUE)/60
#              ) %>%
#    mutate(N_cores = as.numeric(str_extract(N_cores, "[:digit:]+")))
#  
#  df_out %>% clipr::write_clip()
#  ggplot(full_long,
#         aes(x = test_group_size,
#             y = mean_time,
#             color = as.factor(N_cores))) +
#    geom_point() +
#    geom_linerange(aes(ymin = min_time,
#                  ymax = max_time)) +
#    geom_line(aes(group = N_cores)) +
#    labs(x = "Number of Data Groups",
#         y = "Runtime (minutes)",
#         title = "Number of Processing Cores",
#         subtitle = "(with min/max runtime range)") +
#    facet_grid(cols = vars(N_cores)) +
#    scale_color_manual(values = c("black", "#40c679", "#31a354", "#006837")) +
#    coord_fixed(ratio = 8) +
#    theme(panel.border = element_rect(color = "black", fill = NA, size = 1.5),
#          panel.background = element_blank(),
#          panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
#          legend.position = "none",
#          plot.title = element_text(hjust = 0.5),
#          plot.subtitle = element_text(hjust = 0.5),
#          axis.ticks = element_blank(),
#          axis.line = element_blank(),
#          strip.background = element_blank())
#  
#  
#  
#  
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                today,
#                "Parallelization_Benchmarking.png"),
#         width = 5,
#         height = 2.5,
#         units = "in")
#  

## ----sfig5-examples-of-fits, eval=FALSE---------------------------------------
#  pl <- plot(my_pk, use_scale_conc = TRUE, n_interp = 10)
#  names(pl) # Chemical Species observations observation_plot predicted predicted_plot final_plot
#  
#  s6_plA <- pl %>% filter(Chemical %in% "DTXSID3031860") %>%
#    pull(observation_plot) %>% .[[1]]
#  s6_plB_mouse <- pl %>% filter(Chemical %in% "DTXSID4020533",
#                                Species %in% "mouse") %>% pull(final_plot) %>% .[[1]]
#  s6_plB_rat <- pl %>% filter(Chemical %in% "DTXSID4020533",
#                                Species %in% "rat") %>% pull(final_plot) %>% .[[1]]
#  s6_plC <- pl %>% filter(Chemical %in% "DTXSID00216205") %>%
#    pull(final_plot) %>% .[[1]]
#  s6_plD <- pl %>% filter(Chemical %in% "DTXSID8025337") %>%
#    pull(final_plot) %>% .[[1]]
#  
#  # Set colors palettes for each
#  # panel A : Oranges
#  panelA_cols <- brewer.pal(8, "Oranges")[3:8]
#  # panel B : Greens
#  panelB_cols <- brewer.pal(9, "Greens")[2:10]
#  # panel C : Blue (only 2 values)
#  # panel D : Purple (6 values)
#  panelD_cols <- brewer.pal(9, "Purples")[4:9]
#  
#  s6_plA <- s6_plA +
#    labs(title = "perfluorodecanoic acid") +
#    scale_color_manual(values = panelA_cols, aesthetics = c("color", "fill")) +
#    theme(text = element_text(size = 20),
#          legend.position = "none",
#          strip.background = element_rect(fill = "white"))
#  
#  s6_plB_mouse <- s6_plB_mouse +
#    labs(title = "1,4-dioxane\nMouse") +
#    scale_color_manual(values = panelB_cols[c(5,8)],
#                       aesthetics = c("color", "fill")) +
#    theme(text = element_text(size = 20),
#          legend.position = "none",
#          strip.background = element_rect(fill = "white"))
#  
#  s6_plB_rat <- s6_plB_rat +
#    labs(title = "1,4-dioxane\nRat") +
#    scale_color_manual(values = panelB_cols[c(1, 2, 3, 4, 5, 6,7, 8)],
#                       aesthetics = c("color", "fill")) +
#    theme(text = element_text(size = 20),
#          legend.position = "none",
#          strip.background = element_rect(fill = "white"))
#  
#  s6_plC <- s6_plC +
#    labs(title = "psoralen") +
#    scale_color_manual(values = c("#6BAED6", "#08519C", "darkblue"),
#                       aesthetics = c("color", "fill")) +
#    theme(text = element_text(size = 20),
#          legend.position = "none",
#          strip.background = element_rect(fill = "white"))
#  
#  s6_plD <- s6_plD +
#    labs(title = "formamide") +
#    scale_color_manual(values = panelD_cols,
#                       aesthetics = c("color", "fill")) +
#    theme(text = element_text(size = 20),
#          legend.position = "none",
#          strip.background = element_rect(fill = "white"))
#  
#  # Put it all together
#  s6_plB <- plot_grid(s6_plB_mouse, s6_plB_rat,
#              align = "h", axis = "bt",
#             rel_widths = c(1.2,2))
#  
#  
#  s6_plCD <- plot_grid(s6_plC, s6_plD, labels = c("C", "D"), label_size = 24)
#  
#  
#  plot_grid(
#    s6_plA,
#    s6_plB,
#    s6_plCD,
#    align = "v",
#    ncol = 1,
#    rel_heights = c(1.25, 1.2, 1.25),
#    labels = c("A", "B", ""),
#    label_size = 24
#  )
#  
#  # Am making this 2X the size I need it to be
#  # so the points are a reasonable size when I scale it down
#  ggsave(paste0(Sys.getenv("FIG_DIR"),
#                "ExampleFitsPlots_06.12.2024.png"),
#         width = 13,
#         height = 14,
#         units = "in")
#  

## ----plotting_all_Fits, eval=FALSE--------------------------------------------
#  ### Plotting for all (2.5MB file)
#  pl <- plot(my_pk, n_interp = 12, use_scale_conc = TRUE)
#  pdf(file = paste0(Sys.getenv("FIG_DIR"),
#                "Transformed_Joint_AllFits_Final_2024_05_16.pdf"),
#      height = 6, width = 10)
#  for (i in 1:nrow(pl)) {
#    print(pl$final_plot[[i]])
#  }
#  dev.off()
#  
#  # From my new SQL pull
#  pl <- plot(my_pk, n_interp = 12, use_scale_conc = FALSE, best_fit = TRUE)
#  pdf(file = paste0(Sys.getenv("FIG_DIR"),
#                "Joint_bobyqa_AllFits_Final_20240514.pdf"),
#      height = 10, width = 10)
#  for (i in 1:nrow(pl)) {
#    print(pl$final_plot[[i]])
#  }
#  dev.off()
#  

## ----Single-Plot, eval=FALSE--------------------------------------------------
#  pl <- plot(my_pk, n_interp = 10, use_scale_conc = FALSE)
#  pl %>%
#    filter(Chemical %in% "DTXSID5020029") %>%
#    pull(final_plot) %>% .[[1]]
#  

## ----cvtdb-prep, eval=FALSE---------------------------------------------------
#  my_cvt <- cvt
#  my_wm <- get_winning_model(my_pk)
#  
#  my_tkstats <- get_tkstats(my_pk) %>%
#    rename2_cvt() %>%
#    select(-c(invivpk_dose_level, dose_units, conc_units, time_units,
#              data_sigma_group, fk_extraction_document_id)) %>%
#    distinct() %>%
#    pivot_longer(cols = cltot:vss_fgutabs,
#                 names_to = "param_name",
#                 values_to = "estimate") %>%
#    filter(!is.na(estimate)) %>%
#    mutate(param_units = case_when(
#      param_name %in% c("cltot", "cltot_fgutabs") ~ "L/hours",
#      param_name %in% c("css", "cmax") ~ "mg/L",
#      param_name %in% c("halflife", "tmax") ~ "hours",
#      param_name %in% c("vss", "vss_fgutabs") ~ "L",
#      param_name %in% "auc_infinity" ~ "mg/L * hours",
#      .default = NA_character_
#    )) %>%
#    distinct()%>%
#    mutate(dose_normalization = conc_scale_use(TRUE, my_pk)$dose_norm,
#           log10_transformation = conc_scale_use(TRUE,my_pk)$log10_trans,
#           time_scaling = "identity",
#           invivoPKfit_version = "2.0.0",
#           update_date = today) %>%
#    filter(model %in% c("model_1comp", "model_2comp"))
#  
#  my_params <- my_pk$fit %>%
#    rename2_cvt() %>% distinct(
#      analyte_dtxsid, species, model, method, param_name, estimate, param_units
#    ) %>%
#    mutate(dose_normalization = conc_scale_use(TRUE, my_pk)$dose_norm,
#           log10_transformation = conc_scale_use(TRUE,my_pk)$log10_trans,
#           time_scaling = "identity",
#           invivoPKfit_version = "2.0.0",
#           update_date = today) %>%
#    filter(model %in% c("model_1comp", "model_2comp"))
#  
#  my_cvt_unique <- my_cvt %>%
#    dplyr::mutate(
#      conc_processing_flags =
#        case_when(
#          invivPK_conc == conc ~ glue::glue("Used 'conc' value."),
#          invivPK_conc == conc_original ~ glue::glue("Used 'conc_original' value."),
#          invivPK_conc == conc_original * conc_unit_norm_factor ~ glue::glue(
#            "Mulitplied 'conc_original' by {conc_unit_norm_factor}."
#          ),
#          invivPK_conc != conc_original * conc_unit_norm_factor ~ glue::glue(
#            "Multiplied 'conc_original' by {signif(invivPK_conc/conc_original, 2)}."
#          ),
#          is.na(conc) ~ glue::glue(
#            "Multiplied 'conc_original' by {signif(invivPK_conc/conc_original, 2)}"
#            ),
#          .default = ""
#        )
#    ) %>%
#    dplyr::mutate(
#      loq_processing_flags =
#        if_else(
#          invivPK_loq == loq,
#          glue::glue("Used 'loq' value."),
#          glue::glue("Multiplied 'loq' value by {signif(invivPK_loq/loq,2)}."),
#          missing = glue::glue("'loq' was imputed as 90% of lowest concentration.")
#        )
#    ) %>%
#    dplyr::mutate(
#      dose_processing_flags =
#        if_else(
#          invivPK_dose_level == dose_level_normalized,
#          glue::glue("Used 'dose_level_normalized'."),
#          glue::glue("Multiplied 'dose_level_normalized' by",
#                     " {signif(invivPK_dose_level/dose_level_normalized, 3)}.")
#        )
#    ) %>%
#    dplyr::select(fk_series_id,
#                  analyte_dtxsid,
#                  species,
#                  administration_route_normalized,
#                  conc_medium_normalized,
#                  conc_processing_flags,
#                  dose_processing_flags,
#                  loq_processing_flags,
#                  ) %>%
#    group_by(analyte_dtxsid, species,
#             administration_route_normalized,
#             conc_medium_normalized) %>%
#    summarize(all_series_ids = paste(unique(fk_series_id), collapse = ", "),
#              conc_processing_flags = paste(unique(conc_processing_flags), collapse = " "),
#              dose_processing_flags = paste(unique(dose_processing_flags), collapse = " "),
#              loq_processing_flags = paste(unique(loq_processing_flags), collapse = " ")) %>%
#    dplyr::distinct()
#  
#  tkstats_cvtdb <- inner_join(my_cvt_unique, my_tkstats,
#                       by = join_by(analyte_dtxsid, species,
#                                    administration_route_normalized,
#                                    conc_medium_normalized)) %>%
#    distinct()
#  params_cvtdb <- inner_join(my_cvt_unique, my_params,
#                       by = join_by(analyte_dtxsid, species),
#                       relationship = "many-to-many") %>%
#    distinct()
#  
#  # Writing to file
#  bind_rows(params_cvtdb,
#            tkstats_cvtdb) %>%
#    distinct() -> tkparams_cvtdb
#  tkparams_cvtdb_winmodels <- tkparams_cvtdb %>% semi_join(my_wm)
#  save(tkparams_cvtdb, tkstats_cvtdb, params_cvtdb, tkparams_cvtdb_winmodels,
#       file = "data-raw/tkparams_CvTdb.Rdata")

## ----information--------------------------------------------------------------
sessionInfo()