run_RLMs <- function(funct="rob", do.range=TRUE, save=FALSE, rmv.NA=FALSE){
  
### FUNCTION FOR ORGANIZING DATA AND RUNNING ROBUST LINEAR MODELS ###
#   INPUT
#     funct = which robust linear model code to use: rlm = rlm() from the library 'MASS', rob=lmrob() from library 'robust', default is rob
#     do.range = whether to save the range of years or not, default is TRUE
#     save = save output as .csv fil, default= FALSE
#     rmv.NA = if the input keeps extra rows, can remove here (e.g. sea turles) - should not normally need so default = FALSE
  
  print("Hope you remembered to update your working directory and file names! Just checking!")
  setwd("C:/Users/emily/Desktop/Collabs, meetings, etc/UVM/Lifting baselines/Data")
  
  DB<-read.csv("Terrestrial.csv")
  # for some reason sea turtles has an issue with calling in all the rows not just some
  if(rmv.NA==TRUE){
    DB<-DB[194,]
  } 
  
# make sure all the things needed are available - this makes sure none are missing
# If any are TRUE review input data and make sure there are no missing values and that all read correctly
# One thing to check is to make sure formulaic cells have been translated correctly in to numbers - open .cvs in Excel and check call formatting
  if(sum(is.na(DB$common_name)==TRUE)>0){print("stop - there are NAs in your common name column")}
  if(sum(is.na(DB$scientific_name)==TRUE)>0){print("stop - there are NAs in your scientific name column")}
  if(sum(is.na(DB$sub_country_location)==TRUE)>0){print("stop - there are NAs in your location column")}
  if(sum(is.na(DB$manstat)==TRUE)>0){print("stop - there are NAs in your 'manstat' column")}
  if(sum(is.na(DB$pop_status)==TRUE)>0){print("stop - there are NAs in your 'pop_status' column")}
   
  
# Create group index ('gind') for running the rlm() by correct grups in the loop 
# Selects based on management status and location
  DB$gind<-paste(DB$scientific_name,"_",DB$sub_country_location,"_",DB$manstat,sep="")
  
# check and remove any with NA in g-ind
  if(sum(is.na(DB$gind)==FALSE)>0){DB<-DB[!is.na(DB$gind),]}
  
# get unique qualifiers for group ID, some are not set as a character
  Gind<-unique(as.character(DB$gind))
  
# check that the number of g-inds are the same
  if((length(unique(DB$gind)))!=(length(Gind))){stop("your gind and Gind values do not match")}

  
### Robust linear models of logged data - lmrob() ###
### fits linear model by robust regression using M estimator
  
# create data.frame for the output statistics here - REMOVE "manstat" IF NOT RUNNING MANAGEMENT TYPES SEPARATELY 
  LMT<-data.frame(gind = character(), slopeval = numeric(), slope_se =numeric(), slope_t =numeric(), 
                  slope_pv=numeric(0), slope_95L=numeric(0), slope_95H=numeric(0), intval=numeric(0), 
                  int_se=numeric(0), int_t = numeric(), int_pv = numeric(), int_95L=numeric(0),int_95H=numeric(0))
  
  if(funct=="rlm"){library(MASS)}       # rlm()
  if(funct=="rob2"){library(robust)}    # for robustbase and lmRob(), but that one doesn't appear any longer
  if(funct=="rob"){library(robustbase)} # for lmrob()
  
# if you have trouble running the rlms, you can test the code below species by species, setting 'i' to one species here
# i<- "antilocapra americana_az_rec"  # run for just one spp group
  
  for (i in Gind){ 
    options(warn=1) # so the warnings are printed
    
    DBx<-DB[DB$gind==i,]  # use unique group ID to run each separately
    mngmt<- as.character(unique(DBx$manstat))  
    
    if(nrow(DBx)>2){  #only run the rest if there are more than 2 observed years
      # run rlms on logged data, "pop_status" is population index that year   
      if(funct=="rlm"){
        lm_test<-rlm(log(pop_status+1)~observation_year,DBx)
        # Note "tryCatch" will report back which time series spits out a warning so user can keep track of them
        # lm_test<-tryCatch(rlm(log(pop_status+1)~observation_year,DBx), finally=print(i))
      }
      if(funct=="rob"){
        # lm_test<-lmrob(log(pop_status+1)~observation_year,DBx)
        # Note "tryCatch" will report back which time series spits out a warning so user can keep track of them
        # lm_test<-tryCatch(lmrob(log(pop_status+1)~observation_year,DBx), finlly=print(i))
        lm_test<-tryCatch(lmrob(log(pop_status+1)~observation_year,DBx,  method="M", init=lmrob.lar, psi="lqq"), finlly=print(i))
      }
      # save slope stats - NB that lmrob() calculates p-values using students t, so just call them
      slopeval<-lm_test$coefficients[2]  # slope value
      slope_se<-summary(lm_test)$coefficients[4]  # slope standard error
      slope_t<-summary(lm_test)$coefficients[6]   # slope t-value
      if(funct=="rlm"){slope_pv = 2*pt(abs(slope_t), summary(lm_test)$df[2], lower.tail=FALSE)}#students t of t-value
      else{slope_pv = summary(lm_test)$coefficients[8]}  # students t of t-value from lmrob()
      slope_95L = summary(lm_test)$coefficients[2]-(1.96*summary(lm_test)$coefficients[4]) # Lower 95 confidence interval bound
      slope_95H = summary(lm_test)$coefficients[2]+(1.96*summary(lm_test)$coefficients[4]) # Upper 95 confidence interval bound
      
      # save stats on the intercept
      intval<-lm_test$coefficients[1]
      int_se<-summary(lm_test)$coefficients[3]
      int_t<-summary(lm_test)$coefficients[5]
      if(funct=="rlm"){int_pv = 2*pt(abs(int_t), summary(lm_test)$df[2], lower.tail=FALSE)}
      else {int_pv = summary(lm_test)$coefficients[7]}
      int_95L = summary(lm_test)$coefficients[1]-(1.96*summary(lm_test)$coefficients[3]) # Lower 95 confidence interval
      int_95H = summary(lm_test)$coefficients[1]+(1.96*summary(lm_test)$coefficients[3]) # Upper 95 confidence interval
      
      SP2<-data.frame(gind=i, slopeval , slope_se, slope_t, slope_pv,slope_95L, slope_95H, intval,int_se, int_t, int_pv, int_95L, int_95H)
      
      # Add management type    
      SP2 <- cbind(SP2, mngmt)
      
      # keep the range of years included
      if(do.range==TRUE){
        years<-range1(DBx$observation_year)
        SP2<-cbind(SP2, years)
      }
      LMT<-rbind(LMT,SP2) # use the earlier category names to make final table
      #
      
    }
  }
  rm(i, lm_test, slope_pv, slope_se, slope_t, slopeval, intval, int_se, int_t, 
     int_pv, SP2, DBx, mngmt, slope_95H, slope_95L, int_95H, int_95L, years, do.range)

  options(warn=0) #reset
  

  
### ADD ADDITIONAL IMPORTANT ELEMENTS TO THE OUTPUT ####
  
# create another data frame on how many obs per g-ind, rename cols
# combine with LMT to add in # obs to the data frame (i.e. the "n")
  numDB<-as.data.frame(table(DB$gind))
  names(numDB)<- c("gind", "ntot")
  LMT<-merge(LMT,numDB,all.x=TRUE, by="gind") 
  
# note those where 95% CI include slope = 0 (because a negative * positive = <0, therefore one is above and one below 0)
  LMT$signf <- (LMT$slope_95H*LMT$slope_95L)>0

  
# create a new data frame from summary table - counts of group ID, common and sci name base on location
# this step may take some time
  inDB<-as.data.frame(table(DB$gind,DB$common_name,DB$scientific_name,DB$sub_country_location))
  
# remove those with no counts and add column names
  inDB<-inDB[inDB$Freq>0,c(1:4)]
  names(inDB)<- c("gind", "common_name", "sci_name", "region")
  
# combine with output data - adds back in the common and sci name as well as region seperately
  LMT<-merge(LMT,inDB,all.x=TRUE,by="gind")


### SAVING SUMMARY OF OUTPUT ###
  if(save==TRUE){
    setwd("C:/Users/emily/Desktop/Collabs, meetings, etc/UVM/Lifting baselines/Results")
    write.csv(LMT,"Terr_lmrob.csv")
  }
  LMT
}