run_RLMs <- function(funct="rob", do.range=TRUE, save=FALSE, rmv.NA=FALSE){ ### FUNCTION FOR ORGANIZING DATA AND RUNNING ROBUST LINEAR MODELS ### # INPUT # funct = which robust linear model code to use: rlm = rlm() from the library 'MASS', rob=lmrob() from library 'robust', default is rob # do.range = whether to save the range of years or not, default is TRUE # save = save output as .csv fil, default= FALSE # rmv.NA = if the input keeps extra rows, can remove here (e.g. sea turles) - should not normally need so default = FALSE print("Hope you remembered to update your working directory and file names! Just checking!") setwd("C:/Users/emily/Desktop/Collabs, meetings, etc/UVM/Lifting baselines/Data") DB<-read.csv("Terrestrial.csv") # for some reason sea turtles has an issue with calling in all the rows not just some if(rmv.NA==TRUE){ DB<-DB[194,] } # make sure all the things needed are available - this makes sure none are missing # If any are TRUE review input data and make sure there are no missing values and that all read correctly # One thing to check is to make sure formulaic cells have been translated correctly in to numbers - open .cvs in Excel and check call formatting if(sum(is.na(DB$common_name)==TRUE)>0){print("stop - there are NAs in your common name column")} if(sum(is.na(DB$scientific_name)==TRUE)>0){print("stop - there are NAs in your scientific name column")} if(sum(is.na(DB$sub_country_location)==TRUE)>0){print("stop - there are NAs in your location column")} if(sum(is.na(DB$manstat)==TRUE)>0){print("stop - there are NAs in your 'manstat' column")} if(sum(is.na(DB$pop_status)==TRUE)>0){print("stop - there are NAs in your 'pop_status' column")} # Create group index ('gind') for running the rlm() by correct grups in the loop # Selects based on management status and location DB$gind<-paste(DB$scientific_name,"_",DB$sub_country_location,"_",DB$manstat,sep="") # check and remove any with NA in g-ind if(sum(is.na(DB$gind)==FALSE)>0){DB<-DB[!is.na(DB$gind),]} # get unique qualifiers for group ID, some are not set as a character Gind<-unique(as.character(DB$gind)) # check that the number of g-inds are the same if((length(unique(DB$gind)))!=(length(Gind))){stop("your gind and Gind values do not match")} ### Robust linear models of logged data - lmrob() ### ### fits linear model by robust regression using M estimator # create data.frame for the output statistics here - REMOVE "manstat" IF NOT RUNNING MANAGEMENT TYPES SEPARATELY LMT<-data.frame(gind = character(), slopeval = numeric(), slope_se =numeric(), slope_t =numeric(), slope_pv=numeric(0), slope_95L=numeric(0), slope_95H=numeric(0), intval=numeric(0), int_se=numeric(0), int_t = numeric(), int_pv = numeric(), int_95L=numeric(0),int_95H=numeric(0)) if(funct=="rlm"){library(MASS)} # rlm() if(funct=="rob2"){library(robust)} # for robustbase and lmRob(), but that one doesn't appear any longer if(funct=="rob"){library(robustbase)} # for lmrob() # if you have trouble running the rlms, you can test the code below species by species, setting 'i' to one species here # i<- "antilocapra americana_az_rec" # run for just one spp group for (i in Gind){ options(warn=1) # so the warnings are printed DBx<-DB[DB$gind==i,] # use unique group ID to run each separately mngmt<- as.character(unique(DBx$manstat)) if(nrow(DBx)>2){ #only run the rest if there are more than 2 observed years # run rlms on logged data, "pop_status" is population index that year if(funct=="rlm"){ lm_test<-rlm(log(pop_status+1)~observation_year,DBx) # Note "tryCatch" will report back which time series spits out a warning so user can keep track of them # lm_test<-tryCatch(rlm(log(pop_status+1)~observation_year,DBx), finally=print(i)) } if(funct=="rob"){ # lm_test<-lmrob(log(pop_status+1)~observation_year,DBx) # Note "tryCatch" will report back which time series spits out a warning so user can keep track of them # lm_test<-tryCatch(lmrob(log(pop_status+1)~observation_year,DBx), finlly=print(i)) lm_test<-tryCatch(lmrob(log(pop_status+1)~observation_year,DBx, method="M", init=lmrob.lar, psi="lqq"), finlly=print(i)) } # save slope stats - NB that lmrob() calculates p-values using students t, so just call them slopeval<-lm_test$coefficients[2] # slope value slope_se<-summary(lm_test)$coefficients[4] # slope standard error slope_t<-summary(lm_test)$coefficients[6] # slope t-value if(funct=="rlm"){slope_pv = 2*pt(abs(slope_t), summary(lm_test)$df[2], lower.tail=FALSE)}#students t of t-value else{slope_pv = summary(lm_test)$coefficients[8]} # students t of t-value from lmrob() slope_95L = summary(lm_test)$coefficients[2]-(1.96*summary(lm_test)$coefficients[4]) # Lower 95 confidence interval bound slope_95H = summary(lm_test)$coefficients[2]+(1.96*summary(lm_test)$coefficients[4]) # Upper 95 confidence interval bound # save stats on the intercept intval<-lm_test$coefficients[1] int_se<-summary(lm_test)$coefficients[3] int_t<-summary(lm_test)$coefficients[5] if(funct=="rlm"){int_pv = 2*pt(abs(int_t), summary(lm_test)$df[2], lower.tail=FALSE)} else {int_pv = summary(lm_test)$coefficients[7]} int_95L = summary(lm_test)$coefficients[1]-(1.96*summary(lm_test)$coefficients[3]) # Lower 95 confidence interval int_95H = summary(lm_test)$coefficients[1]+(1.96*summary(lm_test)$coefficients[3]) # Upper 95 confidence interval SP2<-data.frame(gind=i, slopeval , slope_se, slope_t, slope_pv,slope_95L, slope_95H, intval,int_se, int_t, int_pv, int_95L, int_95H) # Add management type SP2 <- cbind(SP2, mngmt) # keep the range of years included if(do.range==TRUE){ years<-range1(DBx$observation_year) SP2<-cbind(SP2, years) } LMT<-rbind(LMT,SP2) # use the earlier category names to make final table # } } rm(i, lm_test, slope_pv, slope_se, slope_t, slopeval, intval, int_se, int_t, int_pv, SP2, DBx, mngmt, slope_95H, slope_95L, int_95H, int_95L, years, do.range) options(warn=0) #reset ### ADD ADDITIONAL IMPORTANT ELEMENTS TO THE OUTPUT #### # create another data frame on how many obs per g-ind, rename cols # combine with LMT to add in # obs to the data frame (i.e. the "n") numDB<-as.data.frame(table(DB$gind)) names(numDB)<- c("gind", "ntot") LMT<-merge(LMT,numDB,all.x=TRUE, by="gind") # note those where 95% CI include slope = 0 (because a negative * positive = <0, therefore one is above and one below 0) LMT$signf <- (LMT$slope_95H*LMT$slope_95L)>0 # create a new data frame from summary table - counts of group ID, common and sci name base on location # this step may take some time inDB<-as.data.frame(table(DB$gind,DB$common_name,DB$scientific_name,DB$sub_country_location)) # remove those with no counts and add column names inDB<-inDB[inDB$Freq>0,c(1:4)] names(inDB)<- c("gind", "common_name", "sci_name", "region") # combine with output data - adds back in the common and sci name as well as region seperately LMT<-merge(LMT,inDB,all.x=TRUE,by="gind") ### SAVING SUMMARY OF OUTPUT ### if(save==TRUE){ setwd("C:/Users/emily/Desktop/Collabs, meetings, etc/UVM/Lifting baselines/Results") write.csv(LMT,"Terr_lmrob.csv") } LMT }