[R] Generating all possible models from full model

Xiaogang Su xiaogangsu at gmail.com
Wed May 19 21:42:40 CEST 2010


A couple of years ago, I wrote a function for doing this. It can be
found in the following file:
http://pegasus.cc.ucf.edu/~xsu/CLASS/STA4164/mt4-2009.doc
I also pasted a copy below. Hope you find it useful.  -XG

# ========================
# ALL POSSIBLE REGRESSIONS
# ========================
# The parameter k is the total number of predictors. To apply the function,
# you need to prepare the data so that all predictors for selection are named
# as x1, x2, ..., and the response is called y.

all.possible.regressions <- function(dat, k){
    n <- nrow(dat)
    regressors <- paste("x", 1:k, sep="")
    lst <- rep(list(c(T, F)), k)
    regMat <- expand.grid(lst);
    names(regMat) <- regressors
    formular <- apply(regMat, 1, function(x)
            as.character(paste(c("y ~ 1", regressors[x]), collapse="+")))
    allModelsList <- apply(regMat, 1, function(x)
            as.formula(paste(c("y ~ 1", regressors[x]),collapse=" + ")) )
    allModelsResults <- lapply(allModelsList,
            function(x, data) lm(x, data=data), data=dat)
    n.models <- length(allModelsResults)
    extract <- function(fit) {
        df.sse <- fit$df.residual
        p <- n - df.sse -1
        sigma <- summary(fit)$sigma
        MSE <- sigma^2
        R2 <- summary(fit)$r.squared
        R2.adj <- summary(fit)$adj.r.squared
        sse <- MSE*df.sse
        aic <- n*log(sse) + 2*(p+2)
        bic <- n*log(sse) + log(n)*(p+2)
        out <- data.frame(df.sse=df.sse, p=p, SSE=sse, MSE=MSE,
            R2=R2, R2.adj=R2.adj, AIC=aic, BIC=bic)
        return(out)
    }
    result <- lapply(allModelsResults, extract)
    result <- as.data.frame(matrix(unlist(result), nrow=n.models, byrow=T))
    result <- cbind(formular, result)
rownames(result) <- NULL
colnames(result) <- c("model", "df.sse", "p", "SSE", "MSE", "R2",
"R2.adj", "AIC", "BIC")
    return(result)
}
all.possible.regressions(dat=quasar, k=5)


On Tue, May 18, 2010 at 11:38 PM, Tim Clark <mudiver1200 at yahoo.com> wrote:
> Is there a function that will allow me to run all model iterations if I specify a full model?  I am using information criteria to choose between possible candidate models.  I have been writing out all possible model combinations by hand, and I am always worried that I am missing models or have made a mistake somewhere.  It is also difficult to alter models if I want to change a term.  For example, below are the set of models I would like to run.  Is there a way to specify the full model and have R generate the rest?  I.e. specify
>
>  m1234567<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
> and have R run all the other models.
>
>
> library(MASS)
>
> #Intercept only
>  m0<-glm.convert(glm.nb(mantas~1,data=mydata))
>
> #One term - 7 models
>  #Manta abundance is greater at one of the two sites
>  m1<-glm.convert(glm.nb(mantas~site,data=mydata))
>  #Manta abundance increases each year as the population increases in size due to births or immigration being greater than deaths and emmigration
>  m2<-glm.convert(glm.nb(mantas~year,data=mydata))
>  #Manta abundances increases during part of the year due to seasonal cycles in resources (mates, food)
>  m3<-glm.convert(glm.nb(mantas~cosmonth,data=mydata))
>  m4<-glm.convert(glm.nb(mantas~sinmonth,data=mydata))
>  #Manta abundance decreases with increased lunar phase
>  m5<-glm.convert(glm.nb(mantas~coslunar, data=mydata))
>  m6<-glm.convert(glm.nb(mantas~sinlunar, data=mydata))
>  #Manta abundance increases with increased levels of plankton
>  m7<-glm.convert(glm.nb(mantas~plankton,data=mydata))
>
> #Two terms - 21 models
>  m12<-glm.convert(glm.nb(mantas~site*year, data=mydata))   #Interaction term to account for hotel being closed at Keauhou for some years
>  m13<-glm.convert(glm.nb(mantas~site+cosmonth,data=mydata))
>  m14<-glm.convert(glm.nb(mantas~site+sinmonth,data=mydata))
>  m15<-glm.convert(glm.nb(mantas~site+coslunar,data=mydata))
>  m16<-glm.convert(glm.nb(mantas~site+sinlunar,data=mydata))
>  m17<-glm.convert(glm.nb(mantas~site+plankton,data=mydata)) #Should this have an interaction term?  Plankton may varry by site
>
>  m23<-glm.convert(glm.nb(mantas~year+cosmonth,data=mydata))
>  m24<-glm.convert(glm.nb(mantas~year+sinmonth,data=mydata))
>  m25<-glm.convert(glm.nb(mantas~year+coslunar,data=mydata))
>  m26<-glm.convert(glm.nb(mantas~year+sinlunar,data=mydata))
>  m27<-glm.convert(glm.nb(mantas~year+plankton,data=mydata))
>
>  m34<-glm.convert(glm.nb(mantas~cosmonth+sinmonth,data=mydata))
>  m35<-glm.convert(glm.nb(mantas~cosmonth+coslunar,data=mydata))
>  m36<-glm.convert(glm.nb(mantas~cosmonth+sinlunar,data=mydata))
>  m37<-glm.convert(glm.nb(mantas~cosmonth+plankton,data=mydata))  #Interaction term?  Plankton may vary by season
>
>  m45<-glm.convert(glm.nb(mantas~sinmonth+coslunar, data=mydata))
>  m46<-glm.convert(glm.nb(mantas~sinmonth+sinlunar, data=mydata))
>  m47<-glm.convert(glm.nb(mantas~sinmonth+plankton, data=mydata)) #Interaction term?  Plankton may vary by season
>
>  m56<-glm.convert(glm.nb(mantas~coslunar+sinlunar, data=mydata))
>  m57<-glm.convert(glm.nb(mantas~coslunar+plankton, data=mydata))
>
>  m67<-glm.convert(glm.nb(mantas~sinlunar+plankton, data=mydata)) #Interaction term?  Plankton may have lunar cycles
>
> #Three terms - 35 models
>  m123<-glm.convert(glm.nb(mantas~site*year+cosmonth, data=mydata))
>  m124<-glm.convert(glm.nb(mantas~site*year+sinmonth, data=mydata))
>  m125<-glm.convert(glm.nb(mantas~site*year+coslunar, data=mydata))
>  m126<-glm.convert(glm.nb(mantas~site*year+sinlunar, data=mydata))
>  m127<-glm.convert(glm.nb(mantas~site*year+plankton, data=mydata))
>
>  m134<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth,data=mydata))
>  m135<-glm.convert(glm.nb(mantas~site+cosmonth+coslunar,data=mydata))
>  m136<-glm.convert(glm.nb(mantas~site+cosmonth+sinlunar,data=mydata))
>  m137<-glm.convert(glm.nb(mantas~site+cosmonth+plankton,data=mydata))
>
>  m145<-glm.convert(glm.nb(mantas~site+sinmonth+coslunar,data=mydata))
>  m146<-glm.convert(glm.nb(mantas~site+sinmonth+sinlunar,data=mydata))
>  m147<-glm.convert(glm.nb(mantas~site+sinmonth+plankton,data=mydata))
>
>  m156<-glm.convert(glm.nb(mantas~site+coslunar+sinlunar,data=mydata))
>  m157<-glm.convert(glm.nb(mantas~site+coslunar+plankton,data=mydata))
>
>  m167<-glm.convert(glm.nb(mantas~site+sinlunar+plankton,data=mydata))
>
>  m234<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth,data=mydata))
>  m235<-glm.convert(glm.nb(mantas~year+cosmonth+coslunar,data=mydata))
>  m236<-glm.convert(glm.nb(mantas~year+cosmonth+sinlunar,data=mydata))
>  m237<-glm.convert(glm.nb(mantas~year+cosmonth+plankton,data=mydata))
>
>  m245<-glm.convert(glm.nb(mantas~year+sinmonth+coslunar,data=mydata))
>  m246<-glm.convert(glm.nb(mantas~year+sinmonth+sinlunar,data=mydata))
>  m247<-glm.convert(glm.nb(mantas~year+sinmonth+plankton,data=mydata))
>
>  m256<-glm.convert(glm.nb(mantas~year+coslunar+sinlunar,data=mydata))
>  m257<-glm.convert(glm.nb(mantas~year+coslunar+plankton,data=mydata))
>
>  m267<-glm.convert(glm.nb(mantas~year+sinlunar+plankton,data=mydata))
>
>  m345<-glm.convert(glm.nb(mantas~cosmonth+sinmonth+coslunar,data=mydata))
>  m346<-glm.convert(glm.nb(mantas~cosmonth+sinmonth+sinlunar,data=mydata))
>  m347<-glm.convert(glm.nb(mantas~cosmonth+sinmonth+plankton,data=mydata))
>
>  m356<-glm.convert(glm.nb(mantas~cosmonth+coslunar+sinlunar,data=mydata))
>  m357<-glm.convert(glm.nb(mantas~cosmonth+coslunar+plankton,data=mydata))
>
>  m367<-glm.convert(glm.nb(mantas~cosmonth+sinlunar+plankton,data=mydata))
>
>  m456<-glm.convert(glm.nb(mantas~sinmonth+coslunar+sinlunar,data=mydata))
>  m457<-glm.convert(glm.nb(mantas~sinmonth+coslunar+plankton,data=mydata))
>
>  m467<-glm.convert(glm.nb(mantas~sinmonth+sinlunar+plankton,data=mydata))
>
>  m567<-glm.convert(glm.nb(mantas~coslunar+sinlunar+plankton,data=mydata))
>
> #Four terms - 34 models
>  m1234<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth, data=mydata))
>  m1235<-glm.convert(glm.nb(mantas~site*year+cosmonth+coslunar, data=mydata))
>  m1236<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinlunar, data=mydata))
>  m1237<-glm.convert(glm.nb(mantas~site*year+cosmonth+plankton, data=mydata))
>
>  m1245<-glm.convert(glm.nb(mantas~site*year+sinmonth+coslunar, data=mydata))
>  m1246<-glm.convert(glm.nb(mantas~site*year+sinmonth+sinlunar, data=mydata))
>  m1247<-glm.convert(glm.nb(mantas~site*year+sinmonth+plankton, data=mydata))
>
>  m1256<-glm.convert(glm.nb(mantas~site*year+coslunar+sinlunar, data=mydata))
>  m1257<-glm.convert(glm.nb(mantas~site*year+coslunar+plankton, data=mydata))
>
>  m1267<-glm.convert(glm.nb(mantas~site*year+sinlunar+plankton, data=mydata))
>
>  m1345<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+coslunar,data=mydata))
>  m1346<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+sinlunar,data=mydata))
>  m1347<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+plankton,data=mydata))
>
>  m1356<-glm.convert(glm.nb(mantas~site+cosmonth+coslunar+sinlunar,data=mydata))
>  m1357<-glm.convert(glm.nb(mantas~site+cosmonth+coslunar+plankton,data=mydata))
>
>  m1367<-glm.convert(glm.nb(mantas~site+cosmonth+sinlunar+plankton,data=mydata))
>
>  m1456<-glm.convert(glm.nb(mantas~site+sinmonth+coslunar+sinlunar,data=mydata))
>  m1457<-glm.convert(glm.nb(mantas~site+sinmonth+coslunar+plankton,data=mydata))
>
>  m1467<-glm.convert(glm.nb(mantas~site+sinmonth+sinlunar+plankton,data=mydata))
>
>  m1567<-glm.convert(glm.nb(mantas~site+coslunar+sinlunar+plankton,data=mydata))
>
>  m2345<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+coslunar,data=mydata))
>  m2346<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+sinlunar,data=mydata))
>  m2347<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+plankton,data=mydata))
>
>  m2356<-glm.convert(glm.nb(mantas~year+cosmonth+coslunar+sinlunar,data=mydata))
>  m2357<-glm.convert(glm.nb(mantas~year+cosmonth+coslunar+plankton,data=mydata))
>
>  m2367<-glm.convert(glm.nb(mantas~year+cosmonth+sinlunar+plankton,data=mydata))
>
>  m2456<-glm.convert(glm.nb(mantas~year+sinmonth+coslunar+sinlunar,data=mydata))
>  m2457<-glm.convert(glm.nb(mantas~year+sinmonth+coslunar+plankton,data=mydata))
>
>  m2467<-glm.convert(glm.nb(mantas~year+sinmonth+sinlunar+plankton,data=mydata))
>
>  m2567<-glm.convert(glm.nb(mantas~year+coslunar+sinlunar+plankton,data=mydata))
>
>  m3456<-glm.convert(glm.nb(mantas~cosmonth+sinmonth+coslunar+sinlunar,data=mydata))
>  m3457<-glm.convert(glm.nb(mantas~cosmonth+sinmonth+coslunar+plankton,data=mydata))
>
>  m3567<-glm.convert(glm.nb(mantas~cosmonth+coslunar+sinlunar+plankton,data=mydata))
>
>  m4567<-glm.convert(glm.nb(mantas~sinmonth+coslunar+sinlunar+plankton,data=mydata))
>
> #Five terms - 21 models
>  m12345<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+coslunar, data=mydata))
>  m12346<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+sinlunar, data=mydata))
>  m12347<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+plankton, data=mydata))
>
>  m12356<-glm.convert(glm.nb(mantas~site*year+cosmonth+coslunar+sinlunar, data=mydata))
>  m12357<-glm.convert(glm.nb(mantas~site*year+cosmonth+coslunar+plankton, data=mydata))
>
>  m12367<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinlunar+plankton, data=mydata))
>
>  m12456<-glm.convert(glm.nb(mantas~site*year+sinmonth+coslunar+sinlunar, data=mydata))
>  m12457<-glm.convert(glm.nb(mantas~site*year+sinmonth+coslunar+plankton, data=mydata))
>
>  m12467<-glm.convert(glm.nb(mantas~site*year+sinmonth+sinlunar+plankton, data=mydata))
>
>  m12567<-glm.convert(glm.nb(mantas~site*year+coslunar+sinlunar+plankton, data=mydata))
>
>  m13456<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+coslunar+sinlunar, data=mydata))
>  m13457<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+coslunar+plankton, data=mydata))
>
>  m13467<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+sinlunar+plankton, data=mydata))
>
>  m13567<-glm.convert(glm.nb(mantas~site+cosmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m14567<-glm.convert(glm.nb(mantas~site+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m23456<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+coslunar+sinlunar, data=mydata))
>  m23457<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+coslunar+plankton, data=mydata))
>
>  m23467<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+sinlunar+plankton, data=mydata))
>
>  m23567<-glm.convert(glm.nb(mantas~year+cosmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m24567<-glm.convert(glm.nb(mantas~year+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m34567<-glm.convert(glm.nb(mantas~cosmonth+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
> #Six terms - 7 models
>  m123456<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+coslunar+sinlunar, data=mydata))
>  m123457<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+coslunar+plankton, data=mydata))
>
>  m123467<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+sinlunar+plankton, data=mydata))
>
>  m123567<-glm.convert(glm.nb(mantas~site*year+cosmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m124567<-glm.convert(glm.nb(mantas~site*year+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m134567<-glm.convert(glm.nb(mantas~site+cosmonth+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
>  m234567<-glm.convert(glm.nb(mantas~year+cosmonth+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
> #Seven terms - 1 model
>  m1234567<-glm.convert(glm.nb(mantas~site*year+cosmonth+sinmonth+coslunar+sinlunar+plankton, data=mydata))
>
>
> Tim Clark
> Department of Zoology
> University of Hawaii
>
> ______________________________________________
> R-help at r-project.org mailing list
> https://stat.ethz.ch/mailman/listinfo/r-help
> PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
> and provide commented, minimal, self-contained, reproducible code.
>



-- 
==============================
Xiaogang Su, Ph.D.
Associate Professor, Biostatistician
SON, University of Alabama
Birmingham, AL 35294-1210
(205) 934-2355 [Office]
xgsu at uab.edu
xiaogangsu at gmail.com



More information about the R-help mailing list