### Rscript for categorical predictors ########################## ### dichotomous variable # ########################## library(ISwR) data(cystfibr) names(cystfibr) attach(cystfibr) sex # 0=male, 1=female # exploratory plot of pemax (maximum experitory pressure vs age and gender) # do you think the effect of gender is significant? plot(pemax ~ age, pch=sex+1, col=sex+1) legend(x=7,y=195,legend=c("male","female"),pch=c(1,2),col=c(1,2)) # sex is already coded with 0's and 1's # we don't have to construct a dummy variable m1 <- lm(pemax ~ age + sex) summary(m1) alpha.hat <- m1$coef[1] beta.hat <- m1$coef[2] gamma.hat <- m1$coef[3] plot(pemax ~ age, pch=sex+1, col=sex+1) legend(x=7,y=195,legend=c("male","female"),pch=c(1,2),col=c(1,2)) abline(alpha.hat,beta.hat,col=1,lwd=2) abline(alpha.hat+gamma.hat,beta.hat,col=2,lwd=2) # is the effect of sex significant? # see p-value in summary(lm), or do incremental F-test: m0 <- lm(pemax~age) m1 <- lm(pemax~age+sex) anova(m0,m1) ######################### ### polytomous variable # ######################### library(car) data(Duncan) attach(Duncan) pairs(Duncan[2:4],col=unclass(type),pch=unclass(type), main="",lower.panel=NULL) legend(x=0.13,y=.57,legend=c("bc","prof","wc"),col=c(1,2,3),pch=c(1,2,3)) # the following does not work. why? bc <- (type=="bc")*1 prof <- (type=="prof")*1 wc <- (type=="wc")*1 m1 <- lm(prestige ~ education + income + bc + prof + wc) summary(m1) # we must use only two dummy variables # let's use "bc" as the baseline category m1 <- lm(prestige ~ education + income + prof + wc) summary(m1) # a quicker way: m1 <- lm(prestige ~ education + income + type) summary(m1) # is the effect of type significant? # we can no longer assess this via a t-test; must use the F-test (anova) m0 <- lm(prestige ~ education + income) m1 <- lm(prestige ~ education + income + type) anova(m0,m1) # what do the p-values in 'summary(lm())' mean? # compute p-value for prof: m0 <- lm(prestige ~ education + income + wc) m1 <- lm(prestige ~ education + income + prof + wc) anova(m0,m1) ############################# ### Model with interactions # ############################# m1 <- lm(prestige ~ education + income + type + education*type + income*type) summary(m1) # or less typing (R automatically includes the main effects) m1 <- lm(prestige ~ education*type + income*type) summary(m1) # first test for interaction effects # test interaction between income and type: m0 <- lm(prestige ~ education + income + type + education*type) m1 <- lm(prestige ~ education + income + type + education*type + income*type) anova(m0,m1) # this interaction is not significant. # test interaction between education and type: m0 <- lm(prestige ~ education + income + type + income*type) m1 <- lm(prestige ~ education + income + type + education*type + income*type) anova(m0,m1) # this interaction is not significant. # since interactions were not significant, # we now test for main effects # test for effect of education, # assuming that there are no interaction terms involving education: m0 <- lm(prestige ~ income + type + income*type) m1 <- lm(prestige ~ education + income + type + income*type) anova(m0,m1) # this effect is significant. # test for effect of income, # assuming that there are no interaction terms involving income: m0 <- lm(prestige ~ education + type + education*type) m1 <- lm(prestige ~ education + income + type + education*type) anova(m0,m1) # this effect is significant. # test for effect of type, # assuming that there are no interaction terms involving type: m0 <- lm(prestige ~ education + income) m1 <- lm(prestige ~ education + income + type) anova(m0,m1) # this effect is significant.