### Rscript for categorical predictors

##########################
### dichotomous variable #
##########################

library(ISwR)
data(cystfibr)
names(cystfibr)
attach(cystfibr)
sex
# 0=male, 1=female

# exploratory plot of pemax (maximum experitory pressure vs age and gender)
# do you think the effect of gender is significant?
plot(pemax ~ age, pch=sex+1, col=sex+1)
legend(x=7,y=195,legend=c("male","female"),pch=c(1,2),col=c(1,2))

# sex is already coded with 0's and 1's
# we don't have to construct a dummy variable
m1 <- lm(pemax ~ age + sex)
summary(m1)
alpha.hat <- m1$coef[1]
beta.hat <- m1$coef[2]
gamma.hat <- m1$coef[3]
plot(pemax ~ age, pch=sex+1, col=sex+1)
legend(x=7,y=195,legend=c("male","female"),pch=c(1,2),col=c(1,2))
abline(alpha.hat,beta.hat,col=1,lwd=2)
abline(alpha.hat+gamma.hat,beta.hat,col=2,lwd=2)

# is the effect of sex significant?
# see p-value in summary(lm), or do incremental F-test:
m0 <- lm(pemax~age)
m1 <- lm(pemax~age+sex)
anova(m0,m1)



#########################
### polytomous variable #
#########################

library(car)
data(Duncan)
attach(Duncan)

pairs(Duncan[2:4],col=unclass(type),pch=unclass(type), main="",lower.panel=NULL)
legend(x=0.13,y=.57,legend=c("bc","prof","wc"),col=c(1,2,3),pch=c(1,2,3))

# the following does not work. why?
bc <- (type=="bc")*1
prof <- (type=="prof")*1
wc <- (type=="wc")*1
m1 <- lm(prestige ~ education + income + bc + prof + wc)
summary(m1)

# we must use only two dummy variables
# let's use "bc" as the baseline category

m1 <- lm(prestige ~ education + income + prof + wc)
summary(m1)

# a quicker way:
m1 <- lm(prestige ~ education + income + type)
summary(m1)

# is the effect of type significant?
# we can no longer assess this via a t-test; must use the F-test (anova)
m0 <- lm(prestige ~ education + income)
m1 <- lm(prestige ~ education + income + type)
anova(m0,m1)

# what do the p-values in 'summary(lm())' mean?
# compute p-value for prof:
m0 <- lm(prestige ~ education + income + wc)
m1 <- lm(prestige ~ education + income + prof + wc)
anova(m0,m1)


#############################
### Model with interactions #
#############################

m1 <- lm(prestige ~ education + income + type + education*type + income*type)
summary(m1)

# or less typing (R automatically includes the main effects)
m1 <- lm(prestige ~ education*type + income*type)
summary(m1)

# first test for interaction effects

# test interaction between income and type:
m0 <- lm(prestige ~ education + income + type + education*type)
m1 <- lm(prestige ~ education + income + type + education*type + income*type)
anova(m0,m1)
# this interaction is not significant.

# test interaction between education and type:
m0 <- lm(prestige ~ education + income + type + income*type)
m1 <- lm(prestige ~ education + income + type + education*type + income*type)
anova(m0,m1)
# this interaction is not significant.

# since interactions were not significant, 
# we now test for main effects

# test for effect of education, 
# assuming that there are no interaction terms involving education:
m0 <- lm(prestige ~ income + type + income*type)
m1 <- lm(prestige ~ education + income + type + income*type)
anova(m0,m1)
# this effect is significant.

# test for effect of income,
# assuming that there are no interaction terms involving income:
m0 <- lm(prestige ~ education + type + education*type)
m1 <- lm(prestige ~ education + income + type + education*type)
anova(m0,m1)
# this effect is significant.

# test for effect of type,
# assuming that there are no interaction terms involving type:
m0 <- lm(prestige ~ education + income)
m1 <- lm(prestige ~ education + income + type)
anova(m0,m1)
# this effect is significant.