# fruitfly code data <- read.table("http://www.amstat.org/publications/jse/datasets/fruitfly.dat") names(data) <- c("id","partners","type","longevity","thorax","sleep") attach(data) # make scatterplot matrix pairs(data) # define vectors for the colors (col) and plotting characters (pch) col.partn <- 1*(partners==0)+2*(partners==1)+3*(partners==8) # 1=black, 2=red, 3=green pch.type <- 1*(type==0) + 2*(type==1)+3*(type==9) # 1='circle', 2='triangle', 3='plus' # type=0: pregnant, type=1: virgin, type=9: NA # make scatterplot with different colors and plotting symbols par(mfrow=c(1,1)) plot(thorax, longevity, pch=pch.type, col=col.partn, ylim=c(16,97),xlim=c(.64,.94)) legend(.635,97,c("1 pregnant","8 pregnant","1 virgin","8 virgin","0 partners"), pch=c(1,2,1,2,3),col=c(2,2,3,3,1)) # make three different scatterplots, for 0, 1, and 8 females par(mfrow=c(2,2)) plot(thorax[partners==0],longevity[partners==0],pch=3,col=1, main="0 partners",ylim=c(16,97),xlim=c(.64,.94), xlab="thorax length", ylab="longevity") plot(thorax[partners==1],longevity[partners==1],pch=pch.type[partners==1], col=2,main="1 partners",ylim=c(16,97),xlim=c(.64,.94), xlab="thorax length", ylab="longevity") legend(0.635,97,c("pregnant","virgin"),pch=c(1,2),col=2) plot(thorax[partners==8],longevity[partners==8],pch=pch.type[partners==8], col=3,main="8 partners",ylim=c(16,97),xlim=c(.64,.94), xlab="thorax length", ylab="longevity") legend(0.635,97,c("pregnant","virgin"),pch=c(1,2),col=3) # create dummy variables for the different categories: dummy.1.p <- (partners==1)*(type==0)*1 dummy.1.v <- (partners==1)*(type==1)*1 dummy.8.p <- (partners==8)*(type==0)*1 dummy.8.v <- (partners==8)*(type==1)*1 dummy.0 <- (partners==0) # make boxplot of longevity and thorax for different groups par(mfrow=c(1,2)) boxplot(longevity[dummy.1.p==1],longevity[dummy.1.v==1],longevity[dummy.8.p==1],longevity[dummy.8.v==1],longevity[dummy.0==1], main="Longevity",names=c("1p","8p","1v","8v","0"),col="grey") means <- c(mean(longevity[dummy.1.p==1]),mean(longevity[dummy.1.v==1]), mean(longevity[dummy.8.p==1]),mean(longevity[dummy.8.v==1]), mean(longevity[dummy.0==1])) points(c(1:5),means) boxplot(thorax[dummy.1.p==1],thorax[dummy.1.v==1],thorax[dummy.8.p==1],thorax[dummy.8.v==1],thorax[dummy.0==1], main="Thorax length",names=c("1p","8p","1v","8v","0"),col="grey") means <- c(mean(thorax[dummy.1.p==1]),mean(thorax[dummy.1.v==1]), mean(thorax[dummy.8.p==1]),mean(thorax[dummy.8.v==1]), mean(thorax[dummy.0==1])) points(c(1:5),means) # Is thorax a confounding variable? # Thorax seems to have an effect on longevity: m1 <- lm(longevity ~ thorax) summary(m1) # But thorax is quite similar for the various groups. # This is visible in the boxplot. # We can also test it formally: m2 <- lm(thorax ~ dummy.1.p + dummy.1.v + dummy.8.p + dummy.8.v) m3 <- lm(thorax ~ 1) anova(m3,m2) # So thorax is not much correlated with group. # That was to be expected, since this was a randomized experiment. # So can we leave thorax out of the model? # Consider the following tests: # Test effect of type of female between the groups with 1 partner # First, we do not include thorax m4 <- lm(longevity[partners==1] ~ factor(type[partners==1])) summary(m4) # as a side note: note that we simply fit the means for both groups: mean(longevity[dummy.1.p==1]) mean(longevity[dummy.1.v==1]) mean(longevity[dummy.1.v==1])-mean(longevity[dummy.1.p==1]) # And now we do the same but with including thorax: m5 <- lm(longevity[partners==1] ~ thorax[partners==1] + factor(type[partners==1])) summary(m5) # Why is the result much more significant in the second model? # Let's test for interaction between number of females and type of females # We first try: partners.f <- as.factor(partners) type.f <- as.factor(type) m6 <- lm(longevity ~ thorax + partners.f + type.f + partners.f*type.f) summary(m6) # What went wrong? # We would still like to know if there is a significant interaction # between 'type of female' and 'number of females'? That is, we want to # whether the effect of 'type of female' depends on the value of 'number # of females'. How could you test this? We'll discuss possible solutions # in class next Wednesday.