# Diagnostics # Reproduces code for Fox Chapter 11 and Faraway Ch 7 ########################## # get data and fit model # ########################## library(faraway) data(savings) ?savings savings pairs(savings) # fit the model m <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data=savings) summary(m) ################### # leverage points # ################### # make plot par(mfrow=c(1,1)) plot(hatvalues(m),ylab="Leverages",main="Index plot of leverages") abline(h=2*(4+1)/50) # identify points interactively countries <- row.names(savings) identify(c(1:50), hatvalues(m), countries) savings[c(44,49),] summary(savings) ################# # residual plot # ################# par(mfrow=c(2,2)) plot(fitted(m),resid(m), ylab="Residuals", main="Residuals vs. fitted values") abline(h=0) identify(fitted(m), resid(m), countries) plot(fitted(m),rstandard(m), ylab="Standardized residuals", main="Standardized residuals vs fitted values") abline(h=0) abline(h=-2) abline(h=2) identify(fitted(m), rstandard(m), countries) plot(fitted(m),rstudent(m), ylab="Studentized residuals", main="Studentized residuals vs fitted values") abline(h=0) abline(h=-2) abline(h=2) identify(fitted(m), rstudent(m) , countries) ############# # Influence # ############# par(mfrow=c(1,1)) plot(cooks.distance(m), ylab="Cook's distance", main="Index plot of Cook's distance") n <- nrow(savings) 4/(n-4-1) # cut-off Cook's distance abline(h = 4/(n-4-1)) identify(c(1:50), cooks.distance(m) , countries) #################################### # easy built-in plotting functions # #################################### par(mfrow=c(2,3)) plot(m, which=c(1:5)) # remove Libya, Japan and Zambia: m2 <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data=savings, subset=c(1:50)[-c(23,46,49)]) # some of the coefficients changed a lot: summary(m2) # check diagnostic plots: par(mfrow=c(2,3)) plot(m2, which=c(1:5)) ########################### # Check model assumptions # ########################### # (this code is taken from Faraway Ch 7) library(faraway) data(airquality) ?airquality airquality pairs(airquality[,1:4], panel=panel.smooth) # first model g <- lm(Ozone ~ Solar.R + Wind + Temp,airquality,na.action = na.exclude) summary(g) # Tukey-Anscombe plot par(mfrow=c(1,1)) plot(fitted(g),residuals(g),xlab="Fitted",ylab="Residuals") abline(h=0) lines(loess.smooth(fitted(g),residuals(g)), col="red") # Note nonlinearity and nonconstant variance # Try log transformation of Ozone gl <- lm(log(Ozone) ~ Solar.R + Wind + Temp,airquality,na.action=na.exclude) plot(fitted(gl),residuals(gl),xlab="Fitted",ylab="Residuals") abline(h=0) lines(loess.smooth(fitted(gl),residuals(gl)), col="red") # Looks much better # Check for normality qqnorm(residuals(gl)) qqline(residuals(gl)) # Looks good (except for 1 large outlier that we will leave for now) # Check for correlated errors by plotting residuals vs time # (this is also called an index plot of residuals) plot(residuals(gl),ylab="Residuals") abline(h=0) # Looks OK # Check for correlated errors by plotting the (i+1)th residual # versus the ith residual, for i=1,...,n-1 plot(residuals(gl)[-153],residuals(gl)[-1], xlab=expression(hat(epsilon)[i]),ylab=expression(hat(epsilon)[i+1])) lines(loess.smooth(residuals(gl)[-153],residuals(gl)[-1])) # Looks OK # Check more formally for correlation summary(lm(residuals(gl)[-1] ~ 0+residuals(gl)[-153]))