# Brian Ripley's Applied Stats Notes library(MASS) # Least Squares Fit # modelLS <- lm(calls ~ year, data=phones) summary(modelLS,cor=F) plot(phones$year,phones$calls, lwd=3, main = "Belgium Telephone Data") abline(modelLS$coefficients, lty =2, col=1,lwd=3) # # # Robust fit using Huber psi modelRF <- rlm(calls ~ year, maxit=50, data=phones, psi=psi.huber) summary(modelRF, cor=F) abline(modelRF$coefficients,col=2,lty=4,lwd=3) summary(rlm(calls ~ year, scale.est="proposal 2", data=phones), cor=F) #As Figure 2 shows, in this example there is a batch of outliers from #a different population in #the late 1960s, and these should be rejected completely, #which the Huber M-estimators do not. #Let us try a re-descending estimator. modelRFbisquare <- rlm(calls ~ year, data=phones, psi=psi.bisquare) summary(modelRFbisquare) abline(modelRFbisquare$coefficients,col=3,lty=6,lwd=3) #This happened to work well but can try init="lts" summary(rlm(calls ~ year, data=phones, init="lts", psi=psi.bisquare), cor=F) # #It is possible to combine the resistance of these methods with the #efficiency of M-estimation. #The MM-estimator proposed by Yohai, Stahel & Zamar (1991) # (see also Marazzi, 1993, # Chapter 9.1.3) is an M-estimator starting at the coefficients # given by the S-estimator and with fixed # scale given by the S-estimator. This retains (for # c>c # 0 #) the high-breakdown point of the S- # estimator and the high efficiency at the normal. # At considerable computational expense, this #gives the best of both worlds. #Function #rlm #has an option to implement MM-estimation. modelMMfit <- rlm(calls ~ year, data=phones, method="MM") summary(modelMMfit, cor = F) names(modelMMfit) abline(modelMMfit$coefficients,col=4, lty=8,lwd=3) # # Robert can you workout a legend for these data fits? # legend("topleft",legend=c("Least Squares","Robust Fit-Huber","Robust fit-Tukey","Robust MM-fit"), col=1:4,lty=c(2,4,6,8),lwd=3) # # # And can you work out 95% prediction intervals for # each of the methods at years 1970 and say 1974? # need to use predict? And can you plot the prediction # intervals on the graph? I'm having trouble with the call? # see below phones new <- data.frame(year=c(70,74)) predict(modelLS, newdata =new, interval="prediction", level=0.95) predict(modelRF, newdata =new, interval="prediction", level=0.95) predict(modelRFbisquare,newdata =new, interval="prediction", level=0.95) predict(modelMMfit,newdata =new, interval="prediction", level=0.95)