#                  Brian Ripley's Applied Stats Notes
library(MASS)

#  Least Squares Fit
#
modelLS <- lm(calls ~ year, data=phones)
summary(modelLS,cor=F)
plot(phones$year,phones$calls, lwd=3, main = "Belgium Telephone Data")
abline(modelLS$coefficients, lty =2, col=1,lwd=3)
#
#
#    Robust fit using Huber psi

modelRF <- rlm(calls ~ year, maxit=50, data=phones, psi=psi.huber)
summary(modelRF, cor=F)
abline(modelRF$coefficients,col=2,lty=4,lwd=3)
summary(rlm(calls ~ year, scale.est="proposal 2", data=phones), cor=F)

#As Figure 2 shows, in this example there is a batch of outliers from 
#a different population in
#the late 1960s, and these should be rejected completely,
#which the Huber M-estimators do not.
#Let us try a re-descending estimator.
modelRFbisquare <- rlm(calls ~ year, data=phones, psi=psi.bisquare)
summary(modelRFbisquare)
abline(modelRFbisquare$coefficients,col=3,lty=6,lwd=3)
#This happened to work well but can try init="lts"
summary(rlm(calls ~ year, data=phones, init="lts", psi=psi.bisquare), cor=F)
#
#It is possible to combine the resistance of these methods with the 
#efficiency of M-estimation.
#The  MM-estimator  proposed  by  Yohai,  Stahel  &  Zamar  (1991)  
#  (see  also  Marazzi,  1993,
#     Chapter 9.1.3) is an M-estimator starting at the coefficients 
#      given by the S-estimator and with fixed
#    scale given by the S-estimator.  This retains (for
#                                               c>c
#                                               0
#) the high-breakdown point of the S-
#  estimator and the high efficiency at the normal. 
# At considerable computational expense, this
#gives the best of both worlds.
#Function
#rlm
#has an option to implement MM-estimation.
modelMMfit <- rlm(calls ~ year, data=phones, method="MM")
summary(modelMMfit, cor = F)
names(modelMMfit)
abline(modelMMfit$coefficients,col=4, lty=8,lwd=3)

# 
#     Robert can you workout a legend for these data fits?
#

legend("topleft",legend=c("Least Squares","Robust Fit-Huber","Robust fit-Tukey","Robust MM-fit"),
               col=1:4,lty=c(2,4,6,8),lwd=3) 
#
#
#     And can you work out 95% prediction intervals for
#  each of the methods at years 1970 and say 1974?
#             need to use predict?  And can you plot the prediction
#  intervals on the graph?  I'm having trouble with the call?
#  see below


phones
new <- data.frame(year=c(70,74))
predict(modelLS, newdata =new, interval="prediction", level=0.95)
predict(modelRF, newdata =new, interval="prediction", level=0.95)
predict(modelRFbisquare,newdata =new, interval="prediction", level=0.95)
predict(modelMMfit,newdata =new, interval="prediction", level=0.95)