### Beispiel aus Skript ## Much more (renamed var.s / var.selection!) ## --> ~/Vorl/StatMeth/S/asphalt-s6.S ## ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ url <- "http://stat.ethz.ch/Teaching/Datasets/asphalt.dat" url <- "/u/sfs/datasets/asphalt.dat" d.asphalt <- read.table(url, header = TRUE) head(d.asphalt) str(d.asphalt) pairs(d.asphalt, main = paste("Asphalt Rutting, n = ", nrow(d.asphalt))) ## "Asphaltabnuetzung", ## ## VISC : Viskosität", ## ASPH : % Asphalt in 'surface course'", ## BASE : % Asphalt in ' base course'", ## RUN : 0/1 Indikator für Versuchsreihe", ## FINES: 10* %{fines} in 'surface course'", ## VOIDS: %{voids} in 'surface course'", ## y = RUT: Spurrinnentiefe in inches/million wheel passes", asphalt1 <- data.frame(LOGRUT = log(d.asphalt[,"RUT"]), LOGVISC= log(d.asphalt[,"VISC"]), d.asphalt[, 2:6]) # the rest ## Nicer with lattice's SPLOM := S[catter] PLO[t] M[atrix]: require(lattice) splom(~ asphalt1, main = paste("Asphalt Rutting, n = ", nrow(asphalt1))) mod.lm <- lm(LOGRUT ~ . , data = asphalt1) summary(mod.lm)## --> the model in the lecture! library(sfsmisc)# for this: TA.plot(mod.lm) ## or simply plot(mod.lm)## 4 plots -- the first two are those mentioned in the lecture ## Just the normal plot: plot(mod.lm, which=2) ## compare with random normal plots of 31 points dev.new() # << 2nd graphic window qqnorm(rnorm(31)) ## 1.6.3 Detecting Serial Correlation plot(residuals(mod.lm)) plot(as.ts( residuals(mod.lm)) )# "as time-series" abline(h = 0, lty=2) # horizontal line at y = 0 ##--> "time series analysis" lag.plot( residuals(mod.lm) ) acf ( residuals(mod.lm) ) ## Auto-Correlation-Function ## etc etc acf ( abs(residuals(mod.lm)) ) lag.plot( residuals(mod.lm) , lags = 4) lag.plot( abs(residuals(mod.lm)) , lags = 4) ## __Outlook__ ## "Leverages" := the diagonal entries of the hat matrix P_{i,i} aka "h_ii": plot(mod.lm, which = 5) ## ==> one of them is relatively large ~= 0.5: which(hatvalues(mod.lm) > 0.4) ## --> observation 15: ## regression with*out* that observation: "-15" : mod.lm.o.15 <- lm(LOGRUT ~ . , data = asphalt1, subset = -15) summary(mod.lm.o.15) # .. small change ### ---- 1.7 Model selection ------- m2.lm <- step(mod.lm) summary(m2.lm)