### 1.3.4 Don't do several simple regressions instead of multiple regression
### ------------------------------------------------------------------------

### Artificial Example

## two predictor variables $x_1, x_2$, response variable $Y$
x1 <- c( 0, 1, 2, 3, 0, 1, 2, 3)
x2 <- c(-1, 0, 1, 2, 1, 2, 3, 4)
Y  <- c( 1, 2, 3, 4,-1, 0, 1, 2)
(dat <- data.frame(x1, x2, Y))
pairs(dat)
## or, a bit nicer (for slide):
pairs(dat, pch = 20, cex = 1.5,
      main = "Example: multiple or single regressions")

library(sfsmisc) ## provides pdf.latex() and pdf.end()
pdf.latex("/u/maechler/Vorl/comput-statist/Figs/ex-multi_l.pdf",
          width = 7.6, height = 8)
pairs(dat, pch = 20, cex = 1.5,
      main = "Example: multiple or single regressions")
pdf.end()

### 3D visualization:
library(rgl)
plot3d(x1, x2, Y)
spheres3d(x1, x2, Y, radius = 0.4, col = "tomato") ## rgl.spheres() is deprecated
clear3d()
close3d()

lm(Y ~ x1)      ## coefficients (0, 1)
lm(Y ~ x2)      ## coefficients (1.333, 0.111)
lm(Y ~ x1 + x2) ## coefficients (0, 2, -1)
## The multiple regression describes the data points exactly:
##   Y[i] = 2 x1[i] - x2[i]  for all i,
summary(lm(Y ~ x1 + x2)) ## hat(sigma)^2 = 0, since all residuals "= 0"
## hence Y *de*creases when x2 increases (x1 held constant)!

## If instead we do a simple regression of $Y$ on $x_2$ (ignoring the values
## of $x_1$, i.e., *not* keeping them constant), we obtain
## \begin{eqnarray*}
##   \hat{Y}_i = \frac{1}{9} x_{i2} + \frac{4}{3} \quad \mbox{for all } i
##   \qquad (\hat{\sigma} = 1.72).
## \end{eqnarray*}
## This least squares regression line describes how $Y$ changes with $x_2$
## when $x_1$ is ignored; in particular, $\hat{Y}$ \emph{increases} when
## $x_2$ increases, in contrast to the multiple regression!
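
## A minimal sanity check (base R only; the name 'fit' is just for
## illustration): the multiple regression reproduces Y exactly, so the
## fitted plane is 2*x1 - x2 and all residuals vanish:
fit <- lm(Y ~ x1 + x2)
all.equal(unname(fitted(fit)), 2*x1 - x2) ## TRUE
max(abs(residuals(fit)))                  ## numerically 0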
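
## The numbers quoted above for the simple regression can be reproduced
## directly (a small check; note that 1.72 is hat(sigma), the residual
## standard error summary(.)$sigma, not its square):
fit2 <- lm(Y ~ x2)
coef(fit2)          ## intercept 4/3 = 1.333, slope 1/9 = 0.111
summary(fit2)$sigma ## 1.7213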
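
## Why the sign flip? Hold x1 fixed: each of the four x1 values occurs
## twice, with x2 differing by 2 and Y by -2, so the slope of Y in x2
## *given* x1 is -1, exactly the multiple regression coefficient.
## A small sketch using only the data above (split() groups dat by x1):
sapply(split(dat, dat$x1),
       function(d) diff(d$Y) / diff(d$x2)) ## -1 -1 -1 -1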