### reading in the dataset ## (use read.csv for comma-separated files or the sep option) dat <- read.table(file="brainsize.txt",header=TRUE) pdf(file="tmp.pdf",width=10,height=10) plot(dat,pch=20,col=rgb(0.2,0.2,0.4,0.4)) dev.off() #### taking a look at the data dim(dat) str(dat) dat[1:8,] ## response Y is FSIQ (full-scale IQ), explanatory variables are ## Gender, Height, Weight, MRI_Count (brainsize) Y <- dat$FSIQ X <- dat[, c("Gender","Weight","Height","MRI_Count")] ## turn factor "Gender" into numeric dummy variable X[,"Gender"] <- as.numeric( X[,"Gender"] == "Female") ## fit a linear model linmod <- lm( Y ~ . , data=X) summary(linmod) ## compute the coefficients "by hand" and add intercept column colX <- colnames(X) X <- cbind( rep(1,nrow(X)), as.matrix(X)) colnames(X) <- c("Intercept", colX) X[1:8,] thetahat <- solve( t(X)%*%X, t(X)%*%Y) print(thetahat) print(coef(linmod)) print(cbind( Y, X)) ## Tukey-Anscombe plot of residuals versus fitted values #pdf(file="brainsize_tukey.pdf",width=6,height=6) plot(fitted(linmod) , residuals(linmod)) abline(h=0,lty=3) #dev.off()