# Rscript for (nonparametric) regression # (You need to install package 'car' beforehand) # Load package 'car' library(car) # Load data set Prestige data(Prestige) Prestige # Get information about data set ?Prestige # education, prestige and other characteristics of 102 # different occupations # get the column names names(Prestige) # Sort the data based on income ind <- order(Prestige$income) sorted <- Prestige[ind,] attach(sorted) # Change one value to show the effect of an outlier: prestige[30] <- 120 # note that R is case sensitive, and that the above command refers to the *column* "prestige" ######################################################## # Nonparametric regression with non-overlapping strips # ######################################################## # Make scatterplot plot(income, prestige, main="Nonparametric regression using strips") # Draw the cut-off values: # abline(a,b) draws a line with intercept a and slope b # abline(v=10) draws a vertical line with x-coordinate 10 # abline(h=10) draws a horizontal line with y-coordinate 10 # lty stands for line type, lty=2 gives a dashed line abline(v=income[20],lty=2) abline(v=income[40],lty=2) abline(v=income[60],lty=2) abline(v=income[80],lty=2) # Compute the mean of income and prestige in each group, and # add these to the plot. We use the command 'c' (concatenate) # to put the values together in a vector xmeans <- c(mean(income[1:20]),mean(income[21:40]), mean(income[41:60]),mean(income[61:80]), mean(income[81:102])) ymeans <- c(mean(prestige[1:20]),mean(prestige[21:40]), mean(prestige[41:60]),mean(prestige[61:80]), mean(prestige[81:102])) # add the points to the plot points(xmeans, ymeans, pch=3, col="red") # connect the points by a line lines(xmeans,ymeans,col="red") #################################################### # Nonparametric regression with overlapping strips # #################################################### # Make scatterplot plot(income,prestige,main="Local averaging and loess") # We now use local averaging (overlapping strips), with # 21 data points. For the first 11 strips, we average # the first 21 values of the data set, using a for-loop: resy <- NULL for(i in 1:11){ resy <- c(resy,mean(prestige[1:21])) } # For the i=12 up to the 91, we average the values from # i-10 up to i+10: for(i in 12:91){ resy <- c(resy,mean(prestige[(i-10):(i+10)])) } # For the last 11 strips, we average the last 21 values # of the data set: for (i in 92:102){ resy <- c(resy,mean(prestige[82:102])) } lines(income,resy,lwd=2, col="red") # identify outlier abline(v=income[30],lty=2) # add a smooth curve computed by loess lines(loess.smooth(income,prestige),lwd=2, col="blue",type="l")