# Rscript for 2. (Nonparametric) regression library(car) # Load data set Prestige data(Prestige) attach(Prestige) # get information about data set ?Prestige # education, prestige and other characteristics of 102 different occupations # Sort the data based on income ind <- order(income) sorted <- Prestige[ind,] attach(sorted) # Change one value to show the effect of an outlier: prestige[30] <- 120 ######################################################## # Nonparametric regression with non-overlapping strips # ######################################################## # Make scatterplot plot(income, prestige,main="Nonparametric regression using strips") # Draw the cut-off values: # abline(a,b) draws a line with intercept a and slope b # abline(v=10) draws a vertical line with x-coordinate 10 # abline(h=10) draws a horizontal line with y-coordinate 10 # lty stands for line type, lty=2 gives a dashed line abline(v=income[20],lty=2) abline(v=income[40],lty=2) abline(v=income[60],lty=2) abline(v=income[80],lty=2) # Compute the mean of income and prestige in each group, and add these to # the plot. We use the command 'c' (concatenate) to put the values together # in a vector xmeans <- c(mean(income[1:20]),mean(income[21:40]),mean(income[41:60]),mean(income[61:80]),mean(income[81:102])) ymeans <- c(mean(prestige[1:20]),mean(prestige[21:40]),mean(prestige[41:60]),mean(prestige[61:80]),mean(prestige[81:102])) # add the points to the plot points(xmeans,ymeans,pch=3) # connect the points by a line lines(xmeans,ymeans) #################################################### # Nonparametric regression with overlapping strips # #################################################### # Make scatterplot plot(income,prestige,main="Local averaging and loess") # We now use local averaging (overlapping strips), with 21 data points # For the first 11 strips, we average the first 21 values of the data set, # using a for-loop resy <- NULL for(i in 1:11){ resy <- c(resy,mean(prestige[1:21])) } # For the i=12 up to the 91, we average the values from i-10 up to i+10 for(i in 12:91){ resy <- c(resy,mean(prestige[(i-10):(i+10)])) } # For the last 11 strips, we average the last 21 values of the data set for (i in 92:102){ resy <- c(resy,mean(prestige[82:102])) } lines(income,resy,lwd=2) # identify outlier abline(v=income[30],lty=2) # add a smooth curve computed by loess lines(loess.smooth(income,prestige),lwd=2,col="green",type="l")