# R-plot for examining data
#
# Exploratory plots for the Prestige data (stem-and-leaf, histogram, density,
# boxplots, scatterplots), followed by three PostScript files of normal
# QQ-plots: qq-normal.ps, qq-normal-shift-scale.ps and qq-quiz.ps.

library(car)
data(Prestige)

# Work on explicit copies of the columns instead of attach(Prestige):
# attach() puts the whole data frame on the search path and is never undone.
# Keeping the short names preserves the automatically generated plot labels.
income <- Prestige$income
prestige <- Prestige$prestige
type <- Prestige$type

# Draw a normal QQ-plot of `x` with its reference line and the two axes
# through the origin. Arguments in `...` (e.g. xlim, ylim) go to qqnorm().
qq_with_axes <- function(x, ...) {
  qqnorm(x, ...)
  qqline(x)
  abline(v = 0)
  abline(h = 0)
  invisible(x)
}

# stem-and-leaf diagram
stem(income)
sort(income)

# histogram + density
hist(income, col = "grey")
hist(income, col = "grey", probability = TRUE, ylim = c(0, 1.3E-04))
lines(density(income), col = "red")

# boxplot
boxplot(income)
boxplot(income ~ type)

# interactive identification of points in scatterplots:
plot(income, prestige,
     col = c("red", "green3", "blue")[unclass(Prestige$type)])
# identify() waits for mouse clicks, so only call it in an interactive session
if (interactive()) {
  identify(income, prestige)
  # to stop: click Esc
}

# scatterplot matrix
pairs(Prestige[, 1:4])
# better:
pairs(Prestige[, 1:4], col = c("red", "green3", "blue")[unclass(type)])
# red = blue collar, green = professional, blue = white collar

# what does "unclass(type)" do?
unclass(type)

### QQ-plots

# let's look at qqnorm plots for samples from a standard normal
# distribution to get an idea of the variation that we may expect
# to see

# open postscript environment
postscript("qq-normal.ps", horizontal = FALSE)

# create 3x3 plotting matrix
par(mfrow = c(3, 3), pty = "s")  # pty=s gives square plotting regions

for (i in seq_len(9)) {
  # take sample from normal distribution
  x <- rnorm(100, mean = 0, sd = 1)
  qqnorm(x, xlim = c(-3, 3), ylim = c(-3, 3))
  qqline(x)
}

# close postscript environment
dev.off()

##### ##### #####

# QQ plots of normal variables with different mean and sd

postscript("qq-normal-shift-scale.ps", horizontal = FALSE)
par(mfrow = c(2, 2), pty = "s")

# first plot
x <- rnorm(100, mean = 0, sd = 1)
qq_with_axes(x, xlim = c(-6, 6), ylim = c(-6, 6))

# second plot
qq_with_axes(x + 2, xlim = c(-6, 6), ylim = c(-6, 6))

# third plot
qq_with_axes(2 * x, xlim = c(-6, 6), ylim = c(-6, 6))

# fourth plot
qq_with_axes(2 * x + 2, xlim = c(-6, 6), ylim = c(-6, 6))

dev.off()

##### ##### #####

# QQ plots of lighter tailed, heavier tailed and
# skewed distributions

postscript("qq-quiz.ps", horizontal = FALSE)
par(mfrow = c(2, 2), pty = "s")

# first plot
# take sample from uniform distribution
x <- runif(100, min = -0.5, max = 0.5)
qq_with_axes(x)

# second plot
# take sample from gamma distribution
x <- rgamma(100, shape = 1, scale = 1)
x <- x - 1
qq_with_axes(x)

# third plot
qq_with_axes(-x)

# fourth plot
# take sample from t-distribution
x <- rt(100, df = 3)
qq_with_axes(x)

dev.off()