# This is an R script to familiarize you with R,
# and show you some of the plotting commands.
# Lines starting with `#' contain comments and explanations.
# To get help on any command, type `?' before the command.
#
# The script is meant to be stepped through line by line, but it can also
# be run in one go with source(): the one statement that fails on purpose
# is wrapped in try() so that it does not stop the run.

#######################
# Some general things #
#######################

# You can use R as a calculator:
5 + 3

# Create a vector, using the command 'c()' (for concatenate)
x <- c(1, 2, 3, 4)
x

# Sum all elements of x:
sum(x)

# Square all elements of x:
x^2

# Get the third element of x:
x[3]

# Add an extra element:
x <- c(x, 10)
x

# Create a vector using the command 'seq()' (for sequence)
x <- seq(from = 1, to = 10, by = 0.1)
x

# Obtain the length of x:
length(x)

# Create a vector using the command 'rep()' (for repeat)
x <- rep(0, times = 10)
x

####################
# load a data set: #
####################

# Load the car library which contains all data sets:
# (Before you can do this, you need to install the package 'car',
#  using the menu: Packages -> Install Package(s),
#  or by typing install.packages("car") at the prompt.
#  You only need to do this once on each computer you work on.)
library(car)

# Get a description of the data set `Duncan':
?Duncan

# Load the data set:
data(Duncan)

# Look at the data:
Duncan

# Get summary of the data:
summary(Duncan)
# If there are missing values, then these are denoted by 'NA'

#####################################
# Access various parts of the data: #
#####################################

# The data are stored in a data frame: a table with one row per
# observation and one named column per variable.
# We can access the entries in the data set. For example,
# for the entry in the second row and the third column:
Duncan[2, 3]

# Obtain the first column:
Duncan[, 1]

# Obtain the first row:
Duncan[1, ]

# We can exclude a column or row by using the minus sign:
Duncan[, -1]

# We can also access the columns by their names, using a $ sign to
# separate the name of the data set and the name of the column:
Duncan$income

# Note that the following doesn't work, because `income' is a column of
# Duncan and not an object of its own. try() lets R print the error
# message and then carry on instead of aborting the whole script:
try(income)

# But if we first attach the data, it does work:
attach(Duncan)
income
# This can save you some typing.
# The plots below refer to the columns by their bare names (income,
# education, prestige, type), so they rely on Duncan being attached.
# To detach the dataset, use the command `detach(Duncan)' or `detach()'

#####################
# univariate plots: #
#####################

# Stem-and-leaf diagram
stem(income)

# Frequency histogram:
hist(income, col = "gray")

# Probability histogram (the area under the histogram equals 1):
hist(income, col = "gray", probability = TRUE)

# Probability histogram with titles
hist(
  income,
  col = "gray",
  probability = TRUE,
  main = "Histogram of percent of males in occupation earning more than $3500 in 1950",
  xlab = "percent of males in occupation earning more than $3500",
  ylab = "density"
)

# Add density estimate
lines(density(income))

# The command `lines' always adds a line to an existing plot.
# If you want the density estimate in a new plot, use the command `plot':
plot(density(income))

# Boxplot
boxplot(income, col = "lightgray", main = "Boxplot of income")

# qq-plot
qqnorm(income)
qqline(income)

###################
# bivariate plots #
###################

# Scatterplot
plot(income, education)

# Scatterplot with nonparametric regression:
plot(income, education)
lines(loess.smooth(income, education))

# or in one line:
scatter.smooth(income, education)

# Parallel boxplot of income, education and prestige:
boxplot(
  income, education, prestige,
  names = c("income", "education", "prestige"),
  main = "Boxplot of Duncan data",
  col = "lightgray"
)

# Parallel boxplot of income by type of occupation
boxplot(
  income ~ type,
  main = "Parallel boxplot of income by type of occupation",
  ylab = "income",
  xlab = "type of occupation",
  col = "lightgray"
)

######################
# multivariate plots #
######################

# Scatterplot matrix of the last three columns
pairs(Duncan[2:4])

# or:
pairs(Duncan[, -1])

# Use different colors for different types of job, using the argument `col'.
# unclass(type) turns the factor into its integer codes, which are then
# used to pick one color per observation:
pairs(
  Duncan[2:4],
  col = c("red", "green3", "blue")[unclass(type)]
)

# Also use a different plotting symbol, using the argument `pch':
pairs(
  Duncan[2:4],
  pch = c(1, 2, 3, 4)[unclass(type)],
  col = c("red", "green3", "blue")[unclass(type)]
)