# This is an R-script to familiarize you with R,
#  and show you some of the plotting commands. 
# Lines starting with `#' contain comments and explanations.
# To get help on any command, type `?' before the command name.


#######################
# Some general things #
#######################

# R works as a calculator:
5 + 3

# Build a vector with the command 'c()' (short for concatenate):
x <- c(1, 2, 3, 4)
x

# Add up all elements of x:
sum(x)

# Square every element of x (arithmetic is applied element by element):
x^2

# Extract the third element of x:
x[3]

# Append an extra element by concatenating x with the new value:
x <- c(x, 10)
x

# Build a vector with the command 'seq()' (short for sequence):
x <- seq(from = 1, to = 10, by = 0.1)
x

# Number of elements in x:
length(x)

# Build a vector with the command 'rep()' (short for repeat):
x <- rep(0, times = 10)
x


####################
# load a data set: #
####################

# Attach the 'car' package, which gives access to the data sets used below.
# (The package must be installed before it can be attached, using the menu:
#  Packages -> Install Package(s)
#  Installing is needed only once on each computer you work on.)
library("car")

# Open the help page that describes the data set `Duncan':
?Duncan

# Load the data set into the workspace:
data("Duncan")

# Print the data:
Duncan

# Summarise each column of the data:
summary(Duncan)

# If there are missing values, then these are denoted by 'NA'


#####################################
# Access various parts of the data: #
#####################################

# The data are stored in a data frame (a table with named columns).
# Individual entries are accessed as [row, column]. For example,
# for the entry in the second row and the third column:
Duncan[2, 3]

# Obtain the first column (leave the row index empty):
Duncan[, 1]

# Obtain the first row (leave the column index empty):
Duncan[1, ]

# We can exclude a column or row by using the minus sign
# (here: everything except the first column):
Duncan[, -1]

# We can also access the columns by their names, using a $ sign to
# separate the name of the data set and the name of the column:
Duncan$income

# Note that the following doesn't work, because `income' is a column of
# Duncan and not an object of its own. The call is wrapped in try() so
# that the error message is displayed without aborting the rest of the
# script when the file is run as a whole (e.g. with source()):
try(income)

# But if we first attach the data, it does work:
attach(Duncan)
income

# This can save you some typing. Be aware that an object in your workspace
# with the same name as a column takes precedence over the attached column.
# To detach the dataset, use the command `detach(Duncan)' or `detach()'


#####################
# univariate plots: #
#####################

# Stem-and-leaf display
stem(income)

# Histogram of frequencies:
hist(income, col = "gray")

# Histogram on the density scale (the total area of the bars equals 1):
hist(income, col = "gray", probability = TRUE)

# Density-scale histogram with a main title and axis labels.
# The title string contains a line break, so it is drawn on two lines.
hist(
  income,
  col = "gray",
  probability = TRUE,
  main = "Histogram of percent of males in occupation earning
more than $3500 in 1950",
  xlab = "percent of males in occupation earning more than $3500",
  ylab = "density"
)

# Overlay a density estimate on the histogram
lines(density(income))

# The command `lines' always draws on top of the plot that already exists.
# To show the density estimate in a plot of its own, use the command `plot':
plot(density(income))

# Boxplot
boxplot(income, col = "lightgray", main = "Boxplot of income")

# Normal qq-plot, with a reference line added by `qqline'
qqnorm(income)
qqline(income)


###################
# bivariate plots #
###################

# Scatterplot of education (vertical axis) against income (horizontal axis)
plot(income, education)

# Scatterplot with nonparametric regression:
# `loess.smooth' computes the smooth curve and `lines' adds it to the plot
plot(income, education)
lines(loess.smooth(income, education))

# or in one line:
scatter.smooth(income, education)

# Parallel boxplots of income, education, and prestige:
boxplot(income, education, prestige,
        names = c("income", "education", "prestige"),
        main = "Boxplot of Duncan data", col = "lightgray")

# Parallel boxplot of income by type of occupation
# (the formula `income ~ type' draws one box per level of `type')
boxplot(income ~ type,
        main = "Parallel boxplot of income by type of occupation",
        ylab = "income", xlab = "type of occupation", col = "lightgray")


######################
# multivariate plots #
######################

# Scatterplot matrix of columns 2 to 4 (the last three columns)
pairs(Duncan[, 2:4])

# or, equivalently, drop the first column:
pairs(Duncan[, -1])

# Use a different color for each type of job, via the argument `col'.
# The integer codes of `type' pick one color per observation.
pairs(
  Duncan[, 2:4],
  col = c("red", "green3", "blue")[as.integer(type)]
)

# Also use a different plotting symbol per type, via the argument `pch':
pairs(
  Duncan[, 2:4],
  pch = c(1, 2, 3, 4)[as.integer(type)],
  col = c("red", "green3", "blue")[as.integer(type)]
)