######### DEMO CODE Chapter 5 ##################
### 19.10.2009 Lecture "Using R"
################################################
#
# Interactive demo script: every expression is meant to be run line by line
# so that its printed value (or plot) can be inspected.  Topics, in order:
# missing values, probability distributions, random numbers, objects and
# S3 dispatch, attributes, and lists.

## Missing Values
##############################
# Reads the data set from the course web server (needs network access).
d.sport <- read.table("http://stat.ethz.ch/Teaching/Datasets/WBL/sport.dat",
                      header = TRUE)
t.kugel <- d.sport[, "kugel"]
t.kugel

# create artificial missing data
t.kugel[2] <- NA
t.kugel

# Which elements of t.kugel are missing?
t.kugel == NA          # counter-example: comparing with NA yields NA everywhere
is.na(t.kugel)         # the correct test
which(is.na(t.kugel))

# simple statistical functions
mean(t.kugel)                # NA, because the NA propagates
mean(t.kugel, na.rm = TRUE)

# wilcox.test, t.test: 'na.action' is an argument of their formula methods.
# For a plain vector the argument is absorbed by '...' and the NAs are
# dropped by the test itself, so both calls below give the same result.
wilcox.test(t.kugel)
wilcox.test(t.kugel, na.action = na.omit)

# find out about default na.action:
# getOption("na.action")
# [1] "na.omit"
# further options: see ?na.action

# Drop the NA elements
t.kugel[!is.na(t.kugel)]
# na.omit(df) drops rows of data.frame df
na.omit(t.kugel)  # same as above

## ========================================================
## Distributions
## ========================================================

# The normal distribution is characterized by:
# Density function:
dnorm(0.5)
dnorm(0:5, mean = 0, sd = 10)
# Cumulative Probability function:
pnorm(c(1, 1.96))
# Quantile function:
qnorm(c(0.25, 0.975), mean = 0, sd = 1)
qnorm(c(0.25, 0.975), mean = 100, sd = 10)

## Visualization of distributions
##############################
# Discrete distributions
plot(0:15, dpois(0:15, lambda = 3.5), type = "h")
lines(0:15, dpois(0:15, lambda = 3.5))

# Continuous distributions
curve(dnorm(x, 5, 2), xlim = c(-1, 10), xlab = "x", ylab = "density",
      main = "normal distribution")

## Random Numbers
##############################
rnorm(5, mean = 2, sd = 2)

# Poisson distribution: dpois, ppois, qpois, rpois
rpois(10, lambda = 3.5)

# uniform distribution
runif(4)
runif(4)

# Resetting the seed to the same value reproduces the same draw.
set.seed(27)
runif(1)
set.seed(27)
runif(1)

## ========================================================
## Elements of S: Objects
## ========================================================

# Get information about the structure of an object:
str(d.sport)

## Object Oriented Programming
##############################
# Each object has a class
class(d.sport)
# specialised versions of plot etc. exist
plot(speer ~ kugel, data = d.sport)

# The most basic generic function is print!
# Example:
data(sleep)
r.t <- wilcox.test(extra ~ group, data = sleep)
print(r.t)  # same as simply typing "r.t"
str(r.t)    # has all the information,
## print.htest selects the test outcome automatically

# ``S4 classes'', package {methods}
methods(print)            # to get a list of all print methods available
getAnywhere(print.htest)  # to get actual function definition of *methods

# statistical methods attach a specific class attribute to their result
# Example: Linear regression function lm
r.lm <- lm(speer ~ kugel, data = d.sport)
class(r.lm)
print(r.lm)    # actually calls print.lm
summary(r.lm)  # result is printed by print.summary.lm
plot(r.lm)     ## explained later (next time?)

## Attributes
##############################
# class, names
names(d.sport)
row.names(d.sport)
dim(d.sport)
# all attributes
attributes(d.sport)

## Lists
##############################
t.v <- c(Hans = 2.0, Fritz = -1.0, Elsa = 9.0, Trudi = 0.4, Olga = 100.0)
t.v
list(t.v, you = "nice")

## most statistical functions produce a list that collects the results
t.l <- hist(t.kugel, plot = FALSE)
t.l
plot(t.l)  # dispatches on the class of t.l (the histogram plot method)

# Get a sublist of the list
t.l[2:3]
str(t.l[2:3])  # result is also a list
# Select by name.  The component is called "density"; the name
# "intensities" used by older course material is not returned by
# current R versions (NOTE(review): confirm against ?hist).
t.l[c("breaks", "density")]

# Get a component [[ ]]
t.l[[2]]
str(t.l[[2]])  # result is a vector
t.l[["counts"]]
t.l$counts

# Note: t.l['counts'] is a list with one component
t.l["counts"]
str(t.l["counts"])

d.sport$kugel  # returns a vector
d.sport$kugel[4:6]

## Make a list of subsets of a vector:
split(1:7, c(1, 1, 2, 3, 3, 2, 1))

# unlist concatenates all elements of all components into a single vector
unlist(t.l[1:2])

## ========================================================
## ========================================================
## Commented-out code below: writes three illustrations of the standard
## normal distribution (density, cumulative probability, quantiles) to
## PDF files.

## pdf(file="NormalDensityPlot.pdf")
## plot(x=seq(-4,4,by=0.1),
##      y=dnorm(seq(-4,4,by=0.1),mean=0,sd=1),
##      type="l",xaxp=c(-4,4,8),ylim=c(0,0.45),
##      xlab="x",ylab="Density",main="Normal Distribution N(mean=0, sd=1)",
##      asp=10)
## abline(h=0,lty=5)
## abline(v=0.5,lty=3)
## abline(h=dnorm(0.5,mean=0,sd=1),lty=3)
## dev.off()

## pdf(file="NormalCumulativePlot.pdf")
## plot(x=seq(-4,4,by=0.1),
##      y=pnorm(seq(-4,4,by=0.1),mean=0,sd=1),
##      type="l",xaxp=c(-4,4,8),
##      xlab="x",ylab="Cumulative Probability",ylim=c(0,1),
##      main="Normal Distribution N(mean=0, sd=1)", asp=6)
## abline(h=c(0,1),lty=5,col="grey60")
## abline(v=1,lty=3)
## abline(h=pnorm(1,mean=0,sd=1),lty=3)
## abline(v=1.96,lty=4)
## abline(h=pnorm(1.96,mean=0,sd=1),lty=4)
## dev.off()

## pdf(file="NormalQuantilePlot.pdf")
## plot(x=seq(-4,4,by=0.1),
##      y=pnorm(seq(-4,4,by=0.1),mean=0,sd=1),
##      type="l",xaxp=c(-4,4,8),
##      xlab="x",ylab="Cumulative Probability",ylim=c(0,1),
##      main="Normal Distribution N(mean=0, sd=1)",asp=6)
## abline(h=c(0,1),lty=5,col="grey60")
## abline(h=0.25,lty=3)
## abline(v=qnorm(0.25, mean=0, sd=1),lty=3)
## abline(h=0.975,lty=4)
## abline(v=qnorm(0.975, mean=0, sd=1),lty=4)
## dev.off()