############# ## SHEET 4 ## ############# ################################################## ## Exercise 1 ################################################## library(MASS) ## a) set.seed(42) m <- c(0,0) Sigma <- matrix(c(1,0.7,0.7,1),nrow=2) n <- 100 data <- mvrnorm(n,m,Sigma) ## b) cor(data[,1],data[,2]) ## c) generateCorValue <- function(n,m,Sigma){ data <- mvrnorm(n,m,Sigma) return(cor(data[,1],data[,2])) } reps <- 1000 res <- replicate(reps, generateCorValue(n,m,Sigma)) #hist hist(res,main="Histogram of correlation estimates", xlab="Correlation estimates") abline(v=0.7, col=2) #sd sd(res) ## d) simSdOfEstimator <- function(reps,n,m,Sigma){ res <- replicate(reps, generateCorValue(n,m,Sigma)) return(sd(res)) } n.all <- c(10,20,50,100,200,500,1000) n.cor <- rep(NA,length(n.all)) for (i in 1:length(n.all)){ n.cor[i] <- simSdOfEstimator(reps,n.all[i],m,Sigma) } plot(n.all,n.cor,pch=19,xlab="number of observations", ylab="estimatd sd of the correlation estimator",type="b") ## e) plot(log(n.all),log(n.cor),xlab="log(number of observations)", ylab="log(estimated sd of the correlation estimator)") k <- lm(log(n.cor)~log(n.all)) abline(k) ################################################## ## Exercise 2 ################################################## library(MASS) f.qqchi=function(D,df) { ## Purpose: Draws chi QQ plot ## ---------------------------------------------------------------------- ## Arguments: D: Mahalanobis distances. ## df: Degrees of freedom for the chi^2 distribution. ## ---------------------------------------------------------------------- n <- length(D) plot(sqrt(qchisq(ppoints(n),df=df)),sort(D), ylab="Sample quantiles", xlab=expression(paste(chi,"-quantiles"))) y <- quantile(D,c(0.25,0.75)) x <- sqrt(qchisq(c(0.25,0.75),df=df)) slope <- diff(y)/diff(x) int <- y[1L]-slope*x[1L] abline(int, slope) } ## b) m <- c(0,0) Sigma <- matrix(c(1,0.7,0.7,1),nrow=2) t.n <- c(10,50,100,200) # varying sample sizes par(mfrow=c(2,2)) for(n in t.n){ data <- as.data.frame(mvrnorm(n,m,Sigma)) D <- sqrt(mahalanobis(data,mean(data),cov(data))) f.qqchi(D,df=2) } ## c) m <- c(0,0) Sigma <- matrix(c(1,0.7,0.7,1),nrow=2) t.n <- c(10,50,100,200) # varying sample sizes par(mfrow=c(2,2)) f.t <- function(x) abs(x) # a nonlinear transformation # try also exp(x), x^2 for(n in t.n){ data <- f.t(as.data.frame(mvrnorm(n,m,Sigma))) D <- sqrt(mahalanobis(data,mean(data),cov(data))) f.qqchi(D,df=2) } ## d) panel.hist <- function(x, ...){ usr <- par("usr"); on.exit(par(usr)) par(usr = c(usr[1:2], 0, 1.5) ) h <- hist(x, plot = FALSE) breaks <- h$breaks; nB <- length(breaks) y <- h$counts; y <- y/max(y) rect(breaks[-nB], 0, breaks[-1], y, col="cyan") } data <- read.table("http://stat.ethz.ch/education/semesters/ss2011/ams/clayton.dat",header=TRUE) pairs(data,diag.panel=panel.hist) ## e) D <- sqrt(mahalanobis(data,mean(data),cov(data))) f.qqchi(D,df=2) ################################################## ## Exercise 3 ################################################## data <- read.table("http://stat.ethz.ch/education/semesters/ss2011/ams/clayton.dat",header=TRUE) n <- dim(data)[1] m <- dim(data)[2] ## b) D2 <- mahalanobis(mean(data),rep(0,m),cov(data)) T2 <- n*D2 # T^2 statistic ## c) pvalue <- 1 - pf(q=(n-m)/m/(n-1)*T2,df1=m,df2=n-m) pvalue