#############
## SHEET 4 ##
#############

##################################################
## Exercise 1
##################################################
library(MASS)

## a)
set.seed(42)
m <- c(0,0)
Sigma <- matrix(c(1,0.7,0.7,1),nrow=2)
n <- 100
data <- mvrnorm(n,m,Sigma)

## b)
cor(data[,1],data[,2])

## c)
generateCorValue <- function(n,m,Sigma){
data <- mvrnorm(n,m,Sigma)
return(cor(data[,1],data[,2]))
}
reps <- 1000
res <- replicate(reps, generateCorValue(n,m,Sigma))
#hist
hist(res,main="Histogram of correlation estimates",
     xlab="Correlation estimates")
abline(v=0.7, col=2)
#sd
sd(res)

## d)
simSdOfEstimator <- function(reps,n,m,Sigma){
  res <- replicate(reps, generateCorValue(n,m,Sigma))
  return(sd(res))
}

n.all <- c(10,20,50,100,200,500,1000)
n.cor <- rep(NA,length(n.all))
for (i in 1:length(n.all)){
  n.cor[i] <- simSdOfEstimator(reps,n.all[i],m,Sigma)
}

plot(n.all,n.cor,pch=19,xlab="number of observations",
     ylab="estimatd sd of the correlation estimator",type="b")

## e)
plot(log(n.all),log(n.cor),xlab="log(number of observations)",
     ylab="log(estimated sd of the correlation estimator)")
k <- lm(log(n.cor)~log(n.all))
abline(k)


##################################################
## Exercise 2
##################################################
library(MASS)

f.qqchi=function(D,df) 
{
  ## Purpose: Draws chi QQ plot
  ## ----------------------------------------------------------------------
  ## Arguments: D:  Mahalanobis distances.
  ##           df:  Degrees of freedom for the chi^2 distribution.
  ## ----------------------------------------------------------------------
 
  n <- length(D)
  plot(sqrt(qchisq(ppoints(n),df=df)),sort(D),
         ylab="Sample quantiles", 
         xlab=expression(paste(chi,"-quantiles")))
    
  y <- quantile(D,c(0.25,0.75))
  x <- sqrt(qchisq(c(0.25,0.75),df=df))
  slope <- diff(y)/diff(x)
  int <- y[1L]-slope*x[1L]
  abline(int, slope)
}


## b)
m <- c(0,0)
Sigma <- matrix(c(1,0.7,0.7,1),nrow=2)
t.n <- c(10,50,100,200) # varying sample sizes
par(mfrow=c(2,2))
for(n in t.n){
    data <- as.data.frame(mvrnorm(n,m,Sigma))
    D <- sqrt(mahalanobis(data,mean(data),cov(data)))
    f.qqchi(D,df=2)
}

## c)
m <- c(0,0)
Sigma <- matrix(c(1,0.7,0.7,1),nrow=2)
t.n <- c(10,50,100,200) # varying sample sizes
par(mfrow=c(2,2))
f.t <- function(x) abs(x) # a nonlinear transformation
                          # try also exp(x), x^2
for(n in t.n){
    data <- f.t(as.data.frame(mvrnorm(n,m,Sigma)))
    D <- sqrt(mahalanobis(data,mean(data),cov(data)))
    f.qqchi(D,df=2)
}

## d)

panel.hist <- function(x, ...){
  usr <- par("usr"); on.exit(par(usr))
  par(usr = c(usr[1:2], 0, 1.5) )
  h <- hist(x, plot = FALSE)
  breaks <- h$breaks; nB <- length(breaks)
  y <- h$counts; y <- y/max(y)
  rect(breaks[-nB], 0, breaks[-1], y, col="cyan")
}

data <- read.table("http://stat.ethz.ch/education/semesters/ss2011/ams/clayton.dat",header=TRUE)
pairs(data,diag.panel=panel.hist)

## e)
D <- sqrt(mahalanobis(data,mean(data),cov(data)))
f.qqchi(D,df=2)


##################################################
## Exercise 3
##################################################

data <- read.table("http://stat.ethz.ch/education/semesters/ss2011/ams/clayton.dat",header=TRUE)
n <- dim(data)[1]
m <- dim(data)[2]

## b)
D2 <- mahalanobis(mean(data),rep(0,m),cov(data))
T2 <- n*D2 # T^2 statistic

## c)
pvalue <- 1 - pf(q=(n-m)/m/(n-1)*T2,df1=m,df2=n-m)
pvalue