#Metropolis-Hastings: Independence sampler and random walk proposals

#Independence sampler, Ziel: F=Normal, Vorschlag: G=2-seitig exponential

f.indep.sampler<- function(nrep)
{
  ## Purpose: Independence sampler with two-sided exponential as proposal
  ##          and normal target
  ## -------------------------------------------------------------------------
  ## Arguments: nrep = sample size
  ## -------------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 22 Jan 2002, 18:25
  xp <- (1-2*(runif(nrep+1) < 0.5))*log(runif(nrep+1)) #Vorschlags-Stichprobe
  tacc <- rep(T,nrep)
  for (i in (1:nrep)) {
    if (log(runif(1)) > 0.5*((abs(xp[i])-1)^2 - (abs(xp[i+1])-1)^2)) {
      xp[i+1] <- xp[i]
      tacc[i] <- F
    }
  }
  list(sample=xp[-1],tacc=tacc)
}

nrep <- 10000
erg <- f.indep.sampler(nrep)
x <- erg$sample
qqnorm(x)
par(mfrow=c(1,2))
plot(x[-nrep],x[-1])  # scatterplot of two consecutive values of the chain
                      # the rejections show up on the diagonal.
plot(x[1:(nrep-2)],x[3:nrep])
sum(erg$tacc)
plot(x,type="l")
acf(x)

br <- c(-12,seq(-4,4,0.2),12) # class boundaries for a histogram 
hg <- hist(x[erg$tacc],breaks=br,plot=F)$counts
                # Distribution of accepted values
hf <- hist(x[-erg$tacc],breaks=br,plot=F)$counts
                # Distribution of duplicated values
par(mfrow=c(1,1))
barplot(height=rbind(hf,hg-hf),main=
        "Histogram of accepted and duplicated values")

# exchanging target and proposal 

f.indep.sampler.2<- function(nrep)
{
  ## Purpose: Independence sampler with normal as proposal
  ##          and twosided exponential target
  ## -------------------------------------------------------------------------
  ## Arguments: nrep = sample size
  ## -------------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 22 Jan 2002, 18:25
  xp <- rnorm(nrep+1) #Vorschlags-Stichprobe
  tacc <- rep(T,nrep)
  for (i in (1:nrep)) {
    if (log(runif(1)) > 0.5*((abs(xp[i+1])-1)^2 - (abs(xp[i])-1)^2)) {
      xp[i+1] <- xp[i]
      tacc[i] <- F
    }
  }
  list(sample=xp[-1],tacc=tacc)
}

nrep <- 10000
erg <- f.indep.sampler.2(nrep)
x <- erg$sample
par(mfrow=c(1,2))
qqnorm(x)
q <- log(nrep+1) - log(2*(1:floor(0.5*(nrep+1))))
q <- c(-q,q[length(q):1]) # quantiles of the target 
plot(q,sort(x),xlab="theoretical quantiles", ylab="sample quantiles")
plot(x[-nrep],x[-1])  # scatterplot 
plot(x[1:(nrep-2)],x[3:nrep])
sum(erg$tacc)
plot(x,type="l")
acf(x)

br <- c(-12,seq(-6,6,0.2),12) 
hg <- hist(x[erg$tacc],breaks=br,plot=F)$counts
hf <- hist(x[-erg$tacc],breaks=br,plot=F)$counts
par(mfrow=c(1,1)
barplot(height=rbind(hf,hg-hf),main=
        "Histogram of accepted and duplicated values")


#(Normal) random walk Metropolis: Generic function

f.metrop <- function(x0,sigma,nrep,r.target, ...)
{
  ## Purpose: Metropolis algorithm with Gaussian random walk proposals
  ## -------------------------------------------------------------------------
  ## Arguments: x0 = (vector of) starting values, sigma = vector of
  ##            standard deviations for the random walk, nrep=number of
  ##            replicates, r.target = ratio of target densities
  ## -------------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 22 Jan 2002, 19:21
  d <- length(x0)
  x <- matrix(rnorm(d*nrep),nrow=nrep,ncol=d)
  y <- matrix(0,nrow=nrep-1,ncol=d)
  x <- x%*%diag(sigma,nrow=d)
  x[1,] <- x0
  nrej <- 0
  for (i in (2:nrep)) {
    x[i,] <- x[i-1,]+x[i,]
    y[i-1,] <- x[i,]
    if (runif(1) > r.target(x[i,],x[i-1,],...)) {
      x[i,] <- x[i-1,]
      nrej <- nrej+1
    }
  }
  list(sample=x,nrej=nrej,prop=y)
}

#Example 1: target = twosided exponential distribution
r.twosided.exp <- function(x1,x2) exp(abs(x2)-abs(x1))
erg <- f.metrop(0,1,5000,r.twosided.exp)
erg$nrej
x <- erg$sample
hist(x,breaks=40,freq=F)
y <- seq(-6,6,length=1000)
lines(y,0.5*exp(-abs(y)))
par(mfrow=c(1,2))
plot(x,type="l")
acf(x,lag.max=100)
par(mfrow=c(1,1))


#Example 2: target = mixture of 2 bivariate normals
f.contour.mixture <- function(p,mu,scale)
{
  ## Purpose: Plotting the density of a mixture of 2 bivariate normals
  ##          The first normal is the standard normal. 
  ## -------------------------------------------------------------------------
  ## Arguments: p = mixture weight, mu=location of second normal,
  ##            scale = scale of second normal (scalar) 
  ## -------------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 22 Jan 2002, 22:07
  x <- seq(-2.5,4,length=32)
  z <- p*exp(-0.5*outer(x^2,x^2,FUN="+"))+
    (1-p)*exp(-0.5*outer((x-mu[1])^2,(x-mu[2])^2,FUN="+")/scale^2)
  contour(x,x,z,nlevels=20)}


r.mixture <- function(x1,x2,p,mu,scale)
{
  ## Purpose: Computing the ratio of a mixture density at two places x1, x2
  ## -------------------------------------------------------------------------
  ## Arguments: p, mu scale as in the previous function
  ## -------------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 22 Jan 2002, 21:25
  r <- p*exp(-0.5*sum(x1^2)) + (1-p)*exp(-0.5*sum((x1-mu)^2)/scale^2)
  r <- r/(p*exp(-0.5*sum(x2^2)) + (1-p)*exp(-0.5*sum((x2-mu)^2)/scale^2))
  r
}


mu <- c(3,3)
scale <- 0.5
f.contour.mixture(0.7,mu,scale)
erg <- f.metrop(c(0,0),c(1,1),5000,r.mixture,0.7,mu,scale)
erg$nrej
x <- erg$sample
points(x[,1],x[,2])
par(mfrow=c(2,1))
plot(x[1:500,1],type="l")
lines(x[1:500,2],col=2)
acf(x[,1],lag.max=100)
par(mfrow=c(1,1))

#Example 3: posterior for the parameters of a normal distribution

x <- rnorm(4) #generating data
xb <- mean(x)
n <- length(x)
xs <- var(x)*(n-1)/n

#Choice of hyperparameter
#1. not informative
xi <- 0
ka2 <- 100*xs
gam <- 1
lam <- 1
#2. informative for theta, in agreement with data
xi <- xb
ka2 <- xs/n
gam <- 1
lam <- 1
#3. informative for theta, conflicting the data
xi <- xb+3*sqrt(xs/n)
ka2 <- xs/n
gam <- 1
lam <- 1 

f.post.contour <- function(xi,ka2,gam,lam,n,xb,xs)
{
  ## Purpose: Plotting contours of the posterior. The main task is to define
  ##          a suitable domain where the posterior is concentrated 
  ## -------------------------------------------------------------------------
  ## Arguments: xi,ka2,al,lam=hyperparameters, xb,xs=mle
  ##            n=sample size (n=0 gives the prior !)
  ## -------------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date:  1 Nov 1999, 18:26
  s2 <- lam/gam #gam/lam is the prior mean of 1/sigma^2
  a <- s2/(s2+n*ka2)
  mu <- a*xi+(1-a)*xb # posterior mean of theta if sigma^2=s2
  th <- seq(mu-3*sqrt(a*ka2),mu+3*sqrt(a*ka2),length=32)
                                        # defines the range for theta
  lam <- lam+0.5*n*xs 
  gam <- gam+0.5*n
     #if theta=xb, posterior for 1/sigma^2 is Gamma(lam,gam)
  si <- sqrt(lam)*seq(1/sqrt(qgamma(0.99,gam)),1/sqrt(qgamma(0.01,gam)),
                      length=32) # defines the range for sigma 
  z <- 0.5*outer((th-xi)^2,rep(1,32))/ka2+(2*gam+1)*outer(rep(1,32),log(si))+
    outer(lam+0.5*n*(th-xb)^2,si^2,FUN="/")
           #log posterior on the lattice of values for theta and sigma 
  contour(th,si,-z,nlevels=20,xlab="theta",ylab="sigma")
  }


#Ratios of the posterior density
r.post <- function(x1,x2,xi,ka2,gam,lam,n,xb,xs)
{
  ## Purpose: Computing ratio of posteriori densities at x1 and x2
  ##          for an i.i.d. normal model
  ## ----------------------------------------------------------------------
  ## Arguments: x=(mu,log(sigma)) = Parameters of the normal distribution
  ##            xi,ka2,gam,lam = Parameters of the prior
  ##            n=number of data, xb=sample mean, xs=sample variance
  ## ----------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 24 Jan 2004, 17:25
  quot <- 0.5*((x2[1]-xi)^2-(x1[1]-xi)^2)/ka2 + (2*gam+n)*(x2[2]-x1[2]) +
    (lam+0.5*n*(xs+(x2[1]-xb)^2))*exp(-2*x2[2]) -
    (lam+0.5*n*(xs+(x1[1]-xb)^2))*exp(-2*x1[2])
  exp(quot)
}

f.post.contour(xi,ka2,gam,lam,n,xb,xs)
points(xb,sqrt(xs),pch=15)
title(main="log posterior density")
# Few iterations (with log scale for sigma) to explain the idea
metrop <- f.metrop(x0=c(xb,0.5*log(xs)),sigma=c(0.6,0.3),nrep=21,
                   r.target=r.post,xi,ka2,gam,lam,n,xb,xs)
sample <- metrop$sample
sample[,2] <- exp(sample[,2])
prop <- metrop$prop
prop[,2] <- exp(prop[,2])
points(sample[,1],sample[,2],col=2)
arrows(sample[-21,1],sample[-21,2],prop[,1],prop[,2],col=4)
#Result of many iterations 
metrop <- f.metrop(x0=c(xb,0.5*log(xs)),sigma=c(0.6,0.3),nrep=5000,
                   r.target=r.post,xi,ka2,gam,lam,n,xb,xs)
metrop$nrej  # number of rejections
param <- metrop$sample
points(param[,1]+0.04*(runif(5000)-0.5),
       exp(param[,2])+0.04*(runif(5000)-0.5),cex=0.5)
# overlapping points made visible by jittering
par(mfrow=c(2,1))
plot(param[1:500,1],type="l")
plot(param[1:500,2],type="l")
#Autocorrelations 
acf(param[,1],lag.max=100)
acf(param[,2],lag.max=100)


# Discrete version of random walk Metropolis-Hastings

#defining the target distribution (mixture of 2 discretized Beta-distributions)
a1 <- 0.3
n1 <- 500
a2 <- 0.5
n2 <- 500

k <- 0.01*(1:99)
p1 <- k^(a1*n1)*(1-k)^((1-a1)*n1)
p2 <- k^(a2*n2)*(1-k)^((1-a2)*n2)
# work with logs to avoid underflow in target probabilities
lp1 <- a1*n1*log(k) + (1-a1)*n1*log(1-k) +log(0.4) - log(sum(p1))
lp2 <- a2*n2*log(k) + (1-a2)*n2*log(1-k) +log(0.6) - log(sum(p2))
lmn <- pmin(lp1,lp2)
lmx <- pmax(lp1,lp2)
lp0 <- lmx + log(1+exp(lmn-lmx))
p0 <- exp(lp0) # same as p0 <- 0.4*p1/sum(p1) + 0.6*p2/sum(p2)
plot(k,p0,type="h")

#defining the transition matrix
m <- 15 # range of the random walk
q <- outer(1:99,1:99,FUN=function(x,y) {1<= abs(x-y) & abs(x-y) <= m})
q <- q + outer(1:99,1:99,FUN=function(x,y) {abs(x-y) >=99-m})
q <- q/(2*m) # proposal
a <- outer(lp0,lp0, FUN="-")
a <- exp(pmin(0,t(a))) #acceptance 
p.t <- a*q
rej <- 1-apply(p.t,1,sum) # rejection probabilities
sum(rej*p0) # rejection probability in stationary state
p.t <- p.t + diag(rej) # adjust diagonal

sum(abs(p0-as.vector(t(p.t)%*%p0))) #check stationarity

ev <- eigen(p.t)
plot(abs(ev$values),type="h")
ev$value[2] #geometric convergence rate to the stationary distribution

mimage(q)
mimage(a*q)
p.p <- p.t
mimage(p.p)
p.p <- p.p%*%p.p #P^2, P^4, P^8 etc.


mimage <- function(mat, ncol = 40, zlim=range(mat))
{
  ## Purpose: Color representation of a matrix 
  ## ----------------------------------------------------------------------
  ## Arguments:
  ## ----------------------------------------------------------------------
  ## Author: Hans-Rudolf Kuensch, Date:  3 Dec 2007, 16:47

  ## Interpolating a 'sequential' ColorBrewer palette
  YlOrBr <- c("#FFFFD4", "#FED98E", "#FE9929", "#D95F0E", "#993404")

  stopifnot(length(d <- dim(mat)) == 2)
  image(x=1:d[2], y=1:d[1], z=t(mat)[,d[1]:1], axes = FALSE,
        xlab = "j", ylab  = "i",zlim=zlim,
        col = colorRampPalette(YlOrBr, space = "Lab")(ncol))
  axis(1)
  axis(2, at= 1:d[2], labels= as.character(d[2]:1))
}