#Importance Sampling: # Illustration of the importance of tails #target: F=normal, proposal: G=2-sided exponential, and vice versa par(mfrow=c(2,1)) x <- seq(-3,3,0.01) cst <- 2*exp(0.5)/sqrt(2*pi) weights <- cst*exp(-0.5*(abs(x)-1)^2) plot(x,weights,type="l",ylab="weights",main= "Target: Normal, Proposal: 2-sided exponential") plot(x,1/weights,type="l",ylab="weights",main= "Target: 2-sided exponential, Proposal: Normal") x <- (1-2*(runif(10000) < 0.5))*log(runif(10000)) #Sample from G gew <- cst*exp(-0.5*(abs(x)-1)^2) #Weights for importance sampling plot(sort(gew),ylab="sorted weights",type="l") kmax <- ceiling(10*(max(x)-min(x))) #Histogram of the weighted sample c0 <- min(x) - 0.05 c1 <- c0 + kmax*0.1 z <- ceiling((x-c0)*10) h <- rep(0,kmax) for (k in (1:kmax)) h[k] <- sum(gew[z==k]) plot(c0+((1:kmax)-0.5)*0.1,h*0.001,type="h",xlab="x",ylab="density" ) xx <- seq(-3,3,0.01) lines(xx,dnorm(xx),col=2) plot(x[index],gew[index],type="h") z <- cumsum(gew*(x>3))/(1:10000) z[10000] #Importance sampling approximations for 1 - P_F(3) = 0.00135 plot(1:10000,z,ylim=c(0,0.01),xlab="Number of replicates", ylab="Approximation to P(X>3)",type="l") #Approximation as a function of number of replicates abline(h=0.00135,col=4) z <- cumsum(rnorm(10000)>3)/(1:10000) lines(z,col=2) z[10000] # Direct sampling from F # Exchanging the roles of F und G: par(mfrow=c(2,1)) cst <- 2*exp(0.5)/sqrt(2*pi) n <- 10000 x <- rnorm(n) gew <- exp(0.5*(abs(x)-1)^2)/cst # weights for importance sampling plot(sort(gew)/n,type="l",ylab="sorted weights") z <- cumsum(gew*(x>3))/(1:n) z[n] #Importance sampling approximation of 1 - P_G(3) = 0.0249 plot(1:n,z,type="l",ylim=c(0,0.05), xlab="number of replicates",ylab="Approximation of P(X>3)") abline(h=0.0249,col=4) #Importance sampling for normal tails: Proposal=shifted normal c <- 5 result <- matrix(0,nrow=20,ncol=2) for (i in (1:20)) { x <- rnorm(10000) result[i,1] <- mean(x>c) result[i,2] <- mean((x>0)*exp(-c*x - 0.5*c^2)) } result <- result/(1-pnorm(c)) -1 boxplot(result) mean(abs(result[,1])) mean(abs(result[,2])) #extreme case: P(X > 10), need to use log 1-pnorm(10) # underflow log(pnorm(-10)) -0.5*log(2*pi) - 50 - log(10) # from asymptotics x <- rnorm(10000) log(sum(exp(-10*x)[x>0])) - log(10000) -50 # Importance sampling approximation # 3. Computing the P-value in Fisher's analysis of Mendel's data pchisq(42,84) #exact x <- rgamma(1000,42) # 1. proposal = Gamma(42,1) exp(21-42*log(2))*mean(exp(0.5*(x-42))*(x<42)) # Importance sampling x <- rchisq(1000,42) # 2. proposal = chisquare(42)=Gamma(21,0.5) mean( (x/42)^21*(x<42))*(21)^(21)*gamma(21)/gamma(42) x <- 42 +2*log(runif(1000)) #3. proposal=shifted and inverted exponential mean(exp(-(x-42))*(x/42)^41*(x<42))*exp(-21+41*log(21) - lgamma(42)) #Graphs of the different proposals x <- seq(35,50,length=1000) plot(x,exp(-0.5*(x-42))*(x/42)^41*(x<42),type="l") # h*target (optimal) lines(x,exp(-(x-42))*(x/42)^41,col="2") # 1. proposal lines(x,exp(-0.5*(x-42))*(x/42)^20,col="3") # 2. proposal lines(x,exp(0.5*(x-42))*(x<42),col=4) #3. proposal