#####################################################################
## Functions for hidden Markov models (discrete states)            ##
#####################################################################

# Simulation of Gaussian mixtures with Markov regime
f.simul <- function(Tt,mu,sgm,pr,p0)
{
  ## Purpose: Simulating from a Gaussian mixture with Markov regime
  ## ----------------------------------------------------------------------
  ## Arguments: Tt = length of the simulated series,
  ##            mu,sgm = parameters of the Gaussian components,
  ##            p0 = initial distribution, pr = transition probability matrix
  ## ----------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 2 Apr 2003, 17:49
  x <- rep(1,Tt)
  m <- length(mu)
  pc <- pr
  for (i in (1:m)) pc[i,] <- cumsum(pr[i,])
  x[1] <- sample(1:m,1,prob=p0)
  for (tt in (2:Tt)) {
    x[tt] <- min((1:m)[pc[x[tt-1],]>runif(1)])
  }
  y <- rnorm(Tt,mu[x],sgm[x])
  list(obs=y,states=x)
}

# setting the parameters
mu <- c(0,1)
sgm <- c(1,1)
p0 <- c(0.5,0.5)
pr <- rbind(c(0.8,0.2),c(0.2,0.8))
Tt <- 100
erg.sim <- f.simul(Tt,mu,sgm,pr,p0)

# computing the observation densities
b <- outer(mu,erg.sim$obs,FUN="-")/sgm
b <- dnorm(b)/sgm

# true states vs. P[x_t=2|y_t]
plot(1:Tt,erg.sim$states-1)
points(1:Tt,b[2,]/(b[1,]+b[2,]),type="h",col=2)
abline(h=0.5,lty=3)

# number of correctly classified states using only the observation at the same time
sum((erg.sim$obs>0.5)==(erg.sim$states-1))

# Filtering of an arbitrary hidden Markov model
f.filter <- function(b,pr,p0)
{
  ## Purpose: Computing the filter probabilities fx and the predictions fy
  ##          of the data in a HMM
  ## ----------------------------------------------------------------------
  ## Arguments: b = conditional observation densities (b(k,t)=p(y_t|x_t=k)),
  ##            pr = transition probabilities, p0 = initial probability
  ## ----------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 2.4.03/12.9.03
  Tt <- ncol(b)
  m <- nrow(b)
  fx <- matrix(p0,nrow=m,ncol=Tt)
  fy <- rep(1,Tt)
  fx[,1] <- b[,1]*fx[,1]
  fy[1] <- sum(fx[,1])
  fx[,1] <- fx[,1]/fy[1]
  for (ti in (2:Tt)) {
    fx[,ti] <- b[,ti]*(fx[,ti-1]%*%pr)
    fy[ti] <- sum(fx[,ti])
    fx[,ti] <- fx[,ti]/fy[ti]
  }
  list(fx=fx,fy=fy)
}

erg.filter <- f.filter(b,pr,p0)

# plotting true states vs. P[x_t=2|y_{1:t}]
plot(1:Tt,erg.sim$states-1)
points(1:Tt,erg.filter$fx[2,],type="h",col=2)
abline(h=0.5,lty=3)

# number of correctly classified states using observations up to time t
sum((erg.filter$fx[2,]>0.5)==(erg.sim$states-1))

# Smoothing
f.smooth <- function(b,fx,fy,pr)
{
  ## Purpose: Smoothing in a HMM
  ## ----------------------------------------------------------------------
  ## Arguments: b = observation densities, fx = filter distributions,
  ##            fy = predictions of observations,
  ##            pr = transition probabilities
  ## Output: margin = marginal smoother probabilities
  ##         pairs = smoother probabilities for successive pairs
  ## ----------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 3 Apr 2003, 14:30
  Tt <- ncol(b)
  m <- nrow(b)
  r <- matrix(1,nrow=m,ncol=Tt)
  sm2 <- array(1,dim=c(m,m,Tt-1))
  for (ti in (Tt:2)) {
    mat <- t(matrix(r[,ti]*b[,ti],m,m))
    mat <- pr*mat/fy[ti]
    r[,ti-1] <- apply(mat,1,sum)
    sm2[,,ti-1] <- matrix(fx[,ti-1],m,m)*mat
  }
  list(margin=r*fx,pairs=sm2)
}

erg.smooth <- f.smooth(b,erg.filter$fx,erg.filter$fy,pr)
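## Quick sanity checks (an addition, not in the original script): each
## column of the marginal smoother must be a probability distribution,
## and summing the pairwise probabilities over the second state must
## recover the margins.
range(colSums(erg.smooth$margin))                                       # ~1 everywhere
max(abs(apply(erg.smooth$pairs,c(1,3),sum) - erg.smooth$margin[,-Tt]))  # ~0
## the one-step prediction densities fy yield the log likelihood of y_{1:T}
sum(log(erg.filter$fy))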
# plotting true states vs. P[x_t=2|y_{1:T}]
plot(1:Tt,erg.sim$states-1)
points(1:Tt,erg.smooth$margin[2,],type="h",col=2)
abline(h=0.5,lty=3)

# classification using all observations (overlaid, slightly offset)
points((erg.smooth$margin[2,]>0.5)+0.01,col=3)
# number of correctly classified states using all observations
sum((erg.smooth$margin[2,]>0.5)==(erg.sim$states-1))

#-----------------------------------------------------------------------
# Fitting a Gaussian mixture with Markov regime to
# data of Christian Stricker (evoked potentials in a nerve cell)
jan07p1 <- read.table("jan07p1.dat",header=T)
plot(jan07p1$amplit,type="l")
hist(jan07p1$amplit,freq=F)
y <- jan07p1$amplit
Tt <- length(y)
miss <- (1:Tt)[is.na(y)]
obs <- (1:Tt)[-miss]

# Initialization
m <- 4
y1 <- y[(!is.na(y)) & (y <= -15)]
y2 <- y[(!is.na(y)) & (y > -15) & (y <= -10)]
y3 <- y[(!is.na(y)) & (y > -10) & (y <= -4)]
y4 <- y[(!is.na(y)) & (y > -4)]
mu <- c(mean(y1),mean(y2),mean(y3),mean(y4))
sgm <- c(var(y1),var(y2),var(y3),var(y4))
sgm <- sqrt(sgm)
p0 <- c(length(y1),length(y2),length(y3),length(y4))
p0 <- p0/sum(p0)
pr <- t(matrix(p0,m,m))
niter <- 200
mu.save <- matrix(mu,m,niter)
sgm.save <- matrix(sgm,m,niter)
p0.save <- matrix(p0,m,niter)
pr.save <- array(pr,dim=c(m,m,niter))
loglik <- rep(0,niter)

for (i.rep in (2:niter)) {
  # E-step
  b <- matrix(1,m,Tt)
  b[,obs] <- outer(mu,y[obs],FUN="-")/sgm
  b[,obs] <- dnorm(b[,obs])/sgm
  erg.f <- f.filter(b,pr,p0)
  loglik[i.rep-1] <- sum(log(erg.f$fy))
  erg.s <- f.smooth(b,erg.f$fx,erg.f$fy,pr)
  margin <- erg.s$margin
  pairs <- erg.s$pairs
  # M-step
  p0 <- apply(margin[,-Tt],1,mean)
  p0.save[,i.rep] <- p0
  pr <- apply(pairs,c(1,2),mean)/p0
  pr.save[,,i.rep] <- pr
  sum.margin <- apply(margin[,obs],1,sum)
  mu <- drop(margin[,obs]%*%y[obs])/sum.margin
  mu.save[,i.rep] <- mu
  sgm <- apply(margin[,obs]*(outer(mu,y[obs],FUN="-"))^2,1,sum)
  sgm <- sqrt(sgm/sum.margin)
  sgm.save[,i.rep] <- sgm
}

# Results
mu.save[,niter]
sgm.save[,niter]
round(pr.save[,,niter],3)
p0.save[,niter]
sgm.save[,niter]^2

# Checking the iteration, smoothing estimate of the regime,
# presentation of results
i.rep <- niter+1
b <- matrix(1,m,Tt)
b[,obs] <- outer(mu,y[obs],FUN="-")/sgm
b[,obs] <- dnorm(b[,obs])/sgm
erg.f <- f.filter(b,pr,p0)
loglik[i.rep-1] <- sum(log(erg.f$fy))
plot(loglik)   # improvement of the log likelihood during the iteration
-2*loglik[niter] + m*(m+1)   # AIC
margin <- f.smooth(b,erg.f$fx,erg.f$fy,pr)$margin
z <- mu[apply(margin,2,which.max)]
par(mfrow=c(2,1))
plot(y[1:500],type="l",xlab="time",ylab="amplitude")
lines(z[1:500],col=4)
plot(480:Tt,y[480:Tt],type="l",xlab="time",ylab="amplitude")
lines(480:Tt,z[480:Tt],col=4)
par(mfrow=c(1,1))

# marginal distribution
hist(jan07p1$amplit,freq=F,ylim=c(0,0.06))
x <- seq(-30,2,0.1)
dichte <- matrix(0,4,length(x))
for (i in (1:4)) {
  dichte[i,] <- p0.save[i,niter]*dnorm(x,mu.save[i,niter],sgm.save[i,niter])
  lines(x,dichte[i,],col=i+1)
}
lines(x,apply(dichte,2,sum))

# empirical and theoretical correlations
mu0 <- mu.save[,niter]-sum(p0.save[,niter]*mu.save[,niter])
kov <- sum(p0.save[,niter]*(sgm.save[,niter]^2 + mu0^2))
pr <- pr2 <- pr.save[,,niter]
for (h in (1:20)) {
  kov <- c(kov,sum(pr2*((p0.save[,niter]*mu0)%o%mu0)))
  pr2 <- pr%*%pr2
}
kov <- kov/kov[1]
plot(0:20,kov,type="h")
abline(h=2/sqrt(length(y)),col=2)
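## Hedged addition (not in the original script): the plot above shows only
## the theoretical autocorrelations implied by the fitted HMM; for the
## comparison announced in the comment, one can overlay the empirical
## autocorrelations of the data (na.pass tolerates the missing values).
acf.emp <- drop(acf(y,lag.max=20,na.action=na.pass,plot=FALSE)$acf)
points(0:20,acf.emp,col=4,pch=16)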
#-----------------------------------------------------------------------
#########################################################################
## Blockwise MCMC for AR(1) with t-distributed observation noise       ##
#########################################################################

kalman.sim <- function(phi,s2eps,s2v,y)
{
  ## Purpose: Forward computation and backward simulation for a Gaussian
  ##          AR(1) with Gaussian observation noise
  ## ----------------------------------------------------------------------
  ## Arguments: phi, s2eps = parameters of the AR(1)
  ##            s2v = variance of the observation noise (time dependent)
  ##            y = observations
  ## ----------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 26 May 2005, 10:00
  n <- length(y)
  mf <- rep(0,n)                  # initializing the filter means
  Rf <- rep(0,n)                  # initializing the filter variances
  mp <- rep(0,n+1)                # initializing the prediction means
  Rp <- rep(s2eps/(1-phi^2),n+1)  # initializing the prediction variances
  x <- rnorm(n)                   # initializing the sample
  for (i in (1:n)){
    g <- Rp[i]/(s2v[i]+Rp[i])          # Kalman gain
    mf[i] <- mp[i] + g*(y[i]-mp[i])    # filter mean from prediction mean
    Rf[i] <- Rp[i]*(1-g)               # filter variance from prediction variance
    Rp[i+1] <- phi^2*Rf[i] + s2eps     # prediction variance from filter variance
    mp[i+1] <- phi*mf[i]               # prediction mean from filter mean
  }
  x[n] <- mf[n] + sqrt(Rf[n])*x[n]     # sampling the last value
  for (i in ((n-1):1)){                # backward sampling
    g <- phi*Rf[i]/Rp[i+1]
    x[i] <- mf[i]+g*(x[i+1]-mp[i+1]) + sqrt(Rf[i]*(1-g*phi))*x[i]
  }
  x
}

mcmc.tnoise <- function(phi,s2eps,nu,c,y,niter,thin=5)
{
  ## Purpose: blockwise MCMC for AR(1) with t-distributed observation noise
  ## ----------------------------------------------------------------------
  ## Arguments: phi,s2eps = parameters of the AR(1)
  ##            nu, c = parameters of the noise distribution, which is
  ##                    written as c*N(0,1)/sqrt(Gamma(nu/2,nu/2))
  ##            y = observations
  ##            niter = number of iterations of the Gibbs sampler
  ##            thin = thinning factor (only results for iterations equal
  ##                   to a multiple of thin are returned)
  ## ----------------------------------------------------------------------
  ## Author: Hans-Ruedi Kuensch, Date: 26 May 2005, 10:33
  nu <- nu/2
  c <- c^2
  n <- length(y)
  x <- matrix(0,nrow=niter%/%thin,ncol=n)
  s <- matrix(1,nrow=niter%/%thin,ncol=n)
  s.act <- rep(1,n)
  for (i in (2:niter)){
    x.act <- kalman.sim(phi,s2eps,s2v=c/s.act,y)  # updating the states
    rate <- nu+0.5*(y-x.act)^2/c
    s.act <- rgamma(n,shape=nu+0.5,rate=rate)     # updating the scales
    if ((i %% thin)==0) {   # saving the current values if i = integer*thin
      x[i%/%thin,] <- x.act
      s[i%/%thin,] <- s.act
    }
  }
  list(states=x,scale=1/sqrt(s))
}

# Checking with simulations
s2eps <- 4
nu <- 1
seps <- sqrt(s2eps)
phi <- 0.8
c <- 1
x <- rep(0,100)
for (i in (2:100)) x[i] <- phi*x[i-1] + seps*rnorm(1)
y <- x+rt(100,df=nu)
erg <- mcmc.tnoise(phi,s2eps,nu,c,y,niter=5000)

# plotting the results for the states
plot(y,type="l",col=4)
for (i in (1:nrow(erg$states))) lines(erg$states[i,],col=2)
lines(x)
lines(y,col=4)
qt <- apply(erg$states,2,quantile,probs=c(0.1,0.5,0.9))
plot(y,type="l",col=6)
lines(x)
for (i in (1:3)) lines(qt[i,],col=i+1)

# plotting the results for the scale variables
plot(abs(y-x),type="l")
for (i in (1:nrow(erg$scale))) lines(erg$scale[i,],col=2)
lines(abs(y-x))
abline(h=0)
abline(h=1)

# traces of the values at a fixed time
i <- 22
hist(erg$states[,i])
plot(erg$states[,i],type="l")
acf(erg$states[,i])
hist(erg$scale[,i],nclass=100)
plot(erg$scale[,i],type="l")
acf(erg$scale[,i])
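## Hedged addition (not in the original script): a crude effective sample
## size for the saved (thinned) chain at a fixed time, based on the
## empirical autocorrelations, to quantify the mixing seen in the acf
## plots above.
i <- 22
rho <- drop(acf(erg$states[,i],lag.max=50,plot=FALSE)$acf)[-1]
tau <- 1 + 2*sum(rho[rho>0])   # crude integrated autocorrelation time
nrow(erg$states)/tau           # effective number of draws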
#########################################################################
## Particle filtering                                                  ##
#########################################################################

f.particle <- function(x0,y,f.state,b, ...)
{
  ## Purpose: Computing the basic particle filter
  ## ----------------------------------------------------------------------
  ## Arguments: x0 = sample from the starting density
  ##            y = observed values
  ##            f.state = function describing the evolution of the states,
  ##                      x_t = f(x_{t-1},u_t,t) with u_t standard normal
  ##            b = observation density (time homogeneous)
  ## ----------------------------------------------------------------------
  ## Author: H. R. Kuensch, Date: 13 Mar 2005, 22:11
  n.rep <- length(x0)
  n <- length(y)
  x <- matrix(x0,nrow=n.rep,ncol=n+1)
  for (i in (1:n)){
    z <- f.state(x[,i],rnorm(n.rep),i, ...)               # propagation
    w <- b(z,y[i], ...)                                   # reweighting
    x[,i+1] <- sample(z,size=n.rep,replace=TRUE,prob=w)   # resampling
  }
  x
}

# Example of Andrade-Netto
# (nonlinear AR(1), observations = squared states + noise)
f.andrade <- function(x,u,i, ...) {
  0.5*x + 25*x/(1+x^2) + 8*cos(1.2*i) + sqrt(10)*u
}
b.square <- function(z,y, ...) {dnorm(y-0.05*z^2)}

# Simulation of the model
x <- rnorm(101)
x[1] <- 5*x[1]
for (i in (1:100)) x[i+1] <- f.andrade(x[i],x[i+1],i)  # x[i+1] is still N(0,1) here and serves as the innovation
y <- 0.05*x[-1]^2 + rnorm(100)
plot(x[-1],type="l")
lines(y,col=2)

# Particle filter
x0 <- 5*rnorm(200)   # starting values
x.filt <- f.particle(x0,y,f.state=f.andrade,b=b.square)

# Stochastic volatility
f.ar <- function(x,u,i,phi,seps, ...) {phi*x + seps*u}
b.vol <- function(z,y,mu, ...) {exp(-0.5*(z+mu + y^2*exp(-z-mu)))}

# Simulation of the model
phi <- 0.9
seps <- 0.6
mu <- 0.5
x <- rnorm(101)
x[1] <- seps*x[1]/sqrt(1-phi^2)
for (i in (1:100)) x[i+1] <- f.ar(x[i],x[i+1],i,phi,seps)
y <- exp(0.5*(x[-1]+mu))*rnorm(100)
par(mfrow=c(2,1))
plot(x[-1],type="l")
plot(y,type="l")
par(mfrow=c(1,1))

# Particle filter
x0 <- seps*rnorm(200)/sqrt(1-phi^2)   # starting values
x.filt <- f.particle(x0,y,f.state=f.ar,b=b.vol,phi=phi,seps=seps,mu=mu)

# Plotting the quantiles
x.filtq <- apply(x.filt,2,quantile,probs=c(0.1,0.5,0.9))
plot(0:100,x,type="l",xlab="time",ylab="state")
lines(1:100,y,col=2)
lines(0:100,x.filtq[1,],col=3)
lines(0:100,x.filtq[2,],col=4)
lines(0:100,x.filtq[3,],col=6)
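## Hedged addition (not in the original script): a small variant of
## f.particle that also returns the standard particle estimate of the log
## likelihood, log p(y_{1:n}) ~ sum_t log(mean of the weights at time t).
## The name f.particle.ll and its interface are assumptions for
## illustration; note that with an unnormalized density such as b.vol
## (which drops the 1/sqrt(2*pi) factor) the estimate is only correct up
## to an additive constant.
f.particle.ll <- function(x0,y,f.state,b, ...)
{
  n.rep <- length(x0)
  n <- length(y)
  x <- matrix(x0,nrow=n.rep,ncol=n+1)
  ll <- 0
  for (i in (1:n)){
    z <- f.state(x[,i],rnorm(n.rep),i, ...)
    w <- b(z,y[i], ...)
    ll <- ll + log(mean(w))   # contribution of p(y_i|y_{1:i-1})
    x[,i+1] <- sample(z,size=n.rep,replace=TRUE,prob=w)
  }
  list(x=x,loglik=ll)
}
# e.g. for the stochastic volatility model above:
f.particle.ll(x0,y,f.state=f.ar,b=b.vol,phi=phi,seps=seps,mu=mu)$loglik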