# MVPLN-VAR-monthly.R -- MVPLN model with a VAR dynamic process. # Initial file for setting things up to run the # model through rjags. # 20090618 : Initial version # 20091003 : Revised to work with monthly data # 20100302 : Split the sample into segments for the changepoints # identified in earlier Brandt and Sandler papers. # 20100305 : Added more draws to get a stable posterior # 20100710 : Revised to work with code for different VAR lag lengths # and to compute the DIC statistics library(zoo) library(rjags) # Read in the data load("MonthlySeries1968-2008.RData") # Set up the data series y <- cbind(x.m[[1]][,10], x.m[[2]][,10], x.m[[3]][,10], x.m[[4]][,10]) colnames(y) <- c("O", "M", "B", "PP") # Split DV series into periods defined by the breakpoints y1 <- window(as.ts(y), end=c(1973,1)) y2 <- window(as.ts(y), start=c(1973,2), end=c(1979,12)) y3 <- window(as.ts(y), start=c(1980,1), end=c(1989,11)) y4 <- window(as.ts(y), start=c(1989, 12), end=c(2001,9)) y5 <- window(as.ts(y), start=c(2001, 10)) # Need a leg length specification test for the full sample and the # sub-samples: this can be done with an ACF on the standardized data pdf(file="ACFs.pdf", width=6, height=6) acf(apply(y1, 2, scale)) acf(apply(y2, 2, scale)) acf(apply(y3, 2, scale)) acf(apply(y4, 2, scale)) acf(apply(y5, 2, scale)) dev.off() # Now set up the data for the looped multi-processor runs. Much of # this is prep for JAGS # Put data into a list so we can index it across the runs. Y <- list(y1, y2, y3, y4, y5) # Define p.max = max number of lags. This will then be used as part of the # definition of the number of parameters, priors and initializations. # Later this will be part of defining the inputs to the models and the # output. p.max <- 3 # Loop to set up data and initial conditions for samplers for(i in 1:length(Y)) { # Set up data and constants for JAGS calls from R ytmp <- as.data.frame(Y[[i]]) rownames(ytmp) <- NULL N <- dim(ytmp)[[1]] m <- dim(ytmp)[[2]] # Output the data attach(ytmp) dump(c("O", "M", "B", "PP", "N", "m"), file=paste("MVPLN-data-monthly", i, ".R", sep="")) detach(ytmp) # Set up the prior initializations for each sub-sample, since # these need to be scaled relative to the data and the number of # lags P <- solve(cov(ytmp)) b <- matrix(log(apply(ytmp, 2, mean)), 1, m) # A1 <- 0.9*diag(m) # Generate the number of lag matrices after the first one. ARnames <- paste("A", 1:p.max, sep="") # Now assign the initial conditions for the sampler -- these are # in the neighborhood of the prior. Note that this is going to # work for any number of lags since the dump of the output is # inside this final loop for(j in 1:p.max) { if(j==1){ assign(ARnames[j], 0.9*diag(m)) } else { assign(ARnames[j], matrix(0, m, m)) } # Output the initial conditions for each of the samplers for each # sampler. Files are named by sample index-lag index dump(c("P", "b", ARnames[1:j]), file=paste("MVPLN-inits-monthly-", i, "-", j, ".R", sep="")) } } # Define the list over which we will operate using snow / Rmpi plist <- p.max:1 samplelist <- 1:5 ll <- apply(expand.grid(samplelist, plist), 1, list) # Baseline call to a model / sampler -- we dispatch this to multiple # cores. MVPLNmodel <- function(i, ll, bugprefix, dataprefix, initsprefix, chains=2, n.adapt=1000, n.final=1000) { sampleidx <- ll[[i]][[1]][1] pidx <- ll[[i]][[1]][2] # Use the prefixes and the input list to get the input names. bug <- paste(bugprefix, "-", pidx, ".bug", sep="") data <- paste(dataprefix, sampleidx, ".R", sep="") inits <- paste(initsprefix, "-", sampleidx, "-", pidx, ".R", sep="") # Burn in x1 <- jags.model(file=bug, data=read.data(data), inits=read.data(inits), n.chains=chains, n.adapt) # Final draws and monitoring monitorwhat <- c("b", "Sigma", "C", paste("A", 1:pidx, sep="")) x2 <- coda.samples(x1, monitorwhat, n.final) # Write out the results and clean up the memory save(x2, file=paste("MVPLN-monthly-", sampleidx, "-", pidx, ".RData", sep="")) rm(x2); gc(); gc(); # Compute the DIC and penalized deviance measures dic <- dic.samples(x1, n.iter=n.final, type="pD") # Return the DIC for later manipuation and computations so we know # which posterior sample is optimal return(list(dic=dic, p=pidx, sample=sampleidx)) } ## # Running one process as a check ## tmp <- MVPLNmodel(1, ll, ## bugprefix="MVPLN-VAR", ## dataprefix="MVPLN-data-monthly", ## initsprefix="MVPLN-inits-monthly", ## chains=2, n.adapt=100, n.final=100) # Now set up the loops for the multiprocessor computation of the # models. library(Rmpi) library(snow) # Set up the cluster and pass it the packages it needs and initialize # the random number seeds set.seed(9806) nodes <- 8 cl <- makeCluster(spec=nodes, type="SOCK") clusterEvalQ(cl, library(rjags)) clusterSetupRNGstream(cl) # Set up calling all of this over multiple nodes. Each node works # with one of the nodes datasets. out <- clusterApplyLB(cl, 1:length(ll), MVPLNmodel, ll=ll, bugprefix="MVPLN-VAR", dataprefix="MVPLN-data-monthly", initsprefix="MVPLN-inits-monthly", chains=2, n.adapt=30000, n.final=100000) # Shut down the cluster stopCluster(cl) # Save results and quit nicely -- will summarize in a table later save(out, file="MVPLN-VAR-monthly-DIC.RData") mpi.quit(save="no")