# MVPLN-VAR-monthly.R -- MVPLN model with a VAR dynamic process.
# Initial file for setting things up to run the
# model through rjags.

# 20090618 : Initial version
# 20091003 : Revised to work with monthly data
# 20100302 : Split the sample into segments for the changepoints
#            identified in earlier Brandt and Sandler papers.
# 20100305 : Added more draws to get a stable posterior
# 20100710 : Revised to work with code for different VAR lag lengths
#            and to compute the DIC statistics

library(zoo)
library(rjags)

# Read in the data
load("MonthlySeries1968-2008.RData")

# Set up the data series
y <- cbind(x.m[[1]][,10], x.m[[2]][,10],
           x.m[[3]][,10], x.m[[4]][,10])

colnames(y) <- c("O", "M", "B", "PP")

# Split DV series into periods defined by the breakpoints
y1 <- window(as.ts(y), end=c(1973,1))
y2 <- window(as.ts(y), start=c(1973,2), end=c(1979,12))
y3 <- window(as.ts(y), start=c(1980,1), end=c(1989,11))
y4 <- window(as.ts(y), start=c(1989, 12), end=c(2001,9))
y5 <- window(as.ts(y), start=c(2001, 10))

# Need a leg length specification test for the full sample and the
# sub-samples: this can be done with an ACF on the standardized data
pdf(file="ACFs.pdf", width=6, height=6)
acf(apply(y1, 2, scale))
acf(apply(y2, 2, scale))
acf(apply(y3, 2, scale))
acf(apply(y4, 2, scale))
acf(apply(y5, 2, scale))
dev.off()

# Now set up the data for the looped multi-processor runs.  Much of
# this is prep for JAGS

# Put data into a list so we can index it across the runs.
Y <- list(y1, y2, y3, y4, y5)

# Define p.max = max number of lags.  This will then be used as part of the
# definition of the number of parameters, priors and initializations.
# Later this will be part of defining the inputs to the models and the
# output.

p.max <- 3

# Loop to set up data and initial conditions for samplers
for(i in 1:length(Y))
{

    # Set up data and constants for JAGS calls from R
    ytmp <- as.data.frame(Y[[i]])
    rownames(ytmp) <- NULL
    N <- dim(ytmp)[[1]]
    m <- dim(ytmp)[[2]]

    # Output the data
    attach(ytmp)
    dump(c("O", "M", "B", "PP", "N", "m"),
         file=paste("MVPLN-data-monthly", i, ".R", sep=""))
    detach(ytmp)

    # Set up the prior initializations for each sub-sample, since
    # these need to be scaled relative to the data and the number of
    # lags

    P <- solve(cov(ytmp))
    b <- matrix(log(apply(ytmp, 2, mean)), 1, m)
#    A1 <- 0.9*diag(m)

    # Generate the number of lag matrices after the first one.
    ARnames <- paste("A", 1:p.max, sep="")

    # Now assign the initial conditions for the sampler -- these are
    # in the neighborhood of the prior.  Note that this is going to
    # work for any number of lags since the dump of the output is
    # inside this final loop

    for(j in 1:p.max)
    {
       if(j==1){ assign(ARnames[j], 0.9*diag(m)) } else {

           assign(ARnames[j], matrix(0, m, m))
       }

       # Output the initial conditions for each of the samplers for each
       # sampler.  Files are named by sample index-lag index

       dump(c("P", "b", ARnames[1:j]),
            file=paste("MVPLN-inits-monthly-", i, "-", j, ".R", sep=""))
   }
}


# Define the list over which we will operate using snow / Rmpi
plist <- p.max:1
samplelist <- 1:5
ll <- apply(expand.grid(samplelist, plist), 1, list)

# Baseline call to a model / sampler -- we dispatch this to multiple
# cores.
MVPLNmodel <- function(i, ll, bugprefix, dataprefix, initsprefix,
                       chains=2, n.adapt=1000, n.final=1000)
{
    sampleidx <- ll[[i]][[1]][1]
    pidx <- ll[[i]][[1]][2]

    # Use the prefixes and the input list to get the input names.
    bug <- paste(bugprefix, "-", pidx, ".bug", sep="")
    data <- paste(dataprefix, sampleidx, ".R", sep="")
    inits <- paste(initsprefix, "-", sampleidx, "-", pidx, ".R", sep="")

    # Burn in
    x1 <- jags.model(file=bug, data=read.data(data),
                     inits=read.data(inits),
                     n.chains=chains, n.adapt)

    # Final draws and monitoring
    monitorwhat <- c("b", "Sigma", "C", paste("A", 1:pidx, sep=""))

    x2 <- coda.samples(x1,  monitorwhat, n.final)

    # Write out the results and clean up the memory
    save(x2, file=paste("MVPLN-monthly-", sampleidx, "-", pidx, ".RData", sep=""))
    rm(x2); gc(); gc();

    # Compute the DIC and penalized deviance measures
    dic <- dic.samples(x1, n.iter=n.final, type="pD")

    # Return the DIC for later manipuation and computations so we know
    # which posterior sample is optimal

    return(list(dic=dic, p=pidx, sample=sampleidx))
}

## # Running one process as a check
## tmp <- MVPLNmodel(1, ll,
##                   bugprefix="MVPLN-VAR",
##                   dataprefix="MVPLN-data-monthly",
##                   initsprefix="MVPLN-inits-monthly",
##                   chains=2, n.adapt=100, n.final=100)

# Now set up the loops for the multiprocessor computation of the
# models.

library(Rmpi)
library(snow)

# Set up the cluster and pass it the packages it needs and initialize
# the random number seeds

set.seed(9806)
nodes <- 8
cl <- makeCluster(spec=nodes, type="SOCK")
clusterEvalQ(cl, library(rjags))
clusterSetupRNGstream(cl)

# Set up calling all of this over multiple nodes.  Each node works
# with one of the nodes datasets.

out <- clusterApplyLB(cl, 1:length(ll),
                      MVPLNmodel,
                      ll=ll,
                      bugprefix="MVPLN-VAR",
                      dataprefix="MVPLN-data-monthly",
                      initsprefix="MVPLN-inits-monthly",
                      chains=2, n.adapt=30000, n.final=100000)

# Shut down the cluster
stopCluster(cl)

# Save results and quit nicely -- will summarize in a table later

save(out, file="MVPLN-VAR-monthly-DIC.RData")

mpi.quit(save="no")