[R] modifying a built in function from the stats package (fixing arima) (CONCLUSIONS)

Fri Mar 6 11:53:47 CET 2009

Thanks a lot to everybody that helped me out with this.

Conclusions:

(1)
In order to edit arima in R:
>fix(arima)

or alternatively:
>arima<-edit(arima)

(2)
This is not contained in the "Introduction to R" manual.

(3)
A "productive" fix of arima is attached (arma coefficients printed out and
error catched so that it doesn't halt parent loops to search for candidate
coefficients):
Note 1: "productive" means I'm a beginner in R so there is probably a better
way to print the error message and fill the output arguments (I only return
NA in aic,var and sigma2).
Note 2: Changing BFGS to Nelder–Mead in "exitpoint 0" changes the
coefficients for which arima can't fit a model but results in terms of aic
and sigma2 also change significantly. By visual inspection I think that BFGS
works better.

function (x, order = c(0, 0, 0), seasonal = list(order = c(0,
    0, 0), period = NA), xreg = NULL, include.mean = TRUE, transform.pars =
TRUE,
    fixed = NULL, init = NULL, method = c("CSS-ML", "ML", "CSS"),
    n.cond, optim.control = list(), kappa = 1e+06)
{
    "%+%" <- function(a, b) .Call(R_TSconv, a, b)
    upARIMA <- function(mod, phi, theta) {
        p <- length(phi)
        q <- length(theta)
        mod$phi <- phi
        mod$theta <- theta
        r <- max(p, q + 1)
        if (p > 0)
            mod$T[1:p, 1] <- phi
        if (r > 1)
            mod$Pn[1:r, 1:r] <- .Call(R_getQ0, phi, theta)
        else if (p > 0)
            mod$Pn[1, 1] <- 1/(1 - phi^2)
        else mod$Pn[1, 1] <- 1
        mod$a[] <- 0
        mod
    }
    arimaSS <- function(y, mod) {
        .Call(R_ARIMA_Like, y, mod$phi, mod$theta, mod$Delta,
            mod$a, mod$P, mod$Pn, as.integer(0), TRUE)
    }
    armafn <- function(p, trans) {
        par <- coef
        par[mask] <- p
        trarma <- .Call(R_ARIMA_transPars, par, arma, trans)
        Z <- upARIMA(mod, trarma[[1]], trarma[[2]])
        if (ncxreg > 0)
            x <- x - xreg %*% par[narma + (1:ncxreg)]
        res <- .Call(R_ARIMA_Like, x, Z$phi, Z$theta, Z$Delta,
            Z$a, Z$P, Z$Pn, as.integer(0), FALSE)
        s2 <- res[1]/res[3]
        0.5 * (log(s2) + res[2]/res[3])
    }
    armaCSS <- function(p) {
        par <- as.double(fixed)
        par[mask] <- p
        trarma <- .Call(R_ARIMA_transPars, par, arma, FALSE)
        if (ncxreg > 0)
            x <- x - xreg %*% par[narma + (1:ncxreg)]
        res <- .Call(R_ARIMA_CSS, x, arma, trarma[[1]], trarma[[2]],
            as.integer(ncond), FALSE)
        0.5 * log(res)
    }
    arCheck <- function(ar) {
        p <- max(which(c(1, -ar) != 0)) - 1
        if (!p)
            return(TRUE)
        all(Mod(polyroot(c(1, -ar[1:p]))) > 1)
    }
    maInvert <- function(ma) {
        q <- length(ma)
        q0 <- max(which(c(1, ma) != 0)) - 1
        if (!q0)
            return(ma)
        roots <- polyroot(c(1, ma[1:q0]))
        ind <- Mod(roots) < 1
        if (all(!ind))
            return(ma)
        if (q0 == 1)
            return(c(1/ma[1], rep(0, q - q0)))
        roots[ind] <- 1/roots[ind]
        x <- 1
        for (r in roots) x <- c(x, 0) - c(0, x)/r
        c(Re(x[-1]), rep(0, q - q0))
    }
    series <- deparse(substitute(x))
    if (NCOL(x) > 1)
        stop("only implemented for univariate time series")
    method <- match.arg(method)
    x <- as.ts(x)
    if (!is.numeric(x))
        stop("'x' must be numeric")
    storage.mode(x) <- "double"
    dim(x) <- NULL
    n <- length(x)
    if (!missing(order))
        if (!is.numeric(order) || length(order) != 3 || any(order <
            0))
            stop("'order' must be a non-negative numeric vector of length
3")
    if (!missing(seasonal))
        if (is.list(seasonal)) {
            if (is.null(seasonal$order))
                stop("'seasonal' must be a list with component 'order'")
            if (!is.numeric(seasonal$order) || length(seasonal$order) !=
                3 || any(seasonal$order < 0))
                stop("'seasonal$order' must be a non-negative numeric vector
of length 3")
        }
        else if (is.numeric(order)) {
            if (length(order) == 3)
                seasonal <- list(order = seasonal)
            else ("'seasonal' is of the wrong length")
        }
        else stop("'seasonal' must be a list with component 'order'")
    if (is.null(seasonal$period) || is.na(seasonal$period) ||
        seasonal$period == 0)
        seasonal$period <- frequency(x)
    arma <- as.integer(c(order[-2], seasonal$order[-2], seasonal$period,
        order[2], seasonal$order[2]))
    narma <- sum(arma[1:4])
    xtsp <- tsp(x)
    tsp(x) <- NULL
    Delta <- 1
    for (i in seq_len(order[2])) Delta <- Delta %+% c(1, -1)
    for (i in seq_len(seasonal$order[2])) Delta <- Delta %+%
        c(1, rep(0, seasonal$period - 1), -1)
    Delta <- -Delta[-1]
    nd <- order[2] + seasonal$order[2]
    n.used <- sum(!is.na(x)) - length(Delta)
    if (is.null(xreg)) {
        ncxreg <- 0
    }
    else {
        nmxreg <- deparse(substitute(xreg))
        if (NROW(xreg) != n)
            stop("lengths of 'x' and 'xreg' do not match")
        ncxreg <- NCOL(xreg)
        xreg <- as.matrix(xreg)
        storage.mode(xreg) <- "double"
    }
    class(xreg) <- NULL
    if (ncxreg > 0 && is.null(colnames(xreg)))
        colnames(xreg) <- if (ncxreg == 1)
            nmxreg
        else paste(nmxreg, 1:ncxreg, sep = "")
    if (include.mean && (nd == 0)) {
        xreg <- cbind(intercept = rep(1, n), xreg = xreg)
        ncxreg <- ncxreg + 1
    }
    if (method == "CSS-ML") {
        anyna <- any(is.na(x))
        if (ncxreg)
            anyna <- anyna || any(is.na(xreg))
        if (anyna)
            method <- "ML"
    }
    if (method == "CSS" || method == "CSS-ML") {
        ncond <- order[2] + seasonal$order[2] * seasonal$period
        ncond1 <- order[1] + seasonal$period * seasonal$order[1]
        ncond <- if (!missing(n.cond))
            ncond + max(n.cond, ncond1)
        else ncond + ncond1
    }
    else ncond <- 0
    if (is.null(fixed))
        fixed <- rep(NA_real_, narma + ncxreg)
    else if (length(fixed) != narma + ncxreg)
        stop("wrong length for 'fixed'")
    mask <- is.na(fixed)
    no.optim <- !any(mask)
    if (no.optim)
        transform.pars <- FALSE
    if (transform.pars) {
        ind <- arma[1] + arma[2] + seq_len(arma[3])
        if (any(!mask[seq_len(arma[1])]) || any(!mask[ind])) {
            warning("some AR parameters were fixed: setting transform.pars =
FALSE")
            transform.pars <- FALSE
        }
    }
    init0 <- rep(0, narma)
    parscale <- rep(1, narma)
    if (ncxreg) {
        cn <- colnames(xreg)
        orig.xreg <- (ncxreg == 1) || any(!mask[narma + 1:ncxreg])
        if (!orig.xreg) {
            S <- svd(na.omit(xreg))
            xreg <- xreg %*% S$v
        }
        fit <- lm(x ~ xreg - 1, na.action = na.omit)
        n.used <- sum(!is.na(resid(fit))) - length(Delta)
        init0 <- c(init0, coef(fit))
        ses <- summary(fit)$coefficients[, 2]
        parscale <- c(parscale, 10 * ses)
    }
    if (n.used <= 0)
        stop("too few non-missing observations")
    if (!is.null(init)) {
        if (length(init) != length(init0))
            stop("'init' is of the wrong length")
        if (any(ind <- is.na(init)))
            init[ind] <- init0[ind]
        if (method == "ML") {
            if (arma[1] > 0)
                if (!arCheck(init[1:arma[1]]))
                  stop("non-stationary AR part")
            if (arma[3] > 0)
                if (!arCheck(init[sum(arma[1:2]) + 1:arma[3]]))
                  stop("non-stationary seasonal AR part")
            if (transform.pars)
                init <- .Call(R_ARIMA_Invtrans, as.double(init),
                  arma)
        }
    }
    else init <- init0
    coef <- as.double(fixed)
    skip<-0
    if (!("parscale" %in% names(optim.control)))
        optim.control$parscale <- parscale[mask]
    if (method == "CSS") {
        res <- if (no.optim)
            list(convergence = 0, par = numeric(0), value =
armaCSS(numeric(0)))
        else optim(init[mask], armaCSS, method = "BFGS", hessian = TRUE,
            control = optim.control)
        if (res$convergence > 0)
            warning("possible convergence problem: optim gave code=",
                res$convergence)
        coef[mask] <- res$par
        trarma <- .Call(R_ARIMA_transPars, coef, arma, FALSE)
        mod <- makeARIMA(trarma[[1]], trarma[[2]], Delta, kappa)
        if (ncxreg > 0)
            x <- x - xreg %*% coef[narma + (1:ncxreg)]
        arimaSS(x, mod)
        val <- .Call(R_ARIMA_CSS, x, arma, trarma[[1]], trarma[[2]],
            as.integer(ncond), TRUE)
        sigma2 <- val[[1]]
        var <- if (no.optim)
            numeric(0)
        else solve(res$hessian * n.used)
    }
    else {
        if (method == "CSS-ML") {
		exitpoint<-(-1);
            res <- if (no.optim)
                list(convergence = 0, par = numeric(0), value =
armaCSS(numeric(0)))
            else optim(init[mask], armaCSS, method = "BFGS",
                hessian = FALSE, control = optim.control)
            if (res$convergence == 0)
                init[mask] <- res$par
            if (arma[1] > 0)
                if (!arCheck(init[1:arma[1]]))
                  stop("non-stationary AR part from CSS")
            if (arma[3] > 0)
                if (!arCheck(init[sum(arma[1:2]) + 1:arma[3]]))
                  stop("non-stationary seasonal AR part from CSS")
            ncond <- 0
        }
        if (transform.pars) {
            init <- .Call(R_ARIMA_Invtrans, init, arma)
            if (arma[2] > 0) {
                ind <- arma[1] + 1:arma[2]
                init[ind] <- maInvert(init[ind])
            }
            if (arma[4] > 0) {
                ind <- sum(arma[1:3]) + 1:arma[4]
                init[ind] <- maInvert(init[ind])
            }
        }
        trarma <- .Call(R_ARIMA_transPars, init, arma, transform.pars)
        mod <- makeARIMA(trarma[[1]], trarma[[2]], Delta, kappa)
	  exitpoint<-(0);

	  tryCatch({
	  res <- if (no.optim)
            list(convergence = 0, par = numeric(0), value =
armafn(numeric(0),
                as.logical(transform.pars)))
        else optim(init[mask], armafn, method = "BFGS", hessian = TRUE,
            control = optim.control, trans = as.logical(transform.pars))
        if (res$convergence > 0)
            warning("possible convergence problem: optim gave code=",
                res$convergence)
        coef[mask] <- res$par
        if (transform.pars) {
            if (arma[2] > 0) {
                ind <- arma[1] + 1:arma[2]
                if (all(mask[ind]))
                  coef[ind] <- maInvert(coef[ind])
            }
            if (arma[4] > 0) {
                ind <- sum(arma[1:3]) + 1:arma[4]
                if (all(mask[ind]))
                  coef[ind] <- maInvert(coef[ind])
            }
            if (any(coef[mask] != res$par)) {
                oldcode <- res$convergence
		    exitpoint<-1
                res <- optim(coef[mask], armafn, method = "BFGS",
                  hessian = TRUE, control = list(maxit = 0, parscale =
optim.control$parscale),
                  trans = TRUE)
                res$convergence <- oldcode
                coef[mask] <- res$par
            }
            A <- .Call(R_ARIMA_Gradtrans, as.double(coef), arma)
            A <- A[mask, mask]
            var <- t(A) %*% solve(res$hessian * n.used) %*% A
            coef <- .Call(R_ARIMA_undoPars, coef, arma)
        }
        else var <- if (no.optim)
            numeric(0)
        else solve(res$hessian * n.used)
        trarma <- .Call(R_ARIMA_transPars, coef, arma, FALSE)
        mod <- makeARIMA(trarma[[1]], trarma[[2]], Delta, kappa)
        val <- if (ncxreg > 0)
            arimaSS(x - xreg %*% coef[narma + (1:ncxreg)], mod)
        else arimaSS(x, mod)
        sigma2 <- val[[1]][1]/n.used
	  skip<-0
	  }, error=function(e) {
            print(paste(order[1],order[2],order[3],seasonal$order[1],seasona
l$order[2],seasonal$order[3],"SARIMA couldn't be fitted
(exitpoint:",exitpoint,")"))
		skip<<-1
	  })
    }
    if (skip==0){
    value <- 2 * n.used * res$value + n.used + n.used * log(2 *
        pi)
    aic <- if (method != "CSS")
        value + 2 * sum(mask) + 2
    else NA
    nm <- NULL
    if (arma[1] > 0)
        nm <- c(nm, paste("ar", 1:arma[1], sep = ""))
    if (arma[2] > 0)
        nm <- c(nm, paste("ma", 1:arma[2], sep = ""))
    if (arma[3] > 0)
        nm <- c(nm, paste("sar", 1:arma[3], sep = ""))
    if (arma[4] > 0)
        nm <- c(nm, paste("sma", 1:arma[4], sep = ""))
    if (ncxreg > 0) {
        nm <- c(nm, cn)
        if (!orig.xreg) {
            ind <- narma + 1:ncxreg
            coef[ind] <- S$v %*% coef[ind]
            A <- diag(narma + ncxreg)
            A[ind, ind] <- S$v
            A <- A[mask, mask]
            var <- A %*% var %*% t(A)
        }
    }
    names(coef) <- nm
    if (!no.optim)
        dimnames(var) <- list(nm[mask], nm[mask])
    resid <- val[[2]]
    tsp(resid) <- xtsp
    class(resid) <- "ts"
    res <- list(coef = coef, sigma2 = sigma2, var.coef = var,
        mask = mask, loglik = -0.5 * value, aic = aic, arma = arma,
        residuals = resid, call = match.call(), series = series,
        code = res$convergence, n.cond = ncond, model = mod)
    class(res) <- "Arima"
    res}
    else{
    aic<-NA
    var<-NA
    sigma2<-NA
    res <- list(var.coef = var, aic = aic, arma = arma)
    class(res) <- "Arima"
    res
    }
}