--- constrOptim.R 2010-06-22 15:44:28.352204298 -0400 +++ constrOptim_NEW.R 2010-06-22 16:00:38.223490505 -0400 @@ -15,22 +15,28 @@ # http://www.r-project.org/Licenses/ -constrOptim <- - function(theta, f, grad, ui, ci, mu = 0.0001, control = list(), - method = if(is.null(grad)) "Nelder-Mead" else "BFGS", - outer.iterations = 100, outer.eps = 0.00001, ..., hessian = FALSE) -{ +constrOptim <- function (theta, f, grad=NULL, ui, ci, mu = 1e-04, control = list(), + method = c("BFGS", "Nelder-Mead"), outer.iterations = 100, outer.eps = 1e-07, grad.eps = 1.e-07, trace.outer=TRUE, ...) +{ +# Bugs fixed and improvements made by: Ravi Varadhan, Johns Hopkins University, June 22, 2010. +# +# The user can call either "Nelder-Mead" or "BFGS" +# "BFGS" can be called even when the gradient function is not specified +# in which case, a finite-difference approximation is computed +# if (!is.null(control$fnscale) && control$fnscale < 0) - mu <- -mu ##maximizing + mu <- -mu R <- function(theta, theta.old, ...) { ui.theta <- ui%*%theta gi <- ui.theta-ci - if (any(gi<0)) return(NaN) + if (any(gi < 0)) + return(NaN) gi.old <- ui%*%theta.old-ci bar <- sum(gi.old*log(gi)-ui.theta) - if (!is.finite(bar)) bar <- -Inf + if (!is.finite(bar)) + bar <- -Inf f(theta, ...)-mu*bar } @@ -42,44 +48,84 @@ grad(theta, ...) - mu*dbar } + method <- match.arg(method, choices=c("BFGS", "Nelder-Mead")) + + if (is.null(grad) & method == "BFGS") { + grad <- function(par, ...) { + df <- rep(NA, length(par)) + fval <- f(par, ...) + for (i in 1:length(par)) { + dx <- par + dx[i] <- dx[i] + grad.eps + df[i] <- (f(dx, ...) - fval)/grad.eps + } + df + } +} + if (any(ui%*%theta-ci <= 0)) - stop("initial value is not in the interior of the feasible region") + stop("initial value not feasible") obj <- f(theta, ...) r <- R(theta, theta, ...) - fun <- function(theta, ...) R(theta, theta.old, ...) - gradient <- if(method == "SANN") { - if(missing(grad)) NULL else grad - } else function(theta, ...) dR(theta, theta.old, ...) + feval <- 0 + geval <- 0 - for(i in seq_len(outer.iterations)) { + for (i in 1:outer.iterations) { + if (trace.outer) { + cat("par: ", theta, "\n") + cat("fval: ", obj, "\n") + } obj.old <- obj r.old <- r theta.old <- theta - + fun <- function(theta, ...) { + R(theta, theta.old, ...) + } + gradient <- function(theta, ...) { + dR(theta, theta.old, ...) + } a <- optim(theta.old, fun, gradient, control = control, - method = method, hessian = hessian, ...) + method = method, ...) r <- a$value - if (is.finite(r) && is.finite(r.old) && - abs(r-r.old)/(outer.eps+abs(r-r.old)) < outer.eps) break +# Following "relative" convergence criterion is flawed +# if (is.finite(r) && is.finite(r.old) && abs(r - r.old)/(outer.eps + +# abs(r - r.old) ) < outer.eps) +# break +# Here is a correct way to do it: +# if (is.finite(r) && is.finite(r.old) && abs(r - r.old)/(1 + +# abs(r + r.old)/2 ) < outer.eps) +# break +# But we use the simpler "absolute" convergence criterion: + if (is.finite(r) && is.finite(r.old) && abs(r - r.old) < outer.eps) + break theta <- a$par +# We need to accumulate function and gradient evaluation counts + feval <- feval + a$counts[1] + geval <- geval + a$counts[2] obj <- f(theta, ...) - if (obj > obj.old) break +# The following line is a bug when control$fnscale = -1 +# if (obj > obj.old) break +# This is the correct version + if (obj > obj.old * sign(mu)) break } if (i == outer.iterations) { a$convergence <- 7 - a$message <- gettext("Barrier algorithm ran out of iterations and did not converge") + a$message <- "Barrier algorithm ran out of iterations and did not converge" } if (mu > 0 && obj > obj.old) { a$convergence <- 11 - a$message <- gettextf("Objective function increased at outer iteration %d", i) + a$message <- paste("Objective function increased at outer iteration", + i) } if (mu < 0 && obj < obj.old) { a$convergence <- 11 - a$message <- gettextf("Objective function decreased at outer iteration %d", i) + a$message <- paste("Objective function decreased at outer iteration", + i) } a$outer.iterations <- i a$barrier.value <- a$value a$value <- f(a$par, ...) a$barrier.value <- a$barrier.value - a$value + a$counts <- c(feval, geval) a } --- constrOptim.R 2010-06-22 15:44:28.352204298 -0400 +++ constrOptim_NEW_NC.R 2010-06-22 16:05:03.736204242 -0400 @@ -15,22 +15,23 @@ # http://www.r-project.org/Licenses/ -constrOptim <- - function(theta, f, grad, ui, ci, mu = 0.0001, control = list(), - method = if(is.null(grad)) "Nelder-Mead" else "BFGS", - outer.iterations = 100, outer.eps = 0.00001, ..., hessian = FALSE) + +constrOptim <- function (theta, f, grad=NULL, ui, ci, mu = 1e-04, control = list(), + method = c("BFGS", "Nelder-Mead"), outer.iterations = 100, outer.eps = 1e-07, grad.eps = 1.e-07, trace.outer=TRUE, ...) { if (!is.null(control$fnscale) && control$fnscale < 0) - mu <- -mu ##maximizing + mu <- -mu R <- function(theta, theta.old, ...) { ui.theta <- ui%*%theta gi <- ui.theta-ci - if (any(gi<0)) return(NaN) + if (any(gi < 0)) + return(NaN) gi.old <- ui%*%theta.old-ci bar <- sum(gi.old*log(gi)-ui.theta) - if (!is.finite(bar)) bar <- -Inf + if (!is.finite(bar)) + bar <- -Inf f(theta, ...)-mu*bar } @@ -42,44 +43,71 @@ grad(theta, ...) - mu*dbar } + method <- match.arg(method, choices=c("BFGS", "Nelder-Mead")) + + if (is.null(grad) & method == "BFGS") { + grad <- function(par, ...) { + df <- rep(NA, length(par)) + fval <- f(par, ...) + for (i in 1:length(par)) { + dx <- par + dx[i] <- dx[i] + grad.eps + df[i] <- (f(dx, ...) - fval)/grad.eps + } + df + } +} + if (any(ui%*%theta-ci <= 0)) - stop("initial value is not in the interior of the feasible region") + stop("initial value not feasible") obj <- f(theta, ...) r <- R(theta, theta, ...) - fun <- function(theta, ...) R(theta, theta.old, ...) - gradient <- if(method == "SANN") { - if(missing(grad)) NULL else grad - } else function(theta, ...) dR(theta, theta.old, ...) + feval <- 0 + geval <- 0 - for(i in seq_len(outer.iterations)) { + for (i in 1:outer.iterations) { + if (trace.outer) { + cat("par: ", theta, "\n") + cat("fval: ", obj, "\n") + } obj.old <- obj r.old <- r theta.old <- theta - + fun <- function(theta, ...) { + R(theta, theta.old, ...) + } + gradient <- function(theta, ...) { + dR(theta, theta.old, ...) + } a <- optim(theta.old, fun, gradient, control = control, - method = method, hessian = hessian, ...) + method = method, ...) r <- a$value - if (is.finite(r) && is.finite(r.old) && - abs(r-r.old)/(outer.eps+abs(r-r.old)) < outer.eps) break + if (is.finite(r) && is.finite(r.old) && abs(r - r.old) < outer.eps) + break theta <- a$par + feval <- feval + a$counts[1] + geval <- geval + a$counts[2] obj <- f(theta, ...) - if (obj > obj.old) break + if (obj > obj.old * sign(mu)) break } if (i == outer.iterations) { a$convergence <- 7 - a$message <- gettext("Barrier algorithm ran out of iterations and did not converge") + a$message <- "Barrier algorithm ran out of iterations and did not converge" } if (mu > 0 && obj > obj.old) { a$convergence <- 11 - a$message <- gettextf("Objective function increased at outer iteration %d", i) + a$message <- paste("Objective function increased at outer iteration", + i) } if (mu < 0 && obj < obj.old) { a$convergence <- 11 - a$message <- gettextf("Objective function decreased at outer iteration %d", i) + a$message <- paste("Objective function decreased at outer iteration", + i) } a$outer.iterations <- i a$barrier.value <- a$value a$value <- f(a$par, ...) a$barrier.value <- a$barrier.value - a$value + a$counts <- c(feval, geval) a }