[R-sig-Geo] Problem with lagsarlm (spdep) with SparseM method
Roger Bivand
Roger.Bivand at nhh.no
Fri Jan 28 20:35:59 CET 2005
Dear Gilles,
Thanks for this bug report - it turned out that you must have an intercept
and just one right hand side variable, and in calculating the drop-one
log-likelihoods for testing, the "thisx" matrix was converted to a vector:
thisx <- x[,-i]
but should have been:
thisx <- x[,-i, drop = FALSE]
This is corrected in the attached file; could you try it, please, and let
me know if this solves your problem (source("lag.spsarlm.R"))?
Best wishes,
Roger
On Fri, 28 Jan 2005, Gilles Spielvogel wrote:
> Hello,
>
> when running the command:
>
> lagsarc2<-lagsarlm(vary ~ varx , mydata, c2.listw, na.action=na.fail,
> type="lag", method="SparseM", quiet=FALSE, zero.policy=TRUE)
>
> R returns the following error:
>
> Spatial lag model
> Jacobian calculated using sparse matrix techniques using SparseM
> Error in lm.fit(thisx, y) : `x' must be a matrix
>
> The listw object I use comes originally from a queen contiguity GAL object
> made with GeoDA. The command works perfectly if I use the option "eigen"
> instead of "SparseM", but it takes a lot of time.
>
> Does anyone have any idea about this problem ?
>
> Thanks in advance for your help.
>
> Gilles
>
> _______________________________________________
> R-sig-Geo mailing list
> R-sig-Geo at stat.math.ethz.ch
> https://stat.ethz.ch/mailman/listinfo/r-sig-geo
>
--
Roger Bivand
Economic Geography Section, Department of Economics, Norwegian School of
Economics and Business Administration, Breiviksveien 40, N-5045 Bergen,
Norway. voice: +47 55 95 93 55; fax +47 55 95 93 93
e-mail: Roger.Bivand at nhh.no
-------------- next part --------------
# Copyright 1998-2004 by Roger Bivand and Andrew Bernat
#
lagsarlm <- function(formula, data = list(), listw,
na.action=na.fail, type="lag", method="eigen", quiet=TRUE,
zero.policy=FALSE, interval=c(-1,0.999), tol.solve=1.0e-10,
tol.opt=.Machine$double.eps^0.5, control, optim=FALSE) {
mt <- terms(formula, data = data)
mf <- lm(formula, data, na.action=na.action,
method="model.frame")
na.act <- attr(mf, "na.action")
if (!inherits(listw, "listw")) stop("No neighbourhood list")
can.sim <- as.logical(NA)
if (listw$style %in% c("W", "S"))
can.sim <- spdep:::can.be.simmed(listw)
if (!is.null(na.act)) {
subset <- !(1:length(listw$neighbours) %in% na.act)
listw <- subset(listw, subset, zero.policy=zero.policy)
}
switch(type, lag = if (!quiet) cat("\nSpatial lag model\n"),
mixed = if (!quiet) cat("\nSpatial mixed autoregressive model\n"),
stop("\nUnknown model type\n"))
if (!quiet) cat("Jacobian calculated using ")
switch(method,
eigen = if (!quiet) cat("neighbourhood matrix eigenvalues\n"),
SparseM = {
if (listw$style %in% c("W", "S") && !can.sim)
stop("SparseM method requires symmetric weights")
if (listw$style %in% c("B", "C", "U") &&
!(is.symmetric.glist(listw$neighbours, listw$weights)))
stop("SparseM method requires symmetric weights")
if (!quiet) cat("sparse matrix techniques using SparseM\n")
},
stop("...\nUnknown method\n"))
y <- model.extract(mf, "response")
# y <- model.response(mf, "numeric")
# if (any(is.na(y))) stop("NAs in dependent variable")
x <- model.matrix(mt, mf)
# if (any(is.na(x))) stop("NAs in independent variable")
if (NROW(x) != length(listw$neighbours))
stop("Input data and weights have different dimensions")
n <- NROW(x)
m <- NCOL(x)
xcolnames <- colnames(x)
K <- ifelse(xcolnames[1] == "(Intercept)", 2, 1)
wy <- lag.listw(listw, y, zero.policy=zero.policy)
if (any(is.na(wy))) stop("NAs in lagged dependent variable")
if (type != "lag") {
# check if there are enough regressors
if (m > 1) {
WX <- matrix(nrow=n,ncol=(m-(K-1)))
for (k in K:m) {
wx <- lag.listw(listw, x[,k],
zero.policy=zero.policy)
if (any(is.na(wx)))
stop("NAs in lagged independent variable")
WX[,(k-(K-1))] <- wx
}
}
if (K == 2) {
# unnormalized weight matrices
if (!(listw$style == "W")) {
intercept <- as.double(rep(1, n))
wx <- lag.listw(listw, intercept,
zero.policy = zero.policy)
if (m > 1) {
WX <- cbind(wx, WX)
} else {
WX <- matrix(wx, nrow = n, ncol = 1)
}
}
}
m1 <- m + 1
mm <- NCOL(x) + NCOL(WX)
xxcolnames <- character(mm)
for (k in 1:m) xxcolnames[k] <- xcolnames[k]
for (k in m1:mm)
xxcolnames[k] <- paste("lag.", xcolnames[k-mm+m], sep="")
x <- cbind(x, WX)
colnames(x) <- xxcolnames
m <- NCOL(x)
rm(wx, WX)
}
similar <- FALSE
if (missing(control)) {
control <- list(trace=0, fnscale=-1, factr=tol.opt,
pgtol=tol.opt)
}
if (method == "eigen") {
if (!quiet) cat("Computing eigenvalues ...\n")
if (listw$style %in% c("W", "S") && can.sim) {
eig <- eigenw(similar.listw(listw))
similar <- TRUE
} else eig <- eigenw(listw)
if (!quiet) cat("\n")
#range inverted 031031, email from Salvati Nicola (and Rein Halbersma)
if (is.complex(eig)) eig.range <- 1/range(Re(eig))
else eig.range <- 1/range(eig)
lm.null <- lm(y ~ x - 1)
lm.w <- lm.fit(x, wy)
e.null <- lm.null$residuals
e.w <- lm.w$residuals
e.a <- t(e.null) %*% e.null
e.b <- t(e.w) %*% e.null
e.c <- t(e.w) %*% e.w
if (optim) {
lm.rho <- lm.fit(cbind(x, wy), y)
rho <- coef(lm.rho)[length(coef(lm.rho))]
if (rho < eig.range[1]+.Machine$double.eps) rho <- 0.0
if (rho > eig.range[2]-.Machine$double.eps) rho <- 0.0
opt <- optim(par=c(rho), sar.lag.mixed.f,
method="L-BFGS-B",
lower=eig.range[1]+.Machine$double.eps,
upper=eig.range[2]-.Machine$double.eps,
control=control,
eig=eig, e.a=e.a, e.b=e.b, e.c=e.c, n=n, quiet=quiet)
if (opt$convergence == 1) warning("iteration limit reached")
if (opt$convergence == 51) warning(opt$message)
if (opt$convergence == 52) warning(opt$message)
rho <- c(opt$par[1])
names(rho) <- "rho"
LL <- c(opt$value)
} else {
opt <- optimize(sar.lag.mixed.f,
lower=eig.range[1]+.Machine$double.eps,
upper=eig.range[2]-.Machine$double.eps, maximum=TRUE,
tol=tol.opt, eig=eig, e.a=e.a, e.b=e.b, e.c=e.c,
n=n, quiet=quiet)
rho <- opt$maximum
names(rho) <- "rho"
LL <- opt$objective
optres <- opt
}
} else {
opt <- dosparse(listw=listw, y=y, x=x, wy=wy, K=K, quiet=quiet,
tol.opt=tol.opt, control=control, method=method,
interval=interval, can.sim=can.sim, optim=optim)
rho <- c(opt$maximum)
names(rho) <- "rho"
LL <- c(opt$objective)
similar <- opt$similar
optres <- opt$opt
}
lm.lag <- lm((y - rho*wy) ~ x - 1)
r <- residuals(lm.lag)
fit <- y - r
names(r) <- names(fit)
coef.rho <- coefficients(lm.lag)
names(coef.rho) <- colnames(x)
SSE <- deviance(lm.lag)
s2 <- SSE/n
if (method != "eigen") {
LLs <- opt$LLs
lm.null <- opt$lm.null
rest.se <- NULL
rho.se <- NULL
LMtest <- NULL
ase <- FALSE
varb <- FALSE
} else {
LLs <- NULL
tr <- function(A) sum(diag(A))
# beware of complex eigenvalues!
O <- (eig/(1-rho*eig))^2
omega <- sum(O)
if (is.complex(omega)) omega <- Re(omega)
W <- listw2mat(listw)
A <- solve(diag(n) - rho*W)
AW <- A %*% W
zero <- rbind(rep(0,length(coef.rho)))
xtawxb <- s2*(t(x) %*% AW %*% x %*% coef.rho)
V <- s2*(s2*tr(t(AW) %*% AW) +
t(AW %*% x %*% coef.rho) %*%
(AW %*% x %*% coef.rho)) + omega*s2^2
inf1 <- rbind(n/2, s2*tr(AW), t(zero))
inf2 <- rbind(s2*tr(AW), V, xtawxb)
xtx <- s2*t(x) %*% x
inf3 <- rbind(zero, t(xtawxb), xtx)
inf <- cbind(inf1, inf2, inf3)
varb <- (s2^2) * solve(inf, tol=tol.solve)
rownames(varb) <- colnames(varb) <-
c("sigma", "rho", colnames(x))
rest.se <- sqrt(diag(varb))[-c(1:2)]
rho.se <- sqrt(varb[2,2])
TW <- (W %*% W) + (t(W) %*% W)
T22 <- sum(diag(TW))
T21A <- sum(diag(TW %*% A))
LMtest <- ((t(r) %*% W %*% r)/s2)^2
LMtest <- LMtest/(T22 - ((T21A^2)*(rho.se^2)))
ase <- TRUE
}
call <- match.call()
ret <- structure(list(type=type, rho=rho,
coefficients=coef.rho, rest.se=rest.se,
LL=LL, s2=s2, SSE=SSE, parameters=(m+2), lm.model=lm.null,
method=method, call=call, residuals=r, opt=optres,
lm.target=lm.lag, fitted.values=fit,
se.fit=NULL, formula=formula, similar=similar,
ase=ase, LLs=LLs, rho.se=rho.se, LMtest=LMtest,
resvar=varb, zero.policy=zero.policy), class=c("sarlm"))
if (zero.policy) {
zero.regs <- attr(listw$neighbours,
"region.id")[which(card(listw$neighbours) == 0)]
if (length(zero.regs) > 0)
attr(ret, "zero.regs") <- zero.regs
}
if (!is.null(na.act))
ret$na.action <- na.act
ret
}
sar.lag.mixed.f <- function(rho, eig, e.a, e.b, e.c, n, quiet)
{
SSE <- e.a - 2*rho*e.b + rho*rho*e.c
s2 <- SSE/n
if (is.complex(eig)) det <- Re(prod(1 - rho*eig))
else det <- prod(1 - rho*eig)
ret <- (log(det) - ((n/2)*log(2*pi)) - (n/2)*log(s2)
- (1/(2*s2))*SSE)
if (!quiet) cat("(eigen) rho:\t", rho, "\tfunction value:\t", ret, "\n")
ret
}
sar.lag.mix.f.sM <- function(rho, W, I, e.a, e.b, e.c, n, tmpmax, quiet)
{
SSE <- e.a - 2*rho*e.b + rho*rho*e.c
s2 <- SSE/n
Jacobian <- log(det(chol((I - rho * W), tmpmax=tmpmax))^2)
gc(FALSE)
ret <- (Jacobian
- ((n/2)*log(2*pi)) - (n/2)*log(s2) - (1/(2*s2))*SSE)
if (!quiet)
cat("(SparseM) rho:\t", rho, "\tfunction value:\t", ret, "\n")
ret
}
dosparse <- function (listw, y, x, wy, K, quiet, tol.opt,
control, method, interval, can.sim, optim) {
similar <- FALSE
m <- ncol(x)
n <- nrow(x)
if (method == "SparseM") {
if (listw$style %in% c("W", "S") && can.sim) {
W <- asMatrixCsrListw(similar.listw(listw))
similar <- TRUE
} else W <- asMatrixCsrListw(listw)
I <- asMatrixCsrI(n)
tmpmax <- sum(card(listw$neighbours)) + n
# tmpmax and gc() calls: Danlin Yu 20041213
gc(FALSE)
}
m <- ncol(x)
n <- nrow(x)
LLs <- vector(mode="list", length=length(K:m))
j <- 1
for (i in K:m) {
# drop bug found by Gilles Spielvogel
thisx <- x[,-i, drop = FALSE]
lm.null <- lm.fit(thisx, y)
lm.w <- lm.fit(thisx, wy)
e.null <- lm.null$residuals
e.w <- lm.w$residuals
e.a <- t(e.null) %*% e.null
e.b <- t(e.w) %*% e.null
e.c <- t(e.w) %*% e.w
if (optim) {
lm.rho <- lm.fit(cbind(thisx, wy), y)
rho <- coef(lm.rho)[length(coef(lm.rho))]
if (rho <= interval[1]) rho <- 0.0
if (rho >= interval[2]) rho <- 0.0
opt <- optim(par=c(rho),
sar.lag.mix.f.sM, method="L-BFGS-B",
lower=interval[1],
upper=interval[2],
control=control,
W=W, I=I, e.a=e.a, e.b=e.b, e.c=e.c, n=n,
tmpmax=tmpmax, quiet=quiet)
if (opt$convergence == 1)
warning("iteration limit reached")
if (opt$convergence == 51) warning(opt$message)
if (opt$convergence == 52) warning(opt$message)
LLs[[j]] <- c(opt$value)
} else {
LLs[[j]] <- optimize(sar.lag.mix.f.sM,
interval=interval, maximum=TRUE, tol=tol.opt, W=W, I=I,
e.a=e.a, e.b=e.b, e.c=e.c, n=n, tmpmax=tmpmax,
quiet=quiet)$objective
}
gc(FALSE)
attr(LLs[[j]], "nall") <- n
attr(LLs[[j]], "nobs") <- n
attr(LLs[[j]], "df") <- (m+2)-1
attr(LLs[[j]], "name") <- colnames(x)[i]
class(LLs[[j]]) <- "logLik"
j <- j + 1
}
lm.null <- lm(y ~ x - 1)
lm.w <- lm.fit(x, wy)
e.null <- lm.null$residuals
e.w <- lm.w$residuals
e.a <- t(e.null) %*% e.null
e.b <- t(e.w) %*% e.null
e.c <- t(e.w) %*% e.w
sn <- listw2sn(listw)
if (optim) {
lm.rho <- lm.fit(cbind(x, wy), y)
rho <- coef(lm.rho)[length(coef(lm.rho))]
if (rho <= interval[1]) rho <- 0.0
if (rho >= interval[2]) rho <- 0.0
opt <- optim(par=c(rho),
sar.lag.mix.f.sM, method="L-BFGS-B",
lower=interval[1],
upper=interval[2],
control=control,
W=W, I=I, e.a=e.a, e.b=e.b, e.c=e.c, n=n,
tmpmax=tmpmax, quiet=quiet)
if (opt$convergence == 1) warning("iteration limit reached")
if (opt$convergence == 51) warning(opt$message)
if (opt$convergence == 52) warning(opt$message)
maximum <- c(opt$par[1])
objective <- c(opt$value)
} else {
opt <- optimize(sar.lag.mix.f.sM,
interval=interval, maximum=TRUE, tol=tol.opt, W=W, I=I,
e.a=e.a, e.b=e.b, e.c=e.c, n=n, tmpmax=tmpmax, quiet=quiet)
maximum <- opt$maximum
objective <- opt$objective
}
gc(FALSE)
res <- list(maximum=maximum, objective=objective, LLs=LLs,
lm.null=lm.null, similar=similar, opt=opt)
}
More information about the R-sig-Geo
mailing list