[R-sig-Geo] Problem with lagsarlm (spdep) with SparseM method

Fri Jan 28 20:35:59 CET 2005

Dear Gilles,

Thanks for this bug report - it turned out that you must have an intercept 
and just one right hand side variable, and in calculating the drop-one 
log-likelihoods for testing, the "thisx" matrix was converted to a vector:

		thisx <- x[,-i]

but should have been:

		thisx <- x[,-i, drop = FALSE]

This is corrected in the attached file; could you try it, please, and let 
me know if this solves your problem (source("lag.spsarlm.R"))?

Best wishes,

Roger

On Fri, 28 Jan 2005, Gilles Spielvogel wrote:

> Hello,
> 
> when running the command:
> 
> lagsarc2<-lagsarlm(vary ~ varx , mydata, c2.listw, na.action=na.fail,
> type="lag", method="SparseM", quiet=FALSE, zero.policy=TRUE)
> 
> R returns the following error:
> 
> Spatial lag model
> Jacobian calculated using sparse matrix techniques using SparseM
> Error in lm.fit(thisx, y) : `x' must be a matrix
> 
> The listw object I use comes originally from a queen contiguity GAL object
> made with GeoDA. The command works perfectly if I use the option "eigen"
> instead of "SparseM", but it takes a lot of time.
> 
> Does anyone have any idea about this problem ?
> 
> Thanks in advance for your help.
> 
> Gilles
> 
> _______________________________________________
> R-sig-Geo mailing list
> R-sig-Geo at stat.math.ethz.ch
> https://stat.ethz.ch/mailman/listinfo/r-sig-geo
> 

-- 
Roger Bivand
Economic Geography Section, Department of Economics, Norwegian School of
Economics and Business Administration, Breiviksveien 40, N-5045 Bergen,
Norway. voice: +47 55 95 93 55; fax +47 55 95 93 93
e-mail: Roger.Bivand at nhh.no

-------------- next part --------------
# Copyright 1998-2004 by Roger Bivand and Andrew Bernat
#

lagsarlm <- function(formula, data = list(), listw, 
	na.action=na.fail, type="lag", method="eigen", quiet=TRUE, 
	zero.policy=FALSE, interval=c(-1,0.999), tol.solve=1.0e-10, 
	tol.opt=.Machine$double.eps^0.5, control, optim=FALSE) {
	mt <- terms(formula, data = data)
	mf <- lm(formula, data, na.action=na.action, 
		method="model.frame")
	na.act <- attr(mf, "na.action")
	if (!inherits(listw, "listw")) stop("No neighbourhood list")
	can.sim <- as.logical(NA)
	if (listw$style %in% c("W", "S")) 
		can.sim <- spdep:::can.be.simmed(listw)
	if (!is.null(na.act)) {
	    subset <- !(1:length(listw$neighbours) %in% na.act)
	    listw <- subset(listw, subset, zero.policy=zero.policy)
	}
	switch(type, lag = if (!quiet) cat("\nSpatial lag model\n"),
	    mixed = if (!quiet) cat("\nSpatial mixed autoregressive model\n"),
	    stop("\nUnknown model type\n"))
	if (!quiet) cat("Jacobian calculated using ")
	switch(method, 
		eigen = if (!quiet) cat("neighbourhood matrix eigenvalues\n"),
	        SparseM = {
		    if (listw$style %in% c("W", "S") && !can.sim)
		    stop("SparseM method requires symmetric weights")
		    if (listw$style %in% c("B", "C", "U") && 
			!(is.symmetric.glist(listw$neighbours, listw$weights)))
		    stop("SparseM method requires symmetric weights")
		    if (!quiet) cat("sparse matrix techniques using SparseM\n")
		},
		stop("...\nUnknown method\n"))
	y <- model.extract(mf, "response")
#	y <- model.response(mf, "numeric")
#	if (any(is.na(y))) stop("NAs in dependent variable")
	x <- model.matrix(mt, mf)
#	if (any(is.na(x))) stop("NAs in independent variable")
	if (NROW(x) != length(listw$neighbours))
		stop("Input data and weights have different dimensions")
	n <- NROW(x)
	m <- NCOL(x)
	xcolnames <- colnames(x)
	K <- ifelse(xcolnames[1] == "(Intercept)", 2, 1)
	wy <- lag.listw(listw, y, zero.policy=zero.policy)
	if (any(is.na(wy))) stop("NAs in lagged dependent variable")
	if (type != "lag") {
		# check if there are enough regressors
	        if (m > 1) {
			WX <- matrix(nrow=n,ncol=(m-(K-1)))
			for (k in K:m) {
				wx <- lag.listw(listw, x[,k], 
				    zero.policy=zero.policy)
				if (any(is.na(wx))) 
				    stop("NAs in lagged independent variable")
				WX[,(k-(K-1))] <- wx
			}
		}
		if (K == 2) {
         	    # unnormalized weight matrices
                	if (!(listw$style == "W")) {
 	      			intercept <- as.double(rep(1, n))
       	        		wx <- lag.listw(listw, intercept, 
					zero.policy = zero.policy)
                    		if (m > 1) {
                        		WX <- cbind(wx, WX)
                    		} else {
			      		WX <- matrix(wx, nrow = n, ncol = 1)
                    		}
                	} 
            	}   
		m1 <- m + 1
		mm <- NCOL(x) + NCOL(WX)
            	xxcolnames <- character(mm)
		for (k in 1:m) xxcolnames[k] <- xcolnames[k]
		for (k in m1:mm) 
		    xxcolnames[k] <- paste("lag.", xcolnames[k-mm+m], sep="")
		x <- cbind(x, WX)
		colnames(x) <- xxcolnames
		m <- NCOL(x)
		rm(wx, WX)
	}
	similar <- FALSE
	if (missing(control)) {
		control <- list(trace=0, fnscale=-1, factr=tol.opt,
			pgtol=tol.opt)
	}
	if (method == "eigen") {
		if (!quiet) cat("Computing eigenvalues ...\n")
		if (listw$style %in% c("W", "S") && can.sim) {
			eig <- eigenw(similar.listw(listw))
			similar <- TRUE
		} else eig <- eigenw(listw)
		if (!quiet) cat("\n")
#range inverted 031031, email from Salvati Nicola (and Rein Halbersma)
		if (is.complex(eig)) eig.range <- 1/range(Re(eig))
		else eig.range <- 1/range(eig)
		lm.null <- lm(y ~ x - 1)
		lm.w <- lm.fit(x, wy)
		e.null <- lm.null$residuals
		e.w <- lm.w$residuals
		e.a <- t(e.null) %*% e.null
		e.b <- t(e.w) %*% e.null
		e.c <- t(e.w) %*% e.w
		if (optim) {
		    lm.rho <- lm.fit(cbind(x, wy), y)
		    rho <- coef(lm.rho)[length(coef(lm.rho))]
		    if (rho < eig.range[1]+.Machine$double.eps) rho <- 0.0
		    if (rho > eig.range[2]-.Machine$double.eps) rho <- 0.0
		    opt <- optim(par=c(rho), sar.lag.mixed.f, 
			method="L-BFGS-B", 
			lower=eig.range[1]+.Machine$double.eps, 
			upper=eig.range[2]-.Machine$double.eps, 
			control=control, 
			eig=eig, e.a=e.a, e.b=e.b, e.c=e.c, n=n, quiet=quiet)
		    if (opt$convergence == 1) warning("iteration limit reached")
		    if (opt$convergence == 51) warning(opt$message)
		    if (opt$convergence == 52) warning(opt$message)
		    rho <- c(opt$par[1])
		    names(rho) <- "rho"
		    LL <- c(opt$value)
		} else {
		    opt <- optimize(sar.lag.mixed.f, 
			lower=eig.range[1]+.Machine$double.eps, 
			upper=eig.range[2]-.Machine$double.eps, maximum=TRUE,
			tol=tol.opt, eig=eig, e.a=e.a, e.b=e.b, e.c=e.c,
			n=n, quiet=quiet)
		    rho <- opt$maximum
		    names(rho) <- "rho"
		    LL <- opt$objective
		    optres <- opt
		}
	} else {
		opt <- dosparse(listw=listw, y=y, x=x, wy=wy, K=K, quiet=quiet,
			tol.opt=tol.opt, control=control, method=method, 
			interval=interval, can.sim=can.sim, optim=optim)
		rho <- c(opt$maximum)
		names(rho) <- "rho"
		LL <- c(opt$objective)
		similar <- opt$similar
		optres <- opt$opt
	}
	lm.lag <- lm((y - rho*wy) ~ x - 1)
	r <- residuals(lm.lag)
	fit <- y - r
	names(r) <- names(fit)
	coef.rho <- coefficients(lm.lag)
	names(coef.rho) <- colnames(x)
	SSE <- deviance(lm.lag)
	s2 <- SSE/n
	if (method != "eigen") {
		LLs <- opt$LLs
		lm.null <- opt$lm.null
		rest.se <- NULL
		rho.se <- NULL
		LMtest <- NULL
		ase <- FALSE
		varb <- FALSE
	} else {
		LLs <- NULL
		tr <- function(A) sum(diag(A))
# beware of complex eigenvalues!
		O <- (eig/(1-rho*eig))^2
		omega <- sum(O)
		if (is.complex(omega)) omega <- Re(omega)
		W <- listw2mat(listw)
		A <- solve(diag(n) - rho*W)
		AW <- A %*% W
		zero <- rbind(rep(0,length(coef.rho)))
		xtawxb <- s2*(t(x) %*% AW %*% x %*% coef.rho)
		V <- s2*(s2*tr(t(AW) %*% AW) +
			t(AW %*% x %*% coef.rho) %*%
			(AW %*% x %*% coef.rho)) + omega*s2^2
		inf1 <- rbind(n/2, s2*tr(AW), t(zero))
		inf2 <- rbind(s2*tr(AW), V, xtawxb)
		xtx <- s2*t(x) %*% x
		inf3 <- rbind(zero, t(xtawxb), xtx)
		inf <- cbind(inf1, inf2, inf3)
		varb <- (s2^2) * solve(inf, tol=tol.solve)
		rownames(varb) <- colnames(varb) <- 
			c("sigma", "rho", colnames(x))
		rest.se <- sqrt(diag(varb))[-c(1:2)]
		rho.se <- sqrt(varb[2,2])
		TW <- (W %*% W) + (t(W) %*% W)
		T22 <- sum(diag(TW))
		T21A <- sum(diag(TW %*% A))
		LMtest <- ((t(r) %*% W %*% r)/s2)^2
		LMtest <- LMtest/(T22 - ((T21A^2)*(rho.se^2)))
		ase <- TRUE
	}
	call <- match.call()
	ret <- structure(list(type=type, rho=rho, 
		coefficients=coef.rho, rest.se=rest.se, 
		LL=LL, s2=s2, SSE=SSE, parameters=(m+2), lm.model=lm.null,
		method=method, call=call, residuals=r, opt=optres,
		lm.target=lm.lag, fitted.values=fit,
		se.fit=NULL, formula=formula, similar=similar,
		ase=ase, LLs=LLs, rho.se=rho.se, LMtest=LMtest, 
		resvar=varb, zero.policy=zero.policy), class=c("sarlm"))
	if (zero.policy) {
		zero.regs <- attr(listw$neighbours, 
			"region.id")[which(card(listw$neighbours) == 0)]
		if (length(zero.regs) > 0)
			attr(ret, "zero.regs") <- zero.regs
	}
	if (!is.null(na.act))
		ret$na.action <- na.act
	ret
}

sar.lag.mixed.f <- function(rho, eig, e.a, e.b, e.c, n, quiet)
{
	SSE <- e.a - 2*rho*e.b + rho*rho*e.c
	s2 <- SSE/n
	if (is.complex(eig)) det <- Re(prod(1 - rho*eig)) 
	else det <- prod(1 - rho*eig)
	ret <- (log(det) - ((n/2)*log(2*pi)) - (n/2)*log(s2)
		- (1/(2*s2))*SSE)
	if (!quiet) cat("(eigen) rho:\t", rho, "\tfunction value:\t", ret, "\n")
	ret
}

sar.lag.mix.f.sM <- function(rho, W, I, e.a, e.b, e.c, n, tmpmax, quiet)
{
	SSE <- e.a - 2*rho*e.b + rho*rho*e.c
	s2 <- SSE/n
	Jacobian <- log(det(chol((I - rho * W), tmpmax=tmpmax))^2)
	gc(FALSE)
	ret <- (Jacobian
		- ((n/2)*log(2*pi)) - (n/2)*log(s2) - (1/(2*s2))*SSE)
	if (!quiet) 
	    cat("(SparseM) rho:\t", rho, "\tfunction value:\t", ret, "\n")
	ret
}

dosparse <- function (listw, y, x, wy, K, quiet, tol.opt, 
	control, method, interval, can.sim, optim) {
	similar <- FALSE
	m <- ncol(x)
	n <- nrow(x)
	if (method == "SparseM") {
		if (listw$style %in% c("W", "S") && can.sim) {
			W <- asMatrixCsrListw(similar.listw(listw))
			similar <- TRUE
		} else W <- asMatrixCsrListw(listw)
		I <- asMatrixCsrI(n)
		tmpmax <- sum(card(listw$neighbours)) + n
		# tmpmax and gc() calls: Danlin Yu 20041213
		gc(FALSE)
	}
	m <- ncol(x)
	n <- nrow(x)
	LLs <- vector(mode="list", length=length(K:m))
	j <- 1
	for (i in K:m) {
		# drop bug found by Gilles Spielvogel
		thisx <- x[,-i, drop = FALSE]
		lm.null <- lm.fit(thisx, y)
		lm.w <- lm.fit(thisx, wy)
		e.null <- lm.null$residuals
		e.w <- lm.w$residuals
		e.a <- t(e.null) %*% e.null
		e.b <- t(e.w) %*% e.null
		e.c <- t(e.w) %*% e.w
		if (optim) { 
			lm.rho <- lm.fit(cbind(thisx, wy), y)
			rho <- coef(lm.rho)[length(coef(lm.rho))]
			if (rho <= interval[1]) rho <- 0.0
			if (rho >= interval[2]) rho <- 0.0
			opt <- optim(par=c(rho), 
			    sar.lag.mix.f.sM, method="L-BFGS-B", 
			    lower=interval[1],
			    upper=interval[2],
			    control=control, 
			    W=W, I=I, e.a=e.a, e.b=e.b, e.c=e.c, n=n, 
			    tmpmax=tmpmax, quiet=quiet)
			if (opt$convergence == 1) 
				warning("iteration limit reached")
			if (opt$convergence == 51) warning(opt$message)
			if (opt$convergence == 52) warning(opt$message)
			LLs[[j]] <- c(opt$value)
		} else {
			LLs[[j]] <- optimize(sar.lag.mix.f.sM,
			interval=interval, maximum=TRUE, tol=tol.opt, W=W, I=I,
			e.a=e.a, e.b=e.b, e.c=e.c, n=n, tmpmax=tmpmax, 
			quiet=quiet)$objective
		}
		gc(FALSE)
		attr(LLs[[j]], "nall") <- n
		attr(LLs[[j]], "nobs") <- n
		attr(LLs[[j]], "df") <- (m+2)-1
		attr(LLs[[j]], "name") <- colnames(x)[i]
		class(LLs[[j]]) <- "logLik"
		j <- j + 1
	}
	lm.null <- lm(y ~ x - 1)
	lm.w <- lm.fit(x, wy)
	e.null <- lm.null$residuals
	e.w <- lm.w$residuals
	e.a <- t(e.null) %*% e.null
	e.b <- t(e.w) %*% e.null
	e.c <- t(e.w) %*% e.w
	sn <- listw2sn(listw)
	if (optim) {
		lm.rho <- lm.fit(cbind(x, wy), y)
		rho <- coef(lm.rho)[length(coef(lm.rho))]
		if (rho <= interval[1]) rho <- 0.0
		if (rho >= interval[2]) rho <- 0.0
		opt <- optim(par=c(rho), 
		    sar.lag.mix.f.sM, method="L-BFGS-B", 
		    lower=interval[1],
		    upper=interval[2],
		    control=control, 
		    W=W, I=I, e.a=e.a, e.b=e.b, e.c=e.c, n=n, 
		    tmpmax=tmpmax, quiet=quiet)	
		if (opt$convergence == 1) warning("iteration limit reached")
		if (opt$convergence == 51) warning(opt$message)
		if (opt$convergence == 52) warning(opt$message)
		maximum <- c(opt$par[1])
		objective <- c(opt$value)
	} else {
		opt <- optimize(sar.lag.mix.f.sM,
		    interval=interval, maximum=TRUE, tol=tol.opt, W=W, I=I,
		    e.a=e.a, e.b=e.b, e.c=e.c, n=n, tmpmax=tmpmax, quiet=quiet)
		maximum <- opt$maximum
		objective <- opt$objective
	}
	gc(FALSE)
	res <- list(maximum=maximum, objective=objective, LLs=LLs,
		lm.null=lm.null, similar=similar, opt=opt)
}