[R-sig-Geo] spml function and the spatial.error option
Ariel Ortiz-Bobea
aortizbobea at arec.umd.edu
Thu May 3 03:17:44 CEST 2012
Hi,
I'm trying to run a panel model (of N counties over T years) with
contemporaneous/spatial correlation in the errors. So this is a "spatial
error model" in this literature's terminology.
In order to explore this function I'm simulating spatially correlated data
and errors and comparing it to the results from the plm() function with some
Monte Carlo analysis.
However, I'm getting /exactly/ the same (wrong) results when specifying "b"
or "kkp" or "none" in the spatial.error option.
On the other hand, while the empirical distributions I get with spml() are
more efficient (narrower) the standard errors I get are virtually identical
to the "naive" standard errors returned by the plm canned routine.
You will find my reproducible example below, although you need to download
the US shape file:
http://dl.dropbox.com/u/45311184/spatial/co99_d00.shp
So, my very broad question is, what's wrong? More specifically,
- Is it a matter of spatial weights?
- Am I specifying the call for spml incorrectly?
- Why does the "p" "kkp" and "none" options return the same result?
- Why is there a "rho" estimated in the case in which spatial.error="none"?
- Why is the "rho" reported in the "coefficients" table with my beta and not
in a "Error variance parameters" table as it is shown here (page 9):
www.jstatsoft.org/v47/i01/paper
Any help, hints, suggestions would be greatly appreciated.
Let's hope my question and code are clear enough!
Thanks,
Ariel
----------------------------------
----- CODE FROM HERE ON ----
----------------------------------
# Preliminary
#~~~~~~~~
library(rgdal)
library(spdep)
library(plm)
library(splm)
# download US shape file:
http://dl.dropbox.com/u/45311184/spatial/co99_d00.shp
counties <- readOGR("co99_d00.shp","co99_d00")
counties <- counties[counties$STATE %in% c("17"),] # only keep Illinois for
the example
plot(counties)
n <- length(counties at polygons) # number of counties
# Neigbhors and weights
nb <- poly2nb(counties)
nbw <- nb2listw(nb, style="W") # Row-standardised weights
# Parameters
beta = matrix(1, nrow=1, ncol=1)
sig2 = 25
# creating spatial correlation
distmatrix <- spDists(counties, longlat = F)
SD <- max(distmatrix)/5 # standard deviation in m
Sigma1.nor <- distmatrix
Sigma1.nor <- dnorm(Sigma1.nor, mean=0,sd=SD) / dnorm(0,mean=0,sd=SD)
Sigma1.nor <- sig2 * Sigma1.nor
Sigma1.nor[1:10,1:10]
rm(SD)
# Ability to generate multivariate normals
vs.nor <- svd(Sigma1.nor)
vcsqrt.nor <- t(vs.nor$v %*% (t(vs.nor$u) * sqrt(vs.nor$d)))
# function to generate multivariate normals
mrnorm.nor <- function(num.vals, mu.vec) {
k <- ncol(Sigma1.nor)
ans.mat <- sweep(matrix(rnorm(num.vals*k), nrow = num.vals) %*%
vcsqrt.nor,2,mu.vec,"+") # 1 sec
c(t(ans.mat))
}
# Artificial data:
#~~~~~~~~~
# Panel structure
years <- 1980:2010 # time periods
t <- length(years) # number of time periods
dataset <- data.frame( fips =
as.numeric(paste(counties at data$STATE, counties at data$COUNTY, sep="")),
year=rep(years, each=n) )
# The Xs
set.seed(1)
dataset$Xcor <- mrnorm.nor(t , rep(1,times=n) ) # generate contemporaneously
correlated Xs over space
dataset$alpha<- rep(rnorm(n,mean=5,sd=sqrt(sig2)), times=t) # county
effect/intercept
head(dataset)
# Mapping function
choropleth <- function(dataframe, var, years, title,colors){
df <- data.frame(matrix( dataframe[,var][dataframe$year %in% years] ,
ncol=length(years)))
names(df) <- years
temp <- SpatialPolygonsDataFrame(counties, data.frame(counties at data,
df))
arrow = list("SpatialPolygonsRescale", layout.north.arrow(), offset =
c(-76,34), scale = 0.5, which = 2)
spplot(temp, zcol= paste("X",years,sep=""),
par.settings = list(axis.line = list(col = 'transparent')), #
removes lines around plot
names.attr = paste(years), # names on top of each panel e.g. year
col.regions = colorRampPalette(colors)(100), #cm.colors(100), #,
heat.colors(), topo.colors()
colorkey=list(space="right"), scales = list(draw = F), # location
of legend
main = title,
sp.layout = list(arrow), as.table = TRUE)
}
# Visualize Xs for year 1995 for example
choropleth(dataset, "Xcor", 1995, "Correlated X for
1995",colors=c("blue","cyan","red"))
# Same thing for errors (note Xs and Errors are not correlated )
dataset$Ecor <- mrnorm.nor(t, rep(0,times=n))
choropleth(dataset, "Ecor", 1995, "Spatially correlated errors - 1995",
colors=c("red","white","black"))
# Compute theoretical standard errors for beta
Sigma.nor <- bdiag(lapply(1:t, function(x) Sigma1.nor) ) # block diagonal
covariance matrix
D <- outer(dataset$fips, unique(dataset$fips), "==") + 0 # fixed-effect
dummy matrix
colnames(D) <- paste(unique(dataset$fips))
Z <- cbind(dataset$Xcor, D) # the regressor matrix
ZZ_inv <- solve(t(Z) %*% Z)
vcov <- ZZ_inv %*% t(Z) %*% Sigma.nor %*% Z %*% ZZ_inv
rm(Z,D)
sqrt(vcov[1,1]) # teh correct standard error of beta
# Monte carlo analysis:
#~~~~~~~~~~~~~~
ndraws <- 200
fitfe.bsave = matrix(0, nrow=ndraws, ncol=4)
fitfe.sesave = matrix(0, nrow=ndraws, ncol=4)
# Monte Carlo experiment
system.time({ # takes ~120 secs for 200 draws in my 3-year old computer
for (i in 1:ndraws) {
print(i)
# Generate vector of errors
Ecor <- mrnorm.nor(t, rep(0,times=n))
# Compute the implied y variable
dataset$y <- as.matrix(dataset[,c("Xcor")]) %*% beta +
dataset$alpha + Ecor
# Run regressions
fitfe <- plm( y ~ Xcor, data = dataset, model="within", index
= c("fips","year"))
fitfe.b <- spml(formula = y ~ Xcor, data = dataset,
model="within", index = c("fips","year"), listw = nbw, lag = FALSE,
spatial.error = "b")
fitfe.kkp <- spml(formula = y ~ Xcor, data = dataset,
model="within", index = c("fips","year"), listw = nbw, lag = FALSE,
spatial.error = "kkp")
fitfe.none <- spml(formula = y ~ Xcor, data = dataset,
model="within", index = c("fips","year"), listw = nbw, lag = FALSE,
spatial.error = "none")
# Save
fitfe.bsave[i,1] <- coefficients(fitfe)
fitfe.sesave[i,1] <- sqrt(diag(vcov(fitfe)))
fitfe.bsave[i,2] <- fitfe.b$coefficients[2]
fitfe.sesave[i,2] <- sqrt(fitfe.b$vcov[2,2])
fitfe.bsave[i,3] <- fitfe.kkp$coefficients[2]
fitfe.sesave[i,3] <- sqrt(fitfe.kkp$vcov[2,2])
fitfe.bsave[i,4] <- fitfe.none$coefficients[2]
fitfe.sesave[i,4] <- sqrt(fitfe.none$vcov[2,2])
# Clean up
dataset$y <- NULL
}
})
# Something I dont understand: why is there a "rho" estimated in the model
where spatial.error="none"?
# Also, why is this rho in the coefficients table and not in a error
parameters table by itself?
summary(fitfe.none)
# Visualize:
#~~~~~~~~~~~
# Initial plot: distribution of beta with Xcor and Ecor
plot( density(fitfe.bsave[,1]), ylim=c(0,10), xlim=c(0.8,1.3), type="l",
lwd=2, col= "black", xlab="coefficient", ylab="density", main=paste("Monte
Carlo analysis - ",ndraws,"draws"))
# Theoretical distribution of beta
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=beta, sd= sqrt(vcov[1,1])) ,lty="dashed", col="black",lwd=2)
# Average "naive" distributions from canned FE routine
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,1]), sd= mean(fitfe.sesave[,1] )),lty="solid",
col="red",lwd=2)
# Empirical Spatial Error Model (SER) distribution of beta
lines(density(fitfe.bsave[,2]),lty="solid", col="blue",lwd=2) # option "b"
lines(density(fitfe.bsave[,3]),lty="solid", col="blue",lwd=2) # option "kkp"
lines(density(fitfe.bsave[,4]),lty="solid", col="blue",lwd=2) # option
"none"
# Average Spatial Error Model (SER) distribution of beta from canned routine
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,2] )),lty="solid",
col="cyan",lwd=2) # option "b"
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,3] )),lty="solid",
col="cyan",lwd=2) # option "kkp"
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,4] )),lty="solid",
col="cyan",lwd=2) # option "none"
# Legend and reference line
legend("topright", cex=0.7, title= "Distribution of beta",
legend=c("Empirical FE", "Theoretical FE", "Avg. naive FE", "Empirical SER
(3 lines)", "Avg. reported SER (3 lines)"),
lty=c("solid","dashed","solid","solid","solid"),
lwd=2,col=c("black","black","red","blue","cyan"))
abline(v=beta, lty=3)
-----
Ariel Ortiz-Bobea
PhD Candidate in Agricultural & Resource Economics
University of Maryland - College Park
--
View this message in context: http://r-sig-geo.2731867.n2.nabble.com/spml-function-and-the-spatial-error-option-tp7522051.html
Sent from the R-sig-geo mailing list archive at Nabble.com.
More information about the R-sig-Geo
mailing list