[R-sig-Geo] spml function and the spatial.error option

Thu May 3 03:17:44 CEST 2012

Hi,

I'm trying to run a panel model (of N counties over T years) with
contemporaneous/spatial correlation in the errors. So this is a "spatial
error model" in this literature's terminology.

In order to explore this function I'm simulating spatially correlated data
and errors and comparing it to the results from the plm() function with some
Monte Carlo analysis.

However, I'm getting /exactly/ the same (wrong) results when specifying "b"
or "kkp" or "none" in the spatial.error option. 

On the other hand, while the empirical distributions I get with spml() are
more efficient (narrower) the standard errors I get are virtually identical
to the "naive" standard errors returned by the plm canned routine.

You will find my reproducible example below, although you need to download
the US shape file:
http://dl.dropbox.com/u/45311184/spatial/co99_d00.shp

So, my very broad question is, what's wrong? More specifically, 
- Is it a matter of spatial weights? 
- Am I specifying the call for spml incorrectly? 
- Why does the "p" "kkp" and "none" options return the same result? 
- Why is there a "rho" estimated in the case in which spatial.error="none"?
- Why is the "rho" reported in the "coefficients" table with my beta and not
in a "Error variance parameters" table as it is shown here (page 9):
www.jstatsoft.org/v47/i01/paper

Any help, hints, suggestions would be greatly appreciated.

Let's hope my question and code are clear enough!

Thanks,

Ariel

----------------------------------
----- CODE FROM HERE ON ----
----------------------------------

# Preliminary
#~~~~~~~~
library(rgdal)
library(spdep)
library(plm)
library(splm)

# download US shape file:
http://dl.dropbox.com/u/45311184/spatial/co99_d00.shp
counties  <- readOGR("co99_d00.shp","co99_d00")
counties  <- counties[counties$STATE %in% c("17"),] # only keep Illinois for
the example
plot(counties)
n        <- length(counties at polygons) # number of counties

# Neigbhors and weights
nb  <- poly2nb(counties)
nbw <- nb2listw(nb, style="W") # Row-standardised weights

# Parameters
beta = matrix(1, nrow=1, ncol=1)
sig2 = 25

# creating spatial correlation
distmatrix <- spDists(counties, longlat = F)
SD <- max(distmatrix)/5 # standard deviation in m
Sigma1.nor <- distmatrix
Sigma1.nor <- dnorm(Sigma1.nor, mean=0,sd=SD) / dnorm(0,mean=0,sd=SD) 
Sigma1.nor <- sig2 * Sigma1.nor
Sigma1.nor[1:10,1:10]
rm(SD)

# Ability to generate multivariate normals
vs.nor       <- svd(Sigma1.nor) 
vcsqrt.nor   <- t(vs.nor$v %*% (t(vs.nor$u) * sqrt(vs.nor$d))) 
# function to generate multivariate normals 
mrnorm.nor <- function(num.vals, mu.vec) {
   k        <- ncol(Sigma1.nor)
   ans.mat  <- sweep(matrix(rnorm(num.vals*k), nrow = num.vals) %*%
vcsqrt.nor,2,mu.vec,"+") # 1 sec
   c(t(ans.mat))
}

# Artificial data:
#~~~~~~~~~

# Panel structure
years       <- 1980:2010     # time periods
t           <- length(years) # number of time periods
dataset     <- data.frame( fips        =
as.numeric(paste(counties at data$STATE, counties at data$COUNTY, sep="")),
                           year=rep(years, each=n) )
# The Xs
set.seed(1)
dataset$Xcor <- mrnorm.nor(t , rep(1,times=n) ) # generate contemporaneously
correlated Xs over space
dataset$alpha<- rep(rnorm(n,mean=5,sd=sqrt(sig2)), times=t) # county
effect/intercept   
head(dataset)

# Mapping function 
choropleth <- function(dataframe, var, years, title,colors){   
   df     <- data.frame(matrix( dataframe[,var][dataframe$year %in% years] ,
ncol=length(years)))
   names(df)   <- years
   temp   <- SpatialPolygonsDataFrame(counties, data.frame(counties at data,
df))
   arrow = list("SpatialPolygonsRescale", layout.north.arrow(), offset =
c(-76,34), scale = 0.5, which = 2)
   spplot(temp, zcol= paste("X",years,sep=""), 
          par.settings = list(axis.line = list(col =  'transparent')), #
removes lines around plot
          names.attr = paste(years), # names on top of each panel e.g. year
          col.regions = colorRampPalette(colors)(100), #cm.colors(100), #,
heat.colors(), topo.colors()
          colorkey=list(space="right"), scales = list(draw = F), # location
of legend
          main = title,
          sp.layout = list(arrow), as.table = TRUE)
}

# Visualize Xs for year 1995 for example
choropleth(dataset, "Xcor", 1995, "Correlated X for
1995",colors=c("blue","cyan","red"))
# Same thing for errors (note Xs and Errors are not correlated )
dataset$Ecor <- mrnorm.nor(t, rep(0,times=n))
choropleth(dataset, "Ecor", 1995, "Spatially correlated errors - 1995",
colors=c("red","white","black"))

# Compute theoretical standard errors for beta
Sigma.nor    <- bdiag(lapply(1:t, function(x) Sigma1.nor) ) # block diagonal
covariance matrix
D  <- outer(dataset$fips, unique(dataset$fips), "==") + 0   # fixed-effect
dummy matrix
colnames(D) <- paste(unique(dataset$fips))
Z <- cbind(dataset$Xcor, D) # the regressor matrix
ZZ_inv <- solve(t(Z)  %*% Z)
vcov <- ZZ_inv %*% t(Z) %*% Sigma.nor %*% Z %*% ZZ_inv
rm(Z,D)
sqrt(vcov[1,1]) # teh correct standard error of beta

# Monte carlo analysis:
#~~~~~~~~~~~~~~
ndraws <-  200
fitfe.bsave  = matrix(0, nrow=ndraws, ncol=4) 
fitfe.sesave = matrix(0, nrow=ndraws, ncol=4)
# Monte Carlo experiment
system.time({ # takes ~120 secs for 200 draws in my 3-year old computer
   for (i in 1:ndraws) {  
      print(i)
      # Generate vector of errors
         Ecor <- mrnorm.nor(t, rep(0,times=n))
      # Compute the implied y variable
         dataset$y      <- as.matrix(dataset[,c("Xcor")]) %*% beta +
dataset$alpha + Ecor         
      # Run regressions
         fitfe      <-  plm( y ~ Xcor, data = dataset, model="within", index
= c("fips","year"))
         fitfe.b    <- spml(formula = y ~ Xcor, data = dataset,
model="within", index = c("fips","year"), listw = nbw, lag = FALSE,
spatial.error = "b")
         fitfe.kkp  <- spml(formula = y ~ Xcor, data = dataset,
model="within", index = c("fips","year"), listw = nbw, lag = FALSE,
spatial.error = "kkp")
         fitfe.none <- spml(formula = y ~ Xcor, data = dataset,
model="within", index = c("fips","year"), listw = nbw, lag = FALSE,
spatial.error = "none")
      # Save 
         fitfe.bsave[i,1]  <- coefficients(fitfe)
         fitfe.sesave[i,1] <- sqrt(diag(vcov(fitfe)))
         fitfe.bsave[i,2]  <- fitfe.b$coefficients[2]
         fitfe.sesave[i,2] <- sqrt(fitfe.b$vcov[2,2])
         fitfe.bsave[i,3]  <- fitfe.kkp$coefficients[2]
         fitfe.sesave[i,3] <- sqrt(fitfe.kkp$vcov[2,2])
         fitfe.bsave[i,4]  <- fitfe.none$coefficients[2]
         fitfe.sesave[i,4] <- sqrt(fitfe.none$vcov[2,2])
      # Clean up
         dataset$y      <- NULL
   }
})

# Something I dont understand: why is there a "rho" estimated in the model
where spatial.error="none"?
# Also, why is this rho in the coefficients table and not in a error
parameters table by itself?
summary(fitfe.none)

# Visualize:
#~~~~~~~~~~~

# Initial plot: distribution of beta with Xcor and Ecor
plot( density(fitfe.bsave[,1]),  ylim=c(0,10), xlim=c(0.8,1.3), type="l",
lwd=2, col= "black", xlab="coefficient", ylab="density", main=paste("Monte
Carlo analysis - ",ndraws,"draws"))

# Theoretical distribution of beta
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=beta, sd= sqrt(vcov[1,1])) ,lty="dashed", col="black",lwd=2)                   

# Average "naive" distributions from canned FE routine
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,1]), sd= mean(fitfe.sesave[,1] )),lty="solid",
col="red",lwd=2) 

# Empirical Spatial Error Model (SER) distribution of beta 
lines(density(fitfe.bsave[,2]),lty="solid", col="blue",lwd=2) # option "b"
lines(density(fitfe.bsave[,3]),lty="solid", col="blue",lwd=2) # option "kkp"
lines(density(fitfe.bsave[,4]),lty="solid", col="blue",lwd=2) # option
"none"

# Average Spatial Error Model (SER) distribution of beta from canned routine
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,2] )),lty="solid",
col="cyan",lwd=2) # option "b"
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,3] )),lty="solid",
col="cyan",lwd=2) # option "kkp"
lines(seq(beta-2,beta+2,0.0001), dnorm( seq(beta-2,beta+2,0.0001),
mean=mean(fitfe.bsave[,2]), sd= mean(fitfe.sesave[,4] )),lty="solid",
col="cyan",lwd=2) # option "none"

# Legend and reference line
legend("topright", cex=0.7, title= "Distribution of beta",
legend=c("Empirical FE", "Theoretical FE", "Avg. naive FE", "Empirical SER
(3 lines)", "Avg. reported SER (3 lines)"),
lty=c("solid","dashed","solid","solid","solid"),
lwd=2,col=c("black","black","red","blue","cyan"))
abline(v=beta, lty=3)

-----
Ariel Ortiz-Bobea
PhD Candidate in Agricultural & Resource Economics
University of Maryland - College Park
--
View this message in context: http://r-sig-geo.2731867.n2.nabble.com/spml-function-and-the-spatial-error-option-tp7522051.html
Sent from the R-sig-geo mailing list archive at Nabble.com.