Areal / lattice data.

For typical lattice data, observed values are associated with areas, and the union of the areas observed cover the region of interest. The boundaries of the areas do not follow from the observed phenomen's properties, but typically from external constraints to the observation process.

Examples include:

Problems include:

Typical analysis (see R package spdep) follows a path:

The following is heavily inspired by Ch 17 and 18 of https://r-spatial.org/book .

library(sf)
## Linking to GEOS 3.8.0, GDAL 3.0.2, PROJ 6.2.1
data(pol_pres15, package="spDataLarge")
head(pol_pres15[, c(1, 4, 6)])
## Simple feature collection with 6 features and 3 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 235157.1 ymin: 366913.3 xmax: 281431.7 ymax: 413016.6
## epsg (SRID):    NA
## proj4string:    +proj=tmerc +lat_0=0 +lon_0=18.99999999999998 +k=0.9993 +x_0=500000 +y_0=-5300000 +ellps=GRS80 +towgs84=0,0,0 +units=m +no_defs
##    TERYT                name       types                       geometry
## 1 020101         BOLESŁAWIEC       Urban MULTIPOLYGON (((261089.5 38...
## 2 020102         BOLESŁAWIEC       Rural MULTIPOLYGON (((254150 3837...
## 3 020103            GROMADKA       Rural MULTIPOLYGON (((275346 3846...
## 4 020104        NOWOGRODZIEC Urban/rural MULTIPOLYGON (((251769.8 37...
## 5 020105          OSIECZNICA       Rural MULTIPOLYGON (((263423.9 40...
## 6 020106 WARTA BOLESŁAWIECKA       Rural MULTIPOLYGON (((267030.7 38...
library(spdep)
## Loading required package: sp
## Loading required package: spData
## 
## Attaching package: 'spData'
## The following objects are masked _by_ '.GlobalEnv':
## 
##     x, y
p = poly2nb(pol_pres15)
plot(st_geometry(pol_pres15), border = 'grey')
plot(p, st_centroid(st_geometry(pol_pres15)), pch = 3, cex = .2, add = TRUE)

plot of chunk unnamed-chunk-2

Finding queen neighbours using sf:

st_queen <- function(a, b = a) st_relate(a, b, pattern = "F***T****")
as.nb.sgbp <- function(x, ...) {
  attrs <- attributes(x)
  x <- lapply(x, function(i) { if(length(i) == 0L) 0L else i } )
  attributes(x) <- attrs
  class(x) <- "nb"
  x
}
nb_sf_q <- as.nb.sgbp(st_queen(pol_pres15))
nb_q <- poly2nb(pol_pres15, queen=TRUE)
all.equal(nb_q, nb_q, check.attributes=FALSE)
## [1] TRUE

Moran's I: \[ I = \frac{n \sum_{(2)} w_{ij} z_i z_j}{S_0 \sum_{i=1}^{n} z_i^2} \] where \(x_i, i=1, \ldots, n\) are \(n\) observations on the numeric variable of interest, \(z_i = x_i - \bar{x}\), \(\bar{x} = \sum_{i=1}^{n} x_i / n\), \(\sum_{(2)} = \stackrel{\sum_{i=1}^{n} \sum_{j=1}^{n}}{i \neq j}\), \(w_{ij}\) are the spatial weights, and \(S_0 = \sum_{(2)} w_{ij}\).

First we test a random variable using the Moran test, here under the normality assumption (argument randomisation=FALSE, default TRUE):

x <- rnorm(nrow(pol_pres15))
(mt <- moran.test(x, nb2listw(nb_q), randomisation=FALSE))
## 
##  Moran I test under normality
## 
## data:  x  
## weights: nb2listw(nb_q)    
## 
## Moran I statistic standard deviate = -0.31691, p-value = 0.6243
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##     -0.0043270080     -0.0004009623      0.0001534707

The test variable is \(Z(I)=(I-E(I))/\sqrt{(Var(I))}\), which is standard normally distributed under the \(H_0\) of spatial independence.

Now try a real variable

plot(pol_pres15["I_turnout"])

plot of chunk unnamed-chunk-5

moran.test(pol_pres15$I_turnout, nb2listw(nb_q), randomisation=FALSE)
## 
##  Moran I test under normality
## 
## data:  pol_pres15$I_turnout  
## weights: nb2listw(nb_q)    
## 
## Moran I statistic standard deviate = 55.481, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6869115119     -0.0004009623      0.0001534707

Alternatively, we could use a randomisation to compute \(Var(I)\):

moran.test(pol_pres15$I_turnout, nb2listw(nb_q), randomisation=TRUE)
## 
##  Moran I test under randomisation
## 
## data:  pol_pres15$I_turnout  
## weights: nb2listw(nb_q)    
## 
## Moran I statistic standard deviate = 55.479, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##      0.6869115119     -0.0004009623      0.0001534786

or use a permutation test:

moran.mc(pol_pres15$I_turnout, nb2listw(nb_q), nsim = 999)
## 
##  Monte-Carlo simulation of Moran I
## 
## data:  pol_pres15$I_turnout 
## weights: nb2listw(nb_q)  
## number of simulations + 1: 1000 
## 
## statistic = 0.68691, observed rank = 1000, p-value = 0.001
## alternative hypothesis: greater

In case we are curious whether this could be a trend in northing and easting, we could try

cc = st_coordinates(st_centroid(pol_pres15))
## Warning in st_centroid.sf(pol_pres15): st_centroid assumes attributes are
## constant over geometries of x
pol_pres15$x = cc[,1]
pol_pres15$y = cc[,2]
lm.morantest(lm(I_turnout~x+y, pol_pres15), nb2listw(nb_q))
## 
##  Global Moran I for regression residuals
## 
## data:  
## model: lm(formula = I_turnout ~ x + y, data = pol_pres15)
## weights: nb2listw(nb_q)
## 
## Moran I statistic standard deviate = 51.164, p-value < 2.2e-16
## alternative hypothesis: greater
## sample estimates:
## Observed Moran I      Expectation         Variance 
##     0.6316916567    -0.0012018116     0.0001530128
library(spatialreg)
## Loading required package: Matrix
## 
## Attaching package: 'spatialreg'
## The following objects are masked from 'package:spdep':
## 
##     anova.sarlm, as_dgRMatrix_listw, as_dsCMatrix_I,
##     as_dsCMatrix_IrW, as_dsTMatrix_listw, as.spam.listw,
##     bptest.sarlm, can.be.simmed, cheb_setup, coef.gmsar,
##     coef.sarlm, coef.spautolm, coef.stsls, create_WX,
##     deviance.gmsar, deviance.sarlm, deviance.spautolm,
##     deviance.stsls, do_ldet, eigen_pre_setup, eigen_setup, eigenw,
##     errorsarlm, fitted.gmsar, fitted.ME_res, fitted.sarlm,
##     fitted.SFResult, fitted.spautolm, get.ClusterOption,
##     get.coresOption, get.mcOption, get.VerboseOption,
##     get.ZeroPolicyOption, GMargminImage, GMerrorsar,
##     griffith_sone, gstsls, Hausman.test, HPDinterval.lagImpact,
##     impacts, intImpacts, Jacobian_W, jacobianSetup, l_max,
##     lagmess, lagsarlm, lextrB, lextrS, lextrW, lmSLX,
##     logLik.sarlm, logLik.spautolm, LR.sarlm, LR1.sarlm,
##     LR1.spautolm, LU_prepermutate_setup, LU_setup, Matrix_J_setup,
##     Matrix_setup, mcdet_setup, MCMCsamp, ME, mom_calc,
##     mom_calc_int2, moments_setup, powerWeights, predict.sarlm,
##     predict.SLX, print.gmsar, print.ME_res, print.sarlm,
##     print.sarlm.pred, print.SFResult, print.spautolm, print.stsls,
##     print.summary.gmsar, print.summary.sarlm,
##     print.summary.spautolm, print.summary.stsls, residuals.gmsar,
##     residuals.sarlm, residuals.spautolm, residuals.stsls,
##     sacsarlm, SE_classic_setup, SE_interp_setup,
##     SE_whichMin_setup, set.ClusterOption, set.coresOption,
##     set.mcOption, set.VerboseOption, set.ZeroPolicyOption,
##     similar.listw, spam_setup, spam_update_setup,
##     SpatialFiltering, spautolm, spBreg_err, spBreg_lag,
##     spBreg_sac, stsls, subgraph_eigenw, summary.gmsar,
##     summary.sarlm, summary.spautolm, summary.stsls, trW,
##     vcov.sarlm, Wald1.sarlm
(e1 = errorsarlm(I_turnout~x+y, pol_pres15, nb2listw(nb_q)))
## 
## Call:
## errorsarlm(formula = I_turnout ~ x + y, data = pol_pres15, listw = nb2listw(nb_q))
## Type: error 
## 
## Coefficients:
##        lambda   (Intercept)             x             y 
##  8.136417e-01  4.493338e-01  6.051636e-08 -6.195580e-08 
## 
## Log likelihood: 4450.706
summary(e1)
## 
## Call:
## errorsarlm(formula = I_turnout ~ x + y, data = pol_pres15, listw = nb2listw(nb_q))
## 
## Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.1437602 -0.0254295 -0.0010257  0.0239821  0.1777382 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                Estimate  Std. Error z value Pr(>|z|)
## (Intercept)  4.4933e-01  2.0061e-02 22.3982  < 2e-16
## x            6.0516e-08  2.5249e-08  2.3968  0.01654
## y           -6.1956e-08  2.6857e-08 -2.3069  0.02106
## 
## Lambda: 0.81364, LR test value: 1914.3, p-value: < 2.22e-16
## Asymptotic standard error: 0.013834
##     z-value: 58.815, p-value: < 2.22e-16
## Wald statistic: 3459.2, p-value: < 2.22e-16
## 
## Log likelihood: 4450.706 for error model
## ML residual variance (sigma squared): 0.0013991, (sigma: 0.037405)
## Number of observations: 2495 
## Number of parameters estimated: 5 
## AIC: -8891.4, (AIC for lm: -6979.1)
(e2 = errorsarlm(I_turnout~types, pol_pres15, nb2listw(nb_q)))
## 
## Call:
## errorsarlm(formula = I_turnout ~ types, data = pol_pres15, listw = nb2listw(nb_q))
## Type: error 
## 
## Coefficients:
##              lambda         (Intercept)          typesUrban 
##          0.83827726          0.44072360          0.03664039 
##    typesUrban/rural typesWarsaw Borough 
##          0.01063703          0.05344443 
## 
## Log likelihood: 4592.881
summary(e2)
## 
## Call:
## errorsarlm(formula = I_turnout ~ types, data = pol_pres15, listw = nb2listw(nb_q))
## 
## Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.13294913 -0.02326745 -0.00057984  0.02204095  0.14572490 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##                      Estimate Std. Error z value  Pr(>|z|)
## (Intercept)         0.4407236  0.0044307 99.4707 < 2.2e-16
## typesUrban          0.0366404  0.0021609 16.9564 < 2.2e-16
## typesUrban/rural    0.0106370  0.0016293  6.5284 6.646e-11
## typesWarsaw Borough 0.0534444  0.0171505  3.1162  0.001832
## 
## Lambda: 0.83828, LR test value: 2214, p-value: < 2.22e-16
## Asymptotic standard error: 0.012727
##     z-value: 65.865, p-value: < 2.22e-16
## Wald statistic: 4338.2, p-value: < 2.22e-16
## 
## Log likelihood: 4592.881 for error model
## ML residual variance (sigma squared): 0.0012299, (sigma: 0.035069)
## Number of observations: 2495 
## Number of parameters estimated: 6 
## AIC: -9173.8, (AIC for lm: -6961.8)

Models used:

Useful spdep vignettes:

More modern approaches, using R-INLA:

Exercises

Produc data

Load the produc dataset by

# get US state delineations:
library(maps) 
states.m = map('state', plot=FALSE, fill=TRUE)
IDs <- sapply(strsplit(states.m$names, ":"), function(x) x[1])
library(maptools)
## Checking rgeos availability: TRUE
states = map2SpatialPolygons(states.m, IDs=IDs)
# get the Produc dataset from plm:
library(plm)
data(Produc)
# convert into sf object:
library(sf)
Produc86 = subset(Produc, year == 1986)
st_geometry(Produc86) = st_as_sfc(states[-8])
plot(Produc86)
## Warning: plotting the first 10 out of 11 attributes; use max.plot = 11 to
## plot all

plot of chunk unnamed-chunk-10

library(spdep)
nb = poly2nb(Produc86, queen = TRUE)
moran.test(Produc86$gsp, nb2listw(nb))
## 
##  Moran I test under randomisation
## 
## data:  Produc86$gsp  
## weights: nb2listw(nb)    
## 
## Moran I statistic standard deviate = 0.28232, p-value = 0.3889
## alternative hypothesis: greater
## sample estimates:
## Moran I statistic       Expectation          Variance 
##       0.003623260      -0.021276596       0.007778976
Produc.ca = subset(Produc, state == "CALIFORNIA")
plot(gsp ~ year, Produc.ca)

plot of chunk unnamed-chunk-12

acf(Produc.ca$gsp)

plot of chunk unnamed-chunk-12

plot(residuals(lm(gsp~year, Produc.ca)))

plot of chunk unnamed-chunk-12

acf(residuals(lm(gsp~year, Produc.ca)))

plot of chunk unnamed-chunk-12

# do this on Monday
names(Produc86)
##  [1] "state"    "year"     "region"   "pcap"     "hwy"      "water"   
##  [7] "util"     "pc"       "gsp"      "emp"      "unemp"    "geometry"
e = errorsarlm(pcap~gsp, Produc86, nb2listw(nb))
## Warning in errorsarlm(pcap ~ gsp, Produc86, nb2listw(nb)): inversion of asymptotic covariance matrix failed for tol.solve = 2.22044604925031e-16 
##   reciprocal condition number = 7.7815e-19 - using numerical Hessian.
summary(e)
## 
## Call:
## errorsarlm(formula = pcap ~ gsp, data = Produc86, listw = nb2listw(nb))
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -15000.87  -2763.06   -495.88   1830.89  27329.75 
## 
## Type: error 
## Coefficients: (asymptotic standard errors) 
##               Estimate Std. Error z value  Pr(>|z|)
## (Intercept) 2.8662e+03 1.0040e+03  2.8546  0.004309
## gsp         3.2578e-01 1.0001e-02 32.5765 < 2.2e-16
## 
## Lambda: -0.30974, LR test value: 1.9597, p-value: 0.16155
## Approximate (numerical Hessian) standard error: 0.2187
##     z-value: -1.4163, p-value: 0.1567
## Wald statistic: 2.0058, p-value: 0.1567
## 
## Log likelihood: -486.4771 for error model
## ML residual variance (sigma squared): 36418000, (sigma: 6034.8)
## Number of observations: 48 
## Number of parameters estimated: 4 
## AIC: 980.95, (AIC for lm: 980.91)
e_alt = lm(pcap~gsp, Produc86)
summary(e_alt)
## 
## Call:
## lm(formula = pcap ~ gsp, data = Produc86)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -14874.9  -2443.0   -555.9   2160.3  28778.5 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 2.855e+03  1.220e+03    2.34   0.0237 *  
## gsp         3.252e-01  1.065e-02   30.54   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6359 on 46 degrees of freedom
## Multiple R-squared:  0.953,  Adjusted R-squared:  0.952 
## F-statistic: 932.5 on 1 and 46 DF,  p-value: < 2.2e-16