Answers to exercises

Exercise 1

use the dataset found in system.file("shape/nc.shp", package="sf")
Read this dataset with sf::st_read (or sf::read_sf)

library(sf)

## Linking to GEOS 3.7.0, GDAL 2.4.0, PROJ 5.2.0

nc = read_sf(system.file("shape/nc.shp", package="sf"))

plot the dataset, then plot the second attribute

plot(nc)

## Warning: plotting the first 10 out of 14 attributes; use max.plot = 14 to
## plot all

plot(nc[,2])

# or, e.g. 
# library(tidyverse)
# nc %>% select(2) %>% plot

try to manipulate the color scheme and color breaks

plot(nc[,2], pal = viridis::viridis) # note that this is a function, not a set of colors

plot(nc[,2], nbreaks = 5, pal = viridis::viridis)  # note that we don't get exactly 5, because of "pretty" style

plot the first 20 features of the dataset

plot(nc[1:20,2])

create a plot with axes

plot(nc[1:20,2], axes = TRUE)

what is the coordinate reference system of the dataset?

st_crs(nc)

## Coordinate Reference System:
##   EPSG: 4267 
##   proj4string: "+proj=longlat +datum=NAD27 +no_defs"

For the nc dataset, compute the area of the polygons with st_area
compare the areas with the AREA field in the dataset; is there a discrepancy?

a = st_area(nc)
plot(a, nc$AREA)

select the polygon that intersects with POINT (-81.49826 36.4314)

pt = st_as_sfc("POINT (-81.49826 36.4314)", crs = st_crs(nc))
nc[pt,]

## although coordinates are longitude/latitude, st_intersects assumes that they are planar

## Simple feature collection with 1 feature and 14 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -81.74107 ymin: 36.23436 xmax: -81.23989 ymax: 36.58965
## epsg (SRID):    4267
## proj4string:    +proj=longlat +datum=NAD27 +no_defs
## # A tibble: 1 x 15
##    AREA PERIMETER CNTY_ CNTY_ID NAME  FIPS  FIPSNO CRESS_ID BIR74 SID74
##   <dbl>     <dbl> <dbl>   <dbl> <chr> <chr>  <dbl>    <int> <dbl> <dbl>
## 1 0.114      1.44  1825    1825 Ashe  37009  37009        5  1091     1
## # … with 5 more variables: NWBIR74 <dbl>, BIR79 <dbl>, SID79 <dbl>,
## #   NWBIR79 <dbl>, geometry <MULTIPOLYGON [°]>

Exercises (2)

Read the file system.file("tif/L7_ETMs.tif", package = "stars") with stars::read_stars
print the dataset, and describe what it contains
plot the dataset; how are the color breaks chosen?

library(stars)

## Loading required package: abind

r = read_stars(system.file("tif/L7_ETMs.tif", package = "stars"))
r

## stars object with 3 dimensions and 1 attribute
## attribute(s):
##   L7_ETMs.tif    
##  Min.   :  1.00  
##  1st Qu.: 54.00  
##  Median : 69.00  
##  Mean   : 68.91  
##  3rd Qu.: 86.00  
##  Max.   :255.00  
## dimension(s):
##      from  to  offset delta                       refsys point values    
## x       1 349  288776  28.5 +proj=utm +zone=25 +south... FALSE   NULL [x]
## y       1 352 9120761 -28.5 +proj=utm +zone=25 +south... FALSE   NULL [y]
## band    1   6      NA    NA                           NA    NA   NULL

plot(r)

The color breaks are chosen by the default method for breaks, which is “quantile”; this means that (regularly chosen) quantiles of the pixel values are chosen; this means that the map shows equal amounts of each color.

find the maximum pixel value for each of the bands (st_apply)

st_apply(r, 3, max)

## stars object with 1 dimensions and 1 attribute
## attribute(s):
##       max     
##  Min.   :255  
##  1st Qu.:255  
##  Median :255  
##  Mean   :255  
##  3rd Qu.:255  
##  Max.   :255  
## dimension(s):
##      from to offset delta refsys point values
## band    1  6     NA    NA     NA    NA   NULL

find and plot the maximum reflectance over all bands for all pixels

plot(st_apply(r, 1:2, max))

* find and plot the mean reflectance over all bands for all pixels

plot(st_apply(r, 1:2, mean))

* plot the subset of these data with the first 10 rows, the second 10 colums, and the last 3 bands

r[,1:10, 11:20, 4:6]

## stars object with 3 dimensions and 1 attribute
## attribute(s):
##   L7_ETMs.tif    
##  Min.   : 19.00  
##  1st Qu.: 55.00  
##  Median : 72.00  
##  Mean   : 70.13  
##  3rd Qu.: 83.00  
##  Max.   :130.00  
## dimension(s):
##      from to  offset delta                       refsys point values    
## x       1 10  288776  28.5 +proj=utm +zone=25 +south... FALSE   NULL [x]
## y      11 20 9120761 -28.5 +proj=utm +zone=25 +south... FALSE   NULL [y]
## band    4  6      NA    NA                           NA    NA   NULL

use st_crop to crop the dataset to the area 300 m around POINT (293749.5 9115745)

pt = st_as_sfc("POINT (293749.5 9115745)", crs = st_crs(r))
cr = st_crop(r, st_buffer(pt, units::set_units(300, m)))
plot(cr)

* is this cropped dataset still a (rectangular) raster?

yes: the values outside the circle are simply set to NA, and are not plotted.

Exercise 3

data(DE_RB_2005, package = "gstat")
library(spacetime)
library(stars)
s = st_as_stars(as(DE_RB_2005, "STFDF"))
# select a single day, omit NA values:
s1 = na.omit(st_as_sf(s[,,100]))
names(s1)[1] = "PM10"
library(gstat)
variogram(PM10~1, s1)

##     np      dist     gamma dir.hor dir.ver   id
## 1    4  18898.55  7.020488       0       0 var1
## 2   23  34364.92  6.440393       0       0 var1
## 3   32  56345.09 11.255562       0       0 var1
## 4   39  77732.46  7.906209       0       0 var1
## 5   55  99506.37  8.341365       0       0 var1
## 6   71 122440.26  9.902675       0       0 var1
## 7   85 143636.56 15.158993       0       0 var1
## 8   80 164572.07 13.839935       0       0 var1
## 9   91 187694.48 12.821504       0       0 var1
## 10  88 209843.86 14.708895       0       0 var1
## 11  98 231000.08 17.078920       0       0 var1
## 12 110 254882.02 22.609394       0       0 var1
## 13 104 276259.20 22.452120       0       0 var1
## 14 101 297576.88 18.176579       0       0 var1
## 15  91 320326.70 28.031442       0       0 var1

plot(variogram(PM10~1, s1))

Building on the above dataset, create out plot where:

setting the cutoff to a much larger, and a much smaller value

plot(variogram(PM10~1, s1, cutoff = 1e5))

setting the width argument to a larger and smaller value

plot(variogram(PM10~1, s1, width =  10000, cutoff = 5e5))

plot(variogram(PM10~1, s1, width = 100000, cutoff = 5e5))

the number of point pairs involved is also shown in the plot

plot(variogram(PM10~1, s1, width = 100000, cutoff = 5e5, plot.numbers = TRUE))

## Warning in variogram.default(y, locations, X, trend.beta = beta, grid =
## grid, : the following arguments are ignored: TRUE

setting cloud=TRUE - what do the points in this plot represent? (hint: plot the number of point pairs)

plot(variogram(PM10~1, s1, cutoff = 5e5, cloud = TRUE)) # width ignored

Save a variogram you like to an object, and fit a model to it using fit.variogram; use vgm() to list the possible models and show.vgms() to get an idea how they look.

v = variogram(PM10~1, s1)
init = vgm(20, "Sph", 300000, 5)
f = fit.variogram(v, init)

## Warning in fit.variogram(v, init): No convergence after 200 iterations: try
## different initial values?

plot(v, f)

If you get errors on fit, don’t worry:
- plot the variogram with fitted model (like: plot(v, fit), and examine how it looks
- try to choose better initial values for nugget (offset), sill (final level), range (distance where correlation no longer increases)

Exercises 4

Compare the ranges of the kriging and idw map, and the range of the PM10 data. Explain differences and correspondences.

# load German boundaries
data(air, package = "spacetime")
de <- st_transform(st_as_sf(DE_NUTS1), 32632)
bb = st_bbox(de)
dx = seq(bb[1], bb[3], 10000)
dy = seq(bb[4], bb[2], -10000) # decreases!
st_as_stars(matrix(0., length(dx), length(dy))) %>%
  st_set_dimensions(1, dx) %>%
  st_set_dimensions(2, dy) %>%
  st_set_crs(32632) -> grd

i = idw(PM10~1, s1, grd)

## [inverse distance weighted interpolation]

v = variogram(PM10~1, s1)
f = fit.variogram(v, vgm(20, "Lin", 0, 4))
kr = krige(PM10~1, s1, grd, f)

## [using ordinary kriging]

range(i[["var1.pred"]])

## [1]  4.03311 29.36141

range(kr[["var1.pred"]])

## [1]  7.254416 24.375381

range(s1$PM10)

## [1]  3.625 29.667

Kriging smoothes much more, mostly because the variogram model used (here) has a considerable relative nugget (nugget component as fraction of the total variance)

Examine the kriging variance error map.

plot(kr[2])

Compute a standard error map by taking the square root of the kriging variance

kr$stderr = sqrt(kr$var1.var)

Plot this map with the data points overlayed: can you explain the pattern in the errors?

plot(kr["stderr"], reset = FALSE)
plot(st_geometry(de), add = TRUE, col = NA, border = 'red')
plot(s1, add = TRUE, col = 'green', pch = 3)

Repeat the exercise with the same linear model but with a zero nugget effect, and compare the ranges again.

# brute-force: setting the nugget to zero:
f$psill[1] = 0
# somewhat "nicer": fitting the same model, but without a nugget:
f = fit.variogram(v, vgm(20, "Lin", 0))
kr0 = krige(PM10~1, s1, grd, f)

## [using ordinary kriging]

range(kr0[[1]])

## [1]  3.999323 29.490683

Repeat the exercise with the a spherical model with a short range and zero nugget, and compare the ranges again.

# use a short range spherical model, range 100 km, zero nugget:
f = vgm(20, "Sph", 100000)
kr.sph = krige(PM10~1, s1, grd, f)

## [using ordinary kriging]

range(kr.sph[[1]])

## [1]  4.146075 28.374769

Repeat the exercise with the a pure nugget effect model (e.g., vgm(20, “Nug”, 0)), and compare the ranges again.

# fit a pure nugget model:
(f = fit.variogram(v, vgm(1, "Nug", 0)))

##   model    psill range
## 1   Nug 10.16699     0

kr.nug = krige(PM10~1, s1, grd, f)

## [using ordinary kriging]

range(kr.nug[["var1.pred"]])

## [1] 12.50885 12.50885

mean(s1$PM10)

## [1] 12.50885

range(kr.nug[["var1.var"]])

## [1] 10.3234 10.3234

var(s1$PM10)

## [1] 20.12527

mean(variogram(PM10~1, s1, cloud=TRUE)$gamma)

## [1] 16.55404

mean(variogram(PM10~1, s1, cloud=TRUE, cutoff = 1e6)$gamma)

## [1] 20.12527

Compute (approximate) 95% prediction intervals by adding +/- 2 standard errors to the kriging

kr$stderr = sqrt(kr$var1.var)
kr$upper = kr$var1.pred + 2 * kr$stderr
kr$lower = kr$var1.pred - 2 * kr$stderr
plot(merge(kr[c("lower", "upper")]))

Create a map with locations where the threshold of 25 has been exceeded, or might have been exceeded (is inside the prediction interval)

kr$classify = ifelse(kr$lower > 25, 1, ifelse(kr$upper < 25, 3, 2))
plot(kr["classify"]) # legend 1: above 25, 2: ?, 3: below 25

Exercises 5

Carry out a block kriging, setting block=c(10000,10000) to estimate 10 km x 10 km block mean values
Compare the kriged values to those obtained by point kriging, compare the ranges
Compare the standard errors between point and block kriging

f = fit.variogram(v, vgm(20, "Lin", 0, 4))
kr = krige(PM10~1, s1, grd, f) # point kriging

## [using ordinary kriging]

bl = krige(PM10~1, s1, grd, f, block = c(10000, 10000)) # block kriging

## [using ordinary kriging]

kr$point = kr$var1.pred
kr$block = bl$var1.pred
plot(merge(kr[c("point", "block")]))

kr$point.se = sqrt(kr$var1.var)
kr$block.se = sqrt(bl$var1.var)
plot(merge(kr[c("point.se", "block.se")]))

range(kr$point)

## [1]  7.254416 24.375381

range(kr$block)

## [1]  7.282503 24.344914

range(kr$point.se)

## [1] 2.681161 5.069061

range(kr$block.se)

## [1] 1.295284 4.488885