7  sf, stars

Exercise 7.1

Find the names of the nc counties that intersect LINESTRING(-84 35,-78 35); use [ for this, and use st_join() for this.

library(sf)
# Linking to GEOS 3.12.1, GDAL 3.8.4, PROJ 9.4.0; sf_use_s2() is TRUE
library(stars)
# Loading required package: abind
(file = system.file("gpkg/nc.gpkg", package="sf"))
# [1] "/home/runner/work/_temp/Library/sf/gpkg/nc.gpkg"
nc = st_read(file)
# Reading layer `nc.gpkg' from data source 
#   `/home/runner/work/_temp/Library/sf/gpkg/nc.gpkg' 
#   using driver `GPKG'
# Simple feature collection with 100 features and 14 fields
# Geometry type: MULTIPOLYGON
# Dimension:     XY
# Bounding box:  xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
# Geodetic CRS:  NAD27
line = st_as_sfc("LINESTRING(-84 35,-78 35)", crs = st_crs(nc))
nc[line,]$NAME
#  [1] "Jackson"     "Mecklenburg" "Macon"       "Sampson"    
#  [5] "Cherokee"    "Cumberland"  "Union"       "Anson"      
#  [9] "Hoke"        "Duplin"      "Richmond"    "Clay"       
# [13] "Scotland"
st_join(st_sf(line), nc)$NAME # left join: `line` should be first argument
#  [1] "Jackson"     "Mecklenburg" "Macon"       "Sampson"    
#  [5] "Cherokee"    "Cumberland"  "Union"       "Anson"      
#  [9] "Hoke"        "Duplin"      "Richmond"    "Clay"       
# [13] "Scotland"

Exercise 7.2

Repeat this after setting sf_use_s2(FALSE), and compute the difference (hint: use setdiff()), and color the counties of the difference using color ‘#00880088’.

# save names first:
sf_use_s2(TRUE)
names_with_s2 = nc[line,]$NAME
sf_use_s2(FALSE)
# Spherical geometry (s2) switched off
nc[line,]$NAME
# although coordinates are longitude/latitude, st_intersects assumes
# that they are planar
#  [1] "Macon"      "Sampson"    "Cherokee"   "Cumberland"
#  [5] "Union"      "Anson"      "Hoke"       "Duplin"    
#  [9] "Richmond"   "Clay"       "Scotland"
(diff = setdiff(names_with_s2, nc[line,]$NAME))
# although coordinates are longitude/latitude, st_intersects assumes
# that they are planar
# [1] "Jackson"     "Mecklenburg"
par(mar = rep(.1, 4))
plot(st_geometry(nc))
plot(st_geometry(nc)[nc$NAME %in% diff], col = "#00880088", add = TRUE)

Exercise 7.3

Plot the two different lines in a single plot; note that R will plot a straight line always straight in the projection currently used; st_segmentize can be used to add points on straight line, or on a great circle for ellipsoidal coordinates.

par(mar = rep(.1, 4))
plot(st_geometry(nc))
plot(st_geometry(nc)[nc$NAME %in% diff], col = "#00880088", add = TRUE)
plot(line, add = TRUE)
plot(st_segmentize(line, units::set_units(10, km)), add = TRUE, col = 'red')

To show that the red line is curved, but only curved in plate carree, and not e.g. in an orthographic projection centered at this region, we can also plot it in an orthographic projection:

l.gc = st_segmentize(line, units::set_units(10, km))
l.pc = st_segmentize(st_set_crs(line, NA), 0.1) %>% st_set_crs(st_crs(l.gc))
o = st_crs("+proj=ortho +lon_0=-80 +lat_0=35")
plot(st_transform(st_geometry(nc), o), axes = TRUE)
plot(st_transform(st_geometry(nc), o)[nc$NAME %in% diff],
     col = "#00880088", add = TRUE)
plot(st_transform(l.gc, o), col = 'red', add = TRUE)
plot(st_transform(l.pc, o), col = 'black', add = TRUE)
plot(st_transform(line, o), col = 'green', add = TRUE)

The fact that the unsegmented line line is straight (R plotted it as straight, it contains only the two endpoints) and that it covers the red line supports that in this plot, the great circle line (red) is plotted straight, and the “straight in plate carree” line is not.

Exercise 7.4

NDVI, normalized differenced vegetation index, is computed as (NIR-R)/(NIR+R), with NIR the near infrared and R the red band. Read the L7_ETMs.tif file into object x, and distribute the band dimensions over attributes by split(x, "band"). Then, add attribute NDVI to this object by using an expression that uses the NIR (band 4) and R (band 3) attributes directly.

library(stars)
(x = read_stars(system.file("tif/L7_ETMs.tif", package = "stars")))
# stars object with 3 dimensions and 1 attribute
# attribute(s):
#              Min. 1st Qu. Median     Mean 3rd Qu. Max.
# L7_ETMs.tif     1      54     69 68.91242      86  255
# dimension(s):
#      from  to  offset delta            refsys point x/y
# x       1 349  288776  28.5 SIRGAS 2000 / ... FALSE [x]
# y       1 352 9120761 -28.5 SIRGAS 2000 / ... FALSE [y]
# band    1   6      NA    NA                NA    NA
(x.spl = split(x)) # note the names of the attributes
# stars object with 2 dimensions and 6 attributes
# attribute(s):
#     Min. 1st Qu. Median     Mean 3rd Qu. Max.
# X1    47      67     78 79.14772      89  255
# X2    32      55     66 67.57465      79  255
# X3    21      49     63 64.35886      77  255
# X4     9      52     63 59.23541      75  255
# X5     1      63     89 83.18266     112  255
# X6     1      32     60 59.97521      88  255
# dimension(s):
#   from  to  offset delta            refsys point x/y
# x    1 349  288776  28.5 SIRGAS 2000 / ... FALSE [x]
# y    1 352 9120761 -28.5 SIRGAS 2000 / ... FALSE [y]
x.spl$NDVI = (x.spl$X4 - x.spl$X3)/(x.spl$X4 + x.spl$X3)
plot(x.spl["NDVI"])

Exercise 7.5

Compute NDVI for the L7_ETMs.tif image by reducing the band dimension, using st_apply and an a function ndvi = function(x) { (x[4]-x[3])/(x[4]+x[3]) }. Plot the result, and write the result to a GeoTIFF.

ndvi_fn = function(x) { (x[4]-x[3])/(x[4]+x[3]) }
ndvi = st_apply(x, 1:2, ndvi_fn)
plot(ndvi)

write_stars(ndvi, "ndvi.tif")

an alternative function is

ndvi_fn = function(x1,x2,x3,x4,x5,x6) { (x4-x3)/(x4+x3) }
ndvi2 = st_apply(x, 1:2, ndvi_fn)
all.equal(ndvi, ndvi2)
# [1] TRUE

This latter function can be much faster, as it is called for chunks of data rather than for individual pixels.

Exercise 7.6

Use st_transform to transform the stars object read from L7_ETMs.tif to EPSG:4326. Print the object. Is this a regular grid? Plot the first band using arguments axes=TRUE and explain why this takes such a long time.

(x_t = st_transform(x, 'EPSG:4326'))
plot(x_t[,,,1], axes = TRUE)

the printed summary shows that this is a curvilinear grid. Plotting takes so long because for curvilinear grids, each cell is converted to a small polygon and then plotted.

Exercise 7.7

Use st_warp to warp the L7_ETMs.tif object to EPSG:4326, and plot the resulting object with axes=TRUE. Why is the plot created much faster than after st_transform?

x_w = st_warp(x, crs = 'EPSG:4326')
plot(x_w[,,,1], reset = FALSE)
plot(st_as_sfc(st_bbox(x_w)), col = NA, border = 'red', add = TRUE)

Plotting is faster now because we created a new regular grid. Note that the grid border does not align perfectly with the square formed by the bounding box (using straight lines in an equidistant rectangular projection): white grid cells indicate the misalignment due to warping/transforming.

Exercise 7.8

Using a vector representation of the raster L7_ETMs, plot the intersection with a circular area around POINT(293716 9113692) with radius 75 m, and compute the area-weighted mean pixel values for this circle. Compare the area-weighted values with those obtained by aggregate using the vector data, and by aggregate using the raster data, using exact=FALSE (default) and exact=TRUE. Explain the differences.

l7 = st_as_sf(x)
st_agr(l7) = "constant"
a = st_as_sfc("POINT(293716 9113692)", crs = st_crs(l7)) %>%
    st_buffer(units::set_units(74, m))
plot(st_intersection(l7, a), key.pos = 4)

(aw = st_interpolate_aw(l7, a, mean, extensive = FALSE))
# Simple feature collection with 1 feature and 6 fields
# Attribute-geometry relationships: aggregate (6)
# Geometry type: POLYGON
# Dimension:     XY
# Bounding box:  xmin: 293642 ymin: 9113618 xmax: 293790 ymax: 9113766
# Projected CRS: SIRGAS 2000 / UTM zone 25S
#   L7_ETMs.tif.V1 L7_ETMs.tif.V2 L7_ETMs.tif.V3 L7_ETMs.tif.V4
# 1       88.54362       73.50115       80.69708       49.98497
#   L7_ETMs.tif.V5 L7_ETMs.tif.V6                       geometry
# 1       111.8357       97.56707 POLYGON ((293790 9113692, 2...
(ag_vector  = aggregate(l7, a, mean))
# Simple feature collection with 1 feature and 6 fields
# Geometry type: POLYGON
# Dimension:     XY
# Bounding box:  xmin: 293642 ymin: 9113618 xmax: 293790 ymax: 9113766
# Projected CRS: SIRGAS 2000 / UTM zone 25S
#   L7_ETMs.tif.V1 L7_ETMs.tif.V2 L7_ETMs.tif.V3 L7_ETMs.tif.V4
# 1        88.0303       72.84848       79.30303       50.33333
#   L7_ETMs.tif.V5 L7_ETMs.tif.V6                       geometry
# 1       111.9697       96.72727 POLYGON ((293790 9113692, 2...
(ag_rasterF = st_as_sf(aggregate(x, a, mean)))
# Simple feature collection with 1 feature and 6 fields
# Geometry type: POLYGON
# Dimension:     XY
# Bounding box:  xmin: 293642 ymin: 9113618 xmax: 293790 ymax: 9113766
# Projected CRS: SIRGAS 2000 / UTM zone 25S
#   L7_ETMs.tif.V1 L7_ETMs.tif.V2 L7_ETMs.tif.V3 L7_ETMs.tif.V4
# 1           88.6          73.55           81.1          49.55
#   L7_ETMs.tif.V5 L7_ETMs.tif.V6                       geometry
# 1          112.1           98.4 POLYGON ((293790 9113692, 2...
(ag_rasterT = st_as_sf(aggregate(x, a, mean, exact = TRUE)))
# Simple feature collection with 1 feature and 6 fields
# Geometry type: POLYGON
# Dimension:     XY
# Bounding box:  xmin: 293642 ymin: 9113618 xmax: 293790 ymax: 9113766
# Projected CRS: SIRGAS 2000 / UTM zone 25S
#   L7_ETMs.tif.V1 L7_ETMs.tif.V2 L7_ETMs.tif.V3 L7_ETMs.tif.V4
# 1       88.54362       73.50115       80.69708       49.98497
#   L7_ETMs.tif.V5 L7_ETMs.tif.V6                       geometry
# 1       111.8357       97.56707 POLYGON ((293790 9113692, 2...
rbind(area_w = st_drop_geometry(aw),
      agg_vec = st_drop_geometry(ag_vector), 
      agg_rasF = st_drop_geometry(ag_rasterF), 
      agg_rasT = st_drop_geometry(ag_rasterT))
#          L7_ETMs.tif.V1 L7_ETMs.tif.V2 L7_ETMs.tif.V3
# area_w         88.54362       73.50115       80.69708
# agg_vec        88.03030       72.84848       79.30303
# agg_rasF       88.60000       73.55000       81.10000
# agg_rasT       88.54362       73.50115       80.69708
#          L7_ETMs.tif.V4 L7_ETMs.tif.V5 L7_ETMs.tif.V6
# area_w         49.98497       111.8357       97.56707
# agg_vec        50.33333       111.9697       96.72727
# agg_rasF       49.55000       112.1000       98.40000
# agg_rasT       49.98497       111.8357       97.56707
all.equal(st_drop_geometry(aw), st_drop_geometry(ag_rasterT),
    check.attributes = FALSE)
# [1] TRUE

Area-weighted interpolation computes the area-weighted mean of the areas shown in the plot; aggregate on the vector values computes the unweighted mean over all polygonized pixels that intersect with the circle (black lines); aggregate on the raster values only averages (unweighted) the cells with pixel centers intersecting with the circle (light red):

plot(st_geometry(l7)[a])
plot(a, add = TRUE, col = NA, border = 'red')
plot(st_as_sf(L7_ETMs[a])[1], add = TRUE, col = '#ff000066')
plot(st_as_sf(L7_ETMs[a], as_points = TRUE)[1], add = TRUE, pch = 3, col = 1)