Spatial Data in R: New Directions

Jul 4, 2017, UseR! tutorial · Edzer Pebesma

install.packages(c("sf", "tidyverse", "devtools"))
devtools::install_github("tidyverse/ggplot2")
library(sf)
## Linking to GEOS 3.5.1, GDAL 2.1.3, proj.4 4.9.2, lwgeom 2.3.2 r15302
(pt = st_point(c(2,4)))
## POINT(2 4)
(pt_bin = st_as_binary(pt))
##  [1] 01 01 00 00 00 00 00 00 00 00 00 00 40 00 00 00 00 00 00 10 40
st_as_sfc(list(pt_bin))[[1]]
## POINT(2 4)
st_dimension(pt)
## [1] 0
st_intersects(pt, pt, sparse = FALSE)
##      [,1]
## [1,] TRUE
str(pt)
## Classes 'XY', 'POINT', 'sfg'  num [1:2] 2 4
str(st_linestring(rbind(c(0,0), c(0,1), c(1,1))))
##  XY [1:3, 1:2] 0 0 1 0 1 1
str(st_polygon(list(rbind(c(0,0), c(0,1), c(1,1), c(0,0)))))
## List of 1
##  $ : num [1:4, 1:2] 0 0 1 0 0 1 1 0
##  - attr(*, "class")= chr [1:3] "XY" "POLYGON" "sfg"
st_crs("+proj=longlat +datum=WGS84")  # "Proj.4 string"
## $epsg
## [1] 4326
## 
## $proj4string
## [1] "+proj=longlat +datum=WGS84 +no_defs"
## 
## attr(,"class")
## [1] "crs"
st_crs(3857)                          # EPSG code
## $epsg
## [1] 3857
## 
## $proj4string
## [1] "+proj=merc +a=6378137 +b=6378137 +lat_ts=0.0 +lon_0=0.0 +x_0=0.0 +y_0=0 +k=1.0 +units=m +nadgrids=@null +wktext +no_defs"
## 
## attr(,"class")
## [1] "crs"
st_crs(3857)$units                    # reveal units
## [1] "m"
st_crs(NA)                            # unknown (assumed planar/Cartesian)
## $epsg
## [1] NA
## 
## $proj4string
## [1] NA
## 
## attr(,"class")
## [1] "crs"
fname = system.file("shape/nc.shp", package = "sf")
nc = read_sf(fname)
print(nc, n = 3)
## Simple feature collection with 100 features and 14 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -84.32385 ymin: 33.88199 xmax: -75.45698 ymax: 36.58965
## epsg (SRID):    4267
## proj4string:    +proj=longlat +datum=NAD27 +no_defs
## First 3 features:
##    AREA PERIMETER CNTY_ CNTY_ID      NAME  FIPS FIPSNO CRESS_ID BIR74
## 1 0.114     1.442  1825    1825      Ashe 37009  37009        5  1091
## 2 0.061     1.231  1827    1827 Alleghany 37005  37005        3   487
## 3 0.143     1.630  1828    1828     Surry 37171  37171       86  3188
##   SID74 NWBIR74 BIR79 SID79 NWBIR79                       geometry
## 1     1      10  1364     0      19 MULTIPOLYGON(((-81.47275543...
## 2     0      10   542     3      12 MULTIPOLYGON(((-81.23989105...
## 3     5     208  3616     6     260 MULTIPOLYGON(((-80.45634460...
plot(nc)
## Warning: plotting the first 10 out of 14 attributes; use max.plot = 14 to
## plot all
(a1 = st_area(nc[1,]))                      # area, using geosphere::areaPolygon
## 1137388604 m^2
(a2 = st_area(st_transform(nc[1,], 32119))) # NC state plane, m
## 1137598162 m^2
(a3 = st_area(st_transform(nc[1,], 2264)))  # NC state plane, US foot
## 12244955726 US_survey_foot^2
units::set_units(a1, km^2)
## 1137.389 km^2
units::set_units(a2, km^2)
## 1137.598 km^2
units::set_units(a3, km^2)
## 1137.598 km^2
nc = st_transform(nc, 32119) # NC state plane, m
methods(class = "sf")
nc1 = nc[1:5,]
st_intersects(nc1, nc1)
## [[1]]
## [1] 1 2
## 
## [[2]]
## [1] 1 2 3
## 
## [[3]]
## [1] 2 3
## 
## [[4]]
## [1] 4
## 
## [[5]]
## [1] 5
st_intersects(nc1, nc1, sparse = FALSE)
##       [,1]  [,2]  [,3]  [,4]  [,5]
## [1,]  TRUE  TRUE FALSE FALSE FALSE
## [2,]  TRUE  TRUE  TRUE FALSE FALSE
## [3,] FALSE  TRUE  TRUE FALSE FALSE
## [4,] FALSE FALSE FALSE  TRUE FALSE
## [5,] FALSE FALSE FALSE FALSE  TRUE
opar = par(mfrow = c(1,2))
ncg = st_geometry(nc[1:3,])
plot(ncg, col = sf.colors(3, categorical = TRUE))
u = st_union(ncg)
plot(u, lwd = 2)
plot(st_intersection(ncg[1], ncg[2]), col = 'red', add = TRUE)
plot(st_buffer(u, 10000), border = 'blue', add = TRUE)
# st_buffer(u, units::set_unit(10, km)) # with sf devel
plot(st_buffer(u, -5000), border = 'green', add = TRUE)
par(opar)
g = st_make_grid(nc, n = c(20,10))
a1 = st_interpolate_aw(nc["BIR74"], g, extensive = FALSE)
## Warning in st_interpolate_aw(nc["BIR74"], g, extensive = FALSE):
## st_interpolate_aw assumes attributes are constant over areas of x
sum(a1$BIR74) / sum(nc$BIR74) # not close to one: property is assumed spatially intensive
## [1] 1.191945
a2 = st_interpolate_aw(nc["BIR74"], g, extensive = TRUE)
## Warning in st_interpolate_aw(nc["BIR74"], g, extensive = TRUE):
## st_interpolate_aw assumes attributes are constant over areas of x
sum(a2$BIR74) / sum(nc$BIR74)
## [1] 1
#a1$intensive = a1$BIR74
#a1$extensive = a2$BIR74
#plot(a1[c("intensive", "extensive")])
a1$what = "intensive"
a2$what = "extensive"
# devtools::install_github("tidyverse/ggplot2")
library(ggplot2)
l = st_cast(nc, "LINESTRING")
ggplot() + geom_sf(data = rbind(a1,a2), aes(fill = BIR74)) + 
    geom_sf(data = l, col = 'lightgray') + facet_grid(what~.) +
    scale_fill_gradientn(colors = sf.colors(10))
# find county matching a coordinate:
pt = st_point(c(472038, 210385))
pt_sfc = st_sfc(pt, crs = st_crs(nc))
pt_sf = st_sf(a = 1, geom = pt_sfc)
st_join(pt_sf, nc)
## Simple feature collection with 1 feature and 15 fields
## geometry type:  POINT
## dimension:      XY
## bbox:           xmin: 472038 ymin: 210385 xmax: 472038 ymax: 210385
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
##   a  AREA PERIMETER CNTY_ CNTY_ID  NAME  FIPS FIPSNO CRESS_ID BIR74 SID74
## 1 1 0.134      1.59  1980    1980 Rowan 37159  37159       80  4606     3
##   NWBIR74 BIR79 SID79 NWBIR79                 geom
## 1    1057  6427     8    1504 POINT(472038 210385)
st_join(nc, pt_sf, left = FALSE) # full join
## Simple feature collection with 1 feature and 15 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: 449767.8 ymin: 194348 xmax: 502410 ymax: 234481
## epsg (SRID):    32119
## proj4string:    +proj=lcc +lat_1=36.16666666666666 +lat_2=34.33333333333334 +lat_0=33.75 +lon_0=-79 +x_0=609601.22 +y_0=0 +ellps=GRS80 +towgs84=0,0,0,0,0,0,0 +units=m +no_defs
##     AREA PERIMETER CNTY_ CNTY_ID  NAME  FIPS FIPSNO CRESS_ID BIR74 SID74
## 50 0.134      1.59  1980    1980 Rowan 37159  37159       80  4606     3
##    NWBIR74 BIR79 SID79 NWBIR79 a                       geometry
## 50    1057  6427     8    1504 1 MULTIPOLYGON(((491842.71265...
# join-with-self:
nrow(nc)
## [1] 100
x = st_join(nc, nc)
nrow(x) # neighbours AND each state with itself
## [1] 590
x = st_join(nc, nc, join = st_touches)
nrow(x) # neighbours, now excluding itself
## [1] 490
st_rook = function(a, b, prepared) st_relate(a, b, pattern = "F***1****")
x = st_join(nc, nc, join = st_rook)
nrow(x) # "rook" neighbours: touch over a line, not in a point
## [1] 462
# which states touch another state in a point only?
sel = unlist(st_relate(nc, nc, pattern = "F***0****"))
plot(st_geometry(nc))
plot(st_geometry(nc[sel,]), add = TRUE, col = 'grey')
pts = st_intersection(nc, nc)
## Warning: attribute variables are assumed to be spatially constant
## throughout all geometries
pts = pts[st_dimension(pts) == 0, ]
plot(st_geometry(nc))
plot(st_geometry(nc[sel,]), add = TRUE, col = 'grey')
plot(st_geometry(pts), add = TRUE, col = '#ff000044', pch = 16)
# how many neighbours does each of these points have?
unique(lengths(st_intersects(pts, nc)))
## [1] 4
p = st_point(c(0,1))
mp = st_multipoint(rbind(c(1,1), c(3,3)))
ls = st_linestring(rbind(c(1,1), c(3,3)))
(gc = st_geometrycollection(list(p, mp)))
## GEOMETRYCOLLECTION(POINT(0 1), MULTIPOINT(1 1, 3 3))
st_sfc(gc)
## Geometry set for 1 feature 
## geometry type:  GEOMETRYCOLLECTION
## dimension:      XY
## bbox:           xmin: 0 ymin: 1 xmax: 3 ymax: 3
## epsg (SRID):    NA
## proj4string:    NA
## GEOMETRYCOLLECTION(POINT(0 1), MULTIPOINT(1 1, ...
opar <- par(mfrow = c(1, 2))
a <- st_polygon(list(cbind(c(0,0,7.5,7.5,0),c(0,-1,-1,0,0))))
b <- st_polygon(list(cbind(c(0,1,2,3,4,5,6,7,7,0),c(1,0,.5,0,0,0.5,-0.5,-0.5,1,1))))
plot(a, ylim = c(-1,1))
title("intersecting two polygons:")
plot(b, add = TRUE, border = 'red')
(i <- st_intersection(a,b))
## GEOMETRYCOLLECTION(POINT(1 0), LINESTRING(4 0, 3 0), POLYGON((5.5 0, 7 0, 7 -0.5, 6 -0.5, 5.5 0)))
plot(a, ylim = c(-1,1))
title("GEOMETRYCOLLECTION")
plot(b, add = TRUE, border = 'red')
plot(i, add = TRUE, col = 'green', lwd = 2)
par(opar)
(gm = st_sfc(p, mp, ls))
## Geometry set for 3 features 
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 0 ymin: 1 xmax: 3 ymax: 3
## epsg (SRID):    NA
## proj4string:    NA
## POINT(0 1)
## MULTIPOINT(1 1, 3 3)
## LINESTRING(1 1, 3 3)
sf = st_sf(a = 1:3, geom = gm)
st_dimension(sf)
## [1] 0 0 1
sf[st_dimension(sf) == 1,]
## Simple feature collection with 1 feature and 1 field
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 1 ymin: 1 xmax: 3 ymax: 3
## epsg (SRID):    NA
## proj4string:    NA
##   a                 geom
## 3 3 LINESTRING(1 1, 3 3)
st_cast(sf, "MULTIPOINT")
## Simple feature collection with 3 features and 1 field
## geometry type:  MULTIPOINT
## dimension:      XY
## bbox:           xmin: 0 ymin: 1 xmax: 3 ymax: 3
## epsg (SRID):    NA
## proj4string:    NA
##   a             geometry
## 1 1      MULTIPOINT(0 1)
## 2 2 MULTIPOINT(1 1, 3 3)
## 3 3 MULTIPOINT(1 1, 3 3)
(parts = st_cast(st_sf(a = 1, geometry = st_sfc(gc))))
## Warning in st_cast.sf(st_sf(a = 1, geometry = st_sfc(gc))): repeating
## attributes for all sub-geometries for which they may not be constant
## Simple feature collection with 2 features and 1 field
## geometry type:  GEOMETRY
## dimension:      XY
## bbox:           xmin: 0 ymin: 1 xmax: 3 ymax: 3
## epsg (SRID):    NA
## proj4string:    NA
##   a             geometry
## 1 1           POINT(0 1)
## 2 1 MULTIPOINT(1 1, 3 3)
st_cast(parts)
## Simple feature collection with 2 features and 1 field
## geometry type:  MULTIPOINT
## dimension:      XY
## bbox:           xmin: 0 ymin: 1 xmax: 3 ymax: 3
## epsg (SRID):    NA
## proj4string:    NA
##   a             geometry
## 1 1      MULTIPOINT(0 1)
## 2 1 MULTIPOINT(1 1, 3 3)
st_point()
## POINT(NA NA)
st_geometrycollection()
## GEOMETRYCOLLECTION()
st_sfc(st_point(0:1), st_point(), st_geometrycollection()) %>%
    st_dimension
## [1]  0 NA NA
m1 = attr(methods(class = "sf"), "info")$generic
library(tidyverse)
## + ggplot2 2.2.1.9000        Date: 2017-07-04
## + tibble  1.3.3                R: 3.4.0
## + tidyr   0.6.1               OS: Ubuntu 16.04.2 LTS
## + readr   1.1.0              GUI: X11
## + purrr   0.2.2.9000      Locale: en_US.UTF-8
## + dplyr   0.7.1               TZ: Europe/Berlin
## + stringr 1.2.0           
## + forcats 0.2.0
## Conflicts -----------------------------------------------------------------
## * filter(),  from dplyr, masks stats::filter()
## * lag(),     from dplyr, masks stats::lag()
m2 = attr(methods(class = "sf"), "info")$generic
noquote(setdiff(m2, m1))
##  [1] anti_join   arrange_    arrange     distinct_   distinct   
##  [6] filter_     filter      full_join   gather_     group_by_  
## [11] group_by    inner_join  left_join   mutate_     mutate     
## [16] nest_       rename_     rename      right_join  sample_frac
## [21] sample_n    select_     select      semi_join   separate_  
## [26] slice_      slice       spread_     summarise_  summarise  
## [31] transmute_  transmute   ungroup     unite_
cent <- st_geometry(nc) %>% st_centroid %>% st_coordinates 
nc1 <- nc %>% 
  mutate(area = as.numeric(st_area(.)), longitude = cent[,1]) %>%
  group_by(cut(longitude,4))  %>% 
  summarise(area = sum(area), BIR79 = sum(BIR79), dens = mean(BIR79/area))
ggplot(nc1) + geom_sf(aes(fill = dens))

Spatial Data in R: New Directions

Edzer Pebesma

Jul 4, 2017, UseR! tutorial

A short history of handling spatial data in R

Simple feature access in R: package `sf`

Tidyverse, list-columns

Reference systems

sf: handling real data

reading and writing spatial data

coordinate transformation/conversion

Methods for simple features

book keeping, low-level I/O

logical binary geometry predicates

geometry generating logical operators

higher-level operations: summarise, interpolate, aggregate, st_join

manipulating geometries

convenience functions

handling mixes: `GEOMETRY`, `GEOMETRYCOLLECTION`

empty geometries

Pipe-based workflows and tidyverse

Array data: rasters, spatial time series

Summary/outlook

Spatial Data in R: New Directions

Edzer Pebesma

Jul 4, 2017, UseR! tutorial

A short history of handling spatial data in R

Simple feature access in R: package sf

Tidyverse, list-columns

Reference systems

sf: handling real data

reading and writing spatial data

coordinate transformation/conversion

Methods for simple features

book keeping, low-level I/O

logical binary geometry predicates

geometry generating logical operators

higher-level operations: summarise, interpolate, aggregate, st_join

manipulating geometries

convenience functions

handling mixes: GEOMETRY, GEOMETRYCOLLECTION

empty geometries

Pipe-based workflows and tidyverse

Array data: rasters, spatial time series

Summary/outlook

Simple feature access in R: package `sf`

handling mixes: `GEOMETRY`, `GEOMETRYCOLLECTION`