All the material presented here, to the extent it is original, is available under CC-BY-SA.
Source files (.Rmd) are found here: https://github.com/edzer/UseR2020
For the exercises it is best to install a fresh copy of stars, using
if (!require(remotes))
install.packages("remotes")
remotes::install_github("r-spatial/stars")
and to install starsdata, which is about 1 Gb in size (don't forget to remove afterwards):
install.packages("starsdata", repos = "https://gis-bigdata.uni-muenster.de/pebesma", type = "source")
data.frames) data cubes?RasterLayer, or single-band GeoTIFF) a data cube?library(stars)
## Loading required package: abind
## Loading required package: sf
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 7.0.0
Panel data are time series data collected for a set of subjects (persons, companies, countries etc). That makes them (i) multidimensional, and (ii) spatiotemporal, since the subjects are typically spatial entities (though they might move).
library(ggplot2)
data(Produc, package = "plm")
head(Produc)
## state year region pcap hwy water util pc gsp emp
## 1 ALABAMA 1970 6 15032.67 7325.80 1655.68 6051.20 35793.80 28418 1010.5
## 2 ALABAMA 1971 6 15501.94 7525.94 1721.02 6254.98 37299.91 29375 1021.9
## 3 ALABAMA 1972 6 15972.41 7765.42 1764.75 6442.23 38670.30 31303 1072.3
## 4 ALABAMA 1973 6 16406.26 7907.66 1742.41 6756.19 40084.01 33430 1135.5
## 5 ALABAMA 1974 6 16762.67 8025.52 1734.85 7002.29 42057.31 33749 1169.8
## 6 ALABAMA 1975 6 17316.26 8158.23 1752.27 7405.76 43971.71 33604 1155.4
## unemp
## 1 4.7
## 2 5.2
## 3 4.7
## 4 3.9
## 5 5.5
## 6 7.7
ggplot(Produc) + geom_raster(aes(y = state, x = year, fill = pcap))
this plot shows the raw pcap (public capital stock) values, which mainly demonstrates that large states are large. Normalizing them by dividing through the temporal means, we see more structure:
(s = st_as_stars(Produc, y_decreasing = FALSE))
## stars object with 2 dimensions and 9 attributes
## attribute(s):
## region pcap hwy water
## Min. :1.000 Min. : 2627 Min. : 1827 Min. : 228.5
## 1st Qu.:3.000 1st Qu.: 7097 1st Qu.: 3858 1st Qu.: 764.5
## Median :5.000 Median : 17572 Median : 7556 Median : 2266.5
## Mean :4.958 Mean : 25037 Mean :10218 Mean : 3618.8
## 3rd Qu.:7.000 3rd Qu.: 27692 3rd Qu.:11267 3rd Qu.: 4318.7
## Max. :9.000 Max. :140217 Max. :47699 Max. :24592.3
## util pc gsp emp
## Min. : 538.5 Min. : 4053 Min. : 4354 Min. : 108.3
## 1st Qu.: 2488.3 1st Qu.: 21651 1st Qu.: 16502 1st Qu.: 475.0
## Median : 7008.8 Median : 40671 Median : 39987 Median : 1164.8
## Mean :11199.5 Mean : 58188 Mean : 61014 Mean : 1747.1
## 3rd Qu.:11598.5 3rd Qu.: 64796 3rd Qu.: 68126 3rd Qu.: 2114.1
## Max. :80728.1 Max. :375342 Max. :464550 Max. :11258.0
## unemp
## Min. : 2.800
## 1st Qu.: 5.000
## Median : 6.200
## Mean : 6.602
## 3rd Qu.: 7.900
## Max. :18.000
## dimension(s):
## from to offset delta refsys point values x/y
## state 1 48 NA NA NA NA ALABAMA,...,WYOMING [x]
## year 1 17 1986 -1 NA NA NULL [y]
s = st_apply(s, 1, function(x) x/mean(x)) %>%
st_set_dimensions(names = c("year", "state"))
s
## stars object with 2 dimensions and 9 attributes
## attribute(s):
## region pcap hwy water
## Min. :1 Min. :0.6705 Min. :0.6181 Min. :0.5294
## 1st Qu.:1 1st Qu.:0.9327 1st Qu.:0.9646 1st Qu.:0.8626
## Median :1 Median :1.0146 Median :1.0088 Median :0.9968
## Mean :1 Mean :1.0000 Mean :1.0000 Mean :1.0000
## 3rd Qu.:1 3rd Qu.:1.0644 3rd Qu.:1.0413 3rd Qu.:1.1286
## Max. :1 Max. :1.4160 Max. :1.1928 Max. :1.6239
## util pc gsp emp
## Min. :0.5385 Min. :0.6231 Min. :0.6215 Min. :0.6055
## 1st Qu.:0.9236 1st Qu.:0.8718 1st Qu.:0.8921 1st Qu.:0.9065
## Median :1.0178 Median :1.0041 Median :1.0012 Median :1.0155
## Mean :1.0000 Mean :1.0000 Mean :1.0000 Mean :1.0000
## 3rd Qu.:1.0727 3rd Qu.:1.1204 3rd Qu.:1.0939 3rd Qu.:1.0920
## Max. :1.7289 Max. :1.4786 Max. :1.6435 Max. :1.4798
## unemp
## Min. :0.4340
## 1st Qu.:0.7989
## Median :0.9633
## Mean :1.0000
## 3rd Qu.:1.1732
## Max. :1.9416
## dimension(s):
## from to offset delta refsys point values
## year 1 17 NA NA NA NA NULL
## state 1 48 NA NA NA NA ALABAMA,...,WYOMING
pr = as.data.frame(s)
head(pr)
## year state region pcap hwy water util pc gsp
## 1 1 ALABAMA 1 1.099395 1.061091 1.166170 1.123685 1.220994 1.269043
## 2 2 ALABAMA 1 1.083229 1.050617 1.137907 1.104474 1.202354 1.228148
## 3 3 ALABAMA 1 1.073425 1.042153 1.128882 1.093014 1.177764 1.182770
## 4 4 ALABAMA 1 1.065874 1.036919 1.126735 1.081551 1.166810 1.107454
## 5 5 ALABAMA 1 1.065665 1.040853 1.119124 1.078759 1.149528 1.057200
## 6 6 ALABAMA 1 1.065680 1.038889 1.120675 1.080527 1.119022 1.077569
## emp unemp
## 1 1.164437 1.217836
## 2 1.135630 1.105994
## 3 1.104277 1.366959
## 4 1.057407 1.739766
## 5 1.044436 1.739766
## 6 1.072367 1.366959
ggplot(pr) + geom_raster(aes(y = state, x = year, fill = pcap))
We can also match the state names to state geometries:
library(maps)
states.m = map('state', plot=FALSE, fill=TRUE)
#IDs <- sapply(strsplit(states.m$names, ":"), function(x) x[1])
st = st_as_sf(states.m)[-8,] # deselect D.C., not in Produc
match(tolower(st_get_dimension_values(s, "state")), sub(" ", "_", st$ID))
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
## [26] 26 27 28 29 30 31 32 33 34 35 36 37 38 39 NA 41 42 43 44 45 46 47 48
this indicates that we do not have to reorder the states... then we can simply set them:
(s_sf = st_set_dimensions(s, "state", values = st_geometry(st)))
## stars object with 2 dimensions and 9 attributes
## attribute(s):
## region pcap hwy water
## Min. :1 Min. :0.6705 Min. :0.6181 Min. :0.5294
## 1st Qu.:1 1st Qu.:0.9327 1st Qu.:0.9646 1st Qu.:0.8626
## Median :1 Median :1.0146 Median :1.0088 Median :0.9968
## Mean :1 Mean :1.0000 Mean :1.0000 Mean :1.0000
## 3rd Qu.:1 3rd Qu.:1.0644 3rd Qu.:1.0413 3rd Qu.:1.1286
## Max. :1 Max. :1.4160 Max. :1.1928 Max. :1.6239
## util pc gsp emp
## Min. :0.5385 Min. :0.6231 Min. :0.6215 Min. :0.6055
## 1st Qu.:0.9236 1st Qu.:0.8718 1st Qu.:0.8921 1st Qu.:0.9065
## Median :1.0178 Median :1.0041 Median :1.0012 Median :1.0155
## Mean :1.0000 Mean :1.0000 Mean :1.0000 Mean :1.0000
## 3rd Qu.:1.0727 3rd Qu.:1.1204 3rd Qu.:1.0939 3rd Qu.:1.0920
## Max. :1.7289 Max. :1.4786 Max. :1.6435 Max. :1.4798
## unemp
## Min. :0.4340
## 1st Qu.:0.7989
## Median :0.9633
## Mean :1.0000
## 3rd Qu.:1.1732
## Max. :1.9416
## dimension(s):
## from to offset delta refsys point
## year 1 17 NA NA NA NA
## sfc 1 48 NA NA WGS 84 FALSE
## values
## year NULL
## sfc MULTIPOLYGON (((-87.46201 3...,...,MULTIPOLYGON (((-109.0511 4...
# plot a single time slice:
(s_sf_y1 = s_sf[,1])
## stars object with 2 dimensions and 9 attributes
## attribute(s):
## region pcap hwy water util
## Min. :1 Min. :0.9677 Min. :0.9235 Min. :1.012 Min. :0.9296
## 1st Qu.:1 1st Qu.:1.0470 1st Qu.:1.0195 1st Qu.:1.162 1st Qu.:1.0174
## Median :1 Median :1.0911 Median :1.0613 Median :1.241 Median :1.0887
## Mean :1 Mean :1.1183 Mean :1.0654 Mean :1.249 Mean :1.1374
## 3rd Qu.:1 3rd Qu.:1.1593 3rd Qu.:1.1196 3rd Qu.:1.306 3rd Qu.:1.2134
## Max. :1 Max. :1.4160 Max. :1.1928 Max. :1.624 Max. :1.7289
## pc gsp emp unemp
## Min. :0.9806 Min. :0.9794 Min. :1.012 Min. :0.5657
## 1st Qu.:1.1667 1st Qu.:1.1610 1st Qu.:1.115 1st Qu.:0.8593
## Median :1.2175 Median :1.2304 Median :1.168 Median :1.0205
## Mean :1.2411 Mean :1.2426 Mean :1.181 Mean :1.0569
## 3rd Qu.:1.3299 3rd Qu.:1.3263 3rd Qu.:1.250 3rd Qu.:1.2191
## Max. :1.4786 Max. :1.6435 Max. :1.480 Max. :1.8478
## dimension(s):
## from to offset delta refsys point
## year 1 1 NA NA NA NA
## sfc 1 48 NA NA WGS 84 FALSE
## values
## year NULL
## sfc MULTIPOLYGON (((-87.46201 3...,...,MULTIPOLYGON (((-109.0511 4...
# drop the obsolete dimension:
(s_sf_y1 = adrop(s_sf_y1))
## stars object with 1 dimensions and 9 attributes
## attribute(s):
## region pcap hwy water util
## Min. :1 Min. :0.9677 Min. :0.9235 Min. :1.012 Min. :0.9296
## 1st Qu.:1 1st Qu.:1.0470 1st Qu.:1.0195 1st Qu.:1.162 1st Qu.:1.0174
## Median :1 Median :1.0911 Median :1.0613 Median :1.241 Median :1.0887
## Mean :1 Mean :1.1183 Mean :1.0654 Mean :1.249 Mean :1.1374
## 3rd Qu.:1 3rd Qu.:1.1593 3rd Qu.:1.1196 3rd Qu.:1.306 3rd Qu.:1.2134
## Max. :1 Max. :1.4160 Max. :1.1928 Max. :1.624 Max. :1.7289
## pc gsp emp unemp
## Min. :0.9806 Min. :0.9794 Min. :1.012 Min. :0.5657
## 1st Qu.:1.1667 1st Qu.:1.1610 1st Qu.:1.115 1st Qu.:0.8593
## Median :1.2175 Median :1.2304 Median :1.168 Median :1.0205
## Mean :1.2411 Mean :1.2426 Mean :1.181 Mean :1.0569
## 3rd Qu.:1.3299 3rd Qu.:1.3263 3rd Qu.:1.250 3rd Qu.:1.2191
## Max. :1.4786 Max. :1.6435 Max. :1.480 Max. :1.8478
## dimension(s):
## from to offset delta refsys point
## sfc 1 48 NA NA WGS 84 FALSE
## values
## sfc MULTIPOLYGON (((-87.46201 3...,...,MULTIPOLYGON (((-109.0511 4...
# convert to sf:
sf = st_as_sf(s_sf_y1)
plot(sf)
The North Carolina SIDS (sudden infant death syndrome) data set, introduced here, is an epidemiological data set containing population (births, BIR), disease cases (SID), and non-white births (NWBIR) for two periods, indicated by the start years 1974 and 1979. The population and time information is spread over columns in a data.frame:
nc = read_sf(system.file("gpkg/nc.gpkg", package="sf"))
nc.df = st_set_geometry(nc, NULL) # m is a regular, non-spatial data.frame
head(nc.df)
## # A tibble: 6 x 14
## AREA PERIMETER CNTY_ CNTY_ID NAME FIPS FIPSNO CRESS_ID BIR74 SID74 NWBIR74
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <dbl> <int> <dbl> <dbl> <dbl>
## 1 0.114 1.44 1825 1825 Ashe 37009 37009 5 1091 1 10
## 2 0.061 1.23 1827 1827 Alle… 37005 37005 3 487 0 10
## 3 0.143 1.63 1828 1828 Surry 37171 37171 86 3188 5 208
## 4 0.07 2.97 1831 1831 Curr… 37053 37053 27 508 1 123
## 5 0.153 2.21 1832 1832 Nort… 37131 37131 66 1421 9 1066
## 6 0.097 1.67 1833 1833 Hert… 37091 37091 46 1452 7 954
## # … with 3 more variables: BIR79 <dbl>, SID79 <dbl>, NWBIR79 <dbl>
We now mold this table into a 100 (counties) x 3 (categories) x 2 (years) array:
mat = as.matrix(nc.df[c("BIR74", "SID74", "NWBIR74", "BIR79", "SID79", "NWBIR79")])
dim(mat) = c(county = 100, var = 3, year = 2) # make it a 3-dimensional array
# set dimension values to the array:
dimnames(mat) = list(county = nc$NAME, var = c("BIR", "SID", "NWBIR"), year = c(1974, 1979))
# convert array into a stars object
(nc.st = st_as_stars(pop = mat))
## stars object with 3 dimensions and 1 attribute
## attribute(s):
## pop
## Min. : 0
## 1st Qu.: 8
## Median : 538
## Mean : 1657
## 3rd Qu.: 1784
## Max. :30757
## dimension(s):
## from to offset delta refsys point values
## county 1 100 NA NA NA NA Ashe,...,Brunswick
## var 1 3 NA NA NA NA BIR , SID , NWBIR
## year 1 2 NA NA NA NA 1974, 1979
after which we can replace the county names with the county geometries:
(nc.geom <- st_set_dimensions(nc.st, 1, st_geometry(nc)))
## stars object with 3 dimensions and 1 attribute
## attribute(s):
## pop
## Min. : 0
## 1st Qu.: 8
## Median : 538
## Mean : 1657
## 3rd Qu.: 1784
## Max. :30757
## dimension(s):
## from to offset delta refsys point
## sfc 1 100 NA NA NAD27 FALSE
## var 1 3 NA NA NA NA
## year 1 2 NA NA NA NA
## values
## sfc MULTIPOLYGON (((-81.47276 3...,...,MULTIPOLYGON (((-78.65572 3...
## var BIR , SID , NWBIR
## year 1974, 1979
Note that we have now two fields filled for the sfc (simple feature geometry) dimension: refsys and points. What do they mean?
We can compute and plot the sums over the years for each county (1) and variable (2):
plot(st_apply(nc.geom, c(1,2), sum), key.pos = 4) # sum population over year
In order to meaningfully compare disease cases, we standardise incidence rates (SIR), by
\[\mbox{SIR}_i=\frac{c_i/p_i}{\sum_i c_i / \sum_i p_i}\]
with \(c_i\) the incidences and \(p_i\) the corresponding population of county \(i\). For SIR, the value one indicates equality to the mean incidence rate. We first compute the global incidence \(m\):
# split out BIR, SID, NWBIR over attributes:
split(nc.geom, 2)
## stars object with 2 dimensions and 3 attributes
## attribute(s):
## BIR SID NWBIR
## Min. : 248 Min. : 0.000 Min. : 1
## 1st Qu.: 1177 1st Qu.: 2.000 1st Qu.: 206
## Median : 2265 Median : 5.000 Median : 742
## Mean : 3762 Mean : 7.515 Mean : 1202
## 3rd Qu.: 4451 3rd Qu.: 9.000 3rd Qu.: 1316
## Max. :30757 Max. :57.000 Max. :11631
## dimension(s):
## from to offset delta refsys point
## sfc 1 100 NA NA NAD27 FALSE
## year 1 2 NA NA NA NA
## values
## sfc MULTIPOLYGON (((-81.47276 3...,...,MULTIPOLYGON (((-78.65572 3...
## year 1974, 1979
# sum by category:
(nc.sum = sapply(split(nc.geom, 2), sum))
## BIR SID NWBIR
## 752354 1503 240362
# denominator: mean incidence ratio, averaged over county & years:
(IR = nc.sum[2]/nc.sum[1])
## SID
## 0.00199773
# standardise each year/counte value by dividing over IR:
nc.SIR = st_apply(nc.geom, c(1,3), function(x) (x[2]/x[1])/IR)
plot(nc.SIR, breaks = c(0,.25,.5,.75,.9,1.1,1.5,2.5,3.5,5),
pal = rev(RColorBrewer::brewer.pal(9, "RdBu")))
#library(mapview)
#mapview(breweries)
See section 4.3.2 here.
Why don't we work on regular data.frame or tbl_df tables?
NA or 0 filled)Why not?
NA or 0 values)(g = read_stars(system.file("external/test.grd", package = "raster")))
## stars object with 2 dimensions and 1 attribute
## attribute(s):
## test.grd
## Min. : 138.7
## 1st Qu.: 294.0
## Median : 371.9
## Mean : 425.6
## 3rd Qu.: 501.0
## Max. :1736.1
## NA's :6022
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 80 178400 40 +proj=sterea +lat_0=52.15... NA NULL [x]
## y 1 115 334000 -40 +proj=sterea +lat_0=52.15... NA NULL [y]
plot(g, col = viridis::viridis(11), breaks = "equal")
Note that:
offset and delta are filled, and geographic coordinates can be computed, e.g. for the x dimension, from 1-based cell index \(i\) by \[x_i = \mbox{offset}_x + (i-1) * \mbox{delta}_x\]delta for y is typically negative: going south row indexes increase while y coordinates decreaserefsys (here: a deprecated Proj4 string), allowing a match to other world coordinates (e.g. plotting with leaflet/mapview)(g.sf = st_as_sf(g, na.rm = FALSE))
## Simple feature collection with 9200 features and 1 field
## geometry type: POLYGON
## dimension: XY
## bbox: xmin: 178400 ymin: 329400 xmax: 181600 ymax: 334000
## CRS: unknown
## First 10 features:
## test.grd geometry
## 1 NA POLYGON ((178400 334000, 17...
## 2 NA POLYGON ((178440 334000, 17...
## 3 NA POLYGON ((178480 334000, 17...
## 4 NA POLYGON ((178520 334000, 17...
## 5 NA POLYGON ((178560 334000, 17...
## 6 NA POLYGON ((178600 334000, 17...
## 7 NA POLYGON ((178640 334000, 17...
## 8 NA POLYGON ((178680 334000, 17...
## 9 NA POLYGON ((178720 334000, 17...
## 10 NA POLYGON ((178760 334000, 17...
plot(g.sf, border = 'grey', pal = viridis::viridis(9), nbreaks = 10)
A lot of raster information comes as multi-layer, the simplest being rgb images:
(r = read_stars(system.file("pictures/Rlogo.jpg", package = "rgdal"))) # the old one
## stars object with 3 dimensions and 1 attribute
## attribute(s):
## Rlogo.jpg
## Min. : 8.0
## 1st Qu.:181.0
## Median :255.0
## Mean :207.9
## 3rd Qu.:255.0
## Max. :255.0
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 200 0 1 NA NA NULL [x]
## y 1 175 175 -1 NA NA NULL [y]
## band 1 3 NA NA NA NA NULL
plot(r, breaks = "equal")
Obviously, such data can be plotted as colors; we will first convert the rgb values to R color values:
(r.rgb = st_rgb(r))
## stars object with 2 dimensions and 1 attribute
## attribute(s):
## rgb3
## Length:35000
## Class :character
## Mode :character
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 200 0 1 NA NA NULL [x]
## y 1 175 175 -1 NA NA NULL [y]
r.rgb[[1]][1:3,1]
## [1] "#FFFFFF" "#FFFFFF" "#FFFFFF"
before plotting it:
plot(r.rgb)
Multi-band data is much more common and goes beyond rgb; an exerpt with the 30m bands of a Landsat-7 image is found here:
L7file = system.file("tif/L7_ETMs.tif", package = "stars")
(L7 = read_stars(L7file))
## stars object with 3 dimensions and 1 attribute
## attribute(s):
## L7_ETMs.tif
## Min. : 1.00
## 1st Qu.: 54.00
## Median : 69.00
## Mean : 68.91
## 3rd Qu.: 86.00
## Max. :255.00
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 349 288776 28.5 UTM Zone 25, Southern Hem... FALSE NULL [x]
## y 1 352 9120761 -28.5 UTM Zone 25, Southern Hem... FALSE NULL [y]
## band 1 6 NA NA NA NA NULL
Plotting this uses histogram stretching over all bands. I think of histogram stretching as HDR in monochrome attribute space: each grey tone covers the same area, color breaks are quantiles of the map layer:
plot(L7)
We can also do the stretching over each band individually, meaning there is no common key:
plot(L7, join_zlim = FALSE)
From these bands we can also make color or false color composites:
par(mfrow = c(1, 2))
plot(L7, rgb = c(3,2,1), reset = FALSE, main = "RGB") # rgb
plot(L7, rgb = c(4,3,2), main = "False color (NIR-R-G)") # false color
Suppose we create a 1-degree grid over parts of Europe:
bb = st_bbox(c(xmin = -10, xmax = 20, ymin = 40, ymax = 60), crs = 4326)
(x = st_as_stars(bb, dx = 1, dy = 1))
## stars object with 2 dimensions and 1 attribute
## attribute(s):
## values
## Min. :0
## 1st Qu.:0
## Median :0
## Mean :0
## 3rd Qu.:0
## Max. :0
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 30 -10 1 WGS 84 NA NULL [x]
## y 1 20 60 -1 WGS 84 NA NULL [y]
We can plot the grid outline e.g. by:
#sf_use_s2(FALSE)
library(rnaturalearth)
ne <- ne_countries(returnclass = "sf", continent = 'europe') %>%
st_set_precision(1e8)
ne$pop_dens = units::set_units(ne$pop_est / st_area(ne), 1/(km^2))
plot(ne["pop_dens"], reset = FALSE, extent = bb)
pop = st_rasterize(ne["pop_dens"], x)
plot(st_as_sf(pop, na.rm = FALSE), add = TRUE, border = 'grey')
We can transform this grid e.g. to the ETRS89 / LAEA projection:
(pop.3035 = st_transform(pop, 3035))
## stars object with 2 dimensions and 1 attribute
## attribute(s):
## pop_dens [1/km^2]
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 78.03
## Mean : 80.40
## 3rd Qu.:123.98
## Max. :417.65
## dimension(s):
## from to offset delta refsys point
## x 1 30 NA NA ETRS89-extended / LAEA Eu... FALSE
## y 1 20 NA NA ETRS89-extended / LAEA Eu... FALSE
## values x/y
## x [30x20] 2680703,...,5127800 [x]
## y [30x20] 1933849,...,4195484 [y]
## curvilinear grid
ne.3035 = st_transform(ne, 3035)
plot(pop.3035, border = 'grey', reset = FALSE)
plot(st_geometry(ne.3035), add = TRUE, border = 'yellow')
Note that the transformed grid is no longer a regular grid (it does not have offset and delta values for x and y), but a curvilinear grid: for every grid cell the longitude/latitude pair is stored. All the grid cell values are exactly retained, their geometry only changed reference system!
If we need to work this curvilinear grid to a regular grid in the new reference system, we can either warp it:
target_grid = st_as_stars(st_bbox(pop.3035)) # add dimensions/cell sizes etc here
(w = st_warp(pop, target_grid)) # or give only a target crs
## stars object with 2 dimensions and 1 attribute
## attribute(s):
## pop_dens [1/km^2]
## Min. : 0.00
## 1st Qu.: 0.00
## Median : 80.68
## Mean : 82.27
## 3rd Qu.:123.98
## Max. :417.65
## NA's :10148
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 265 2680703 9241.65 ETRS89-extended / LAEA Eu... NA NULL [x]
## y 1 245 4195484 -9241.65 ETRS89-extended / LAEA Eu... NA NULL [y]
plot(w, border = 'grey', reset = FALSE)
plot(st_geometry(ne.3035), add = TRUE, border = 'yellow')
Suppose we had worked with population rather than population density, this warping would have caused a shift in total population; another approach would have been to use sf::st_interpolate_aw for area-weighted interpolation for this.
A lot of Earth observation and modelling data from domains such as weather, climate, hydrology, oceanography come with a time dimension. Quite commonly, data cubes are stored in NetCDF (or HDF4, HDF5) formats. We have two examples; the first is a time series with two attributes, concerning monthly total precipitation, downscaled from CMIP:
(w <- system.file("nc/bcsd_obs_1999.nc", package = "stars") %>%
read_stars("data/full_data_daily_2013.nc"))
## pr, tas,
## stars object with 3 dimensions and 2 attributes
## attribute(s):
## pr [mm/m] tas [C]
## Min. : 0.59 Min. :-0.421
## 1st Qu.: 56.14 1st Qu.: 8.899
## Median : 81.88 Median :15.658
## Mean :101.26 Mean :15.489
## 3rd Qu.:121.07 3rd Qu.:21.780
## Max. :848.55 Max. :29.386
## NA's :7116 NA's :7116
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 81 -85 0.125 NA NA NULL [x]
## y 1 33 37.125 -0.125 NA NA NULL [y]
## time 1 12 NA NA POSIXct NA 1999-01-31,...,1999-12-31
plot(w)
The second concerns a time series split over multiple files,
x = c(
"avhrr-only-v2.19810901.nc",
"avhrr-only-v2.19810902.nc",
"avhrr-only-v2.19810903.nc",
"avhrr-only-v2.19810904.nc",
"avhrr-only-v2.19810905.nc",
"avhrr-only-v2.19810906.nc",
"avhrr-only-v2.19810907.nc",
"avhrr-only-v2.19810908.nc",
"avhrr-only-v2.19810909.nc"
)
# see the second vignette:
# install.packages("starsdata", repos = "http://gis-bigdata.uni-muenster.de", type = "source")
file_list = system.file(paste0("netcdf/", x), package = "starsdata")
(y = read_stars(file_list, quiet = TRUE))
## stars object with 4 dimensions and 4 attributes
## attribute(s), summary of first 1e+05 cells:
## sst [°*C] anom [°*C] err [°*C] ice [percent]
## Min. :-1.80 Min. :-4.69 Min. :0.110 Min. :0.010
## 1st Qu.:-1.19 1st Qu.:-0.06 1st Qu.:0.300 1st Qu.:0.730
## Median :-1.05 Median : 0.52 Median :0.300 Median :0.830
## Mean :-0.32 Mean : 0.23 Mean :0.295 Mean :0.766
## 3rd Qu.:-0.20 3rd Qu.: 0.71 3rd Qu.:0.300 3rd Qu.:0.870
## Max. : 9.36 Max. : 3.70 Max. :0.480 Max. :1.000
## NA's :13360 NA's :13360 NA's :13360 NA's :27377
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 1440 0 0.25 NA NA NULL [x]
## y 1 720 90 -0.25 NA NA NULL [y]
## zlev 1 1 0 [m] NA NA NA NULL
## time 1 9 1981-09-01 02:00:00 CEST 1 days POSIXct NA NULL
This variable contains a singular dimension, zlev, which we can remove using adrop:
(z = adrop(y))
## stars object with 3 dimensions and 4 attributes
## attribute(s), summary of first 1e+05 cells:
## sst [°*C] anom [°*C] err [°*C] ice [percent]
## Min. :-1.80 Min. :-4.69 Min. :0.110 Min. :0.010
## 1st Qu.:-1.19 1st Qu.:-0.06 1st Qu.:0.300 1st Qu.:0.730
## Median :-1.05 Median : 0.52 Median :0.300 Median :0.830
## Mean :-0.32 Mean : 0.23 Mean :0.295 Mean :0.766
## 3rd Qu.:-0.20 3rd Qu.: 0.71 3rd Qu.:0.300 3rd Qu.:0.870
## Max. : 9.36 Max. : 3.70 Max. :0.480 Max. :1.000
## NA's :13360 NA's :13360 NA's :13360 NA's :27377
## dimension(s):
## from to offset delta refsys point values x/y
## x 1 1440 0 0.25 NA NA NULL [x]
## y 1 720 90 -0.25 NA NA NULL [y]
## time 1 9 1981-09-01 02:00:00 CEST 1 days POSIXct NA NULL
which we can for instance plot with ggplot:
library(ggplot2)
library(viridis)
## Loading required package: viridisLite
## Loading required package: viridisLite
library(ggthemes)
ggplot() +
geom_stars(data = z[1], alpha = 0.8, downsample = c(10, 10, 1)) +
facet_wrap("time") +
scale_fill_viridis() +
coord_equal() +
theme_map() +
theme(legend.position = "bottom") +
theme(legend.key.width = unit(2, "cm"))