class: center, middle, inverse, title-slide # visualizing
spatial data I ### 2021-10-08 --- class: middle, inverse # Welcome --- ## Announcements - Let me know by the end of class today if you want to work on improving your Project 1 - Decide as a team if you'd like to "own" your Project 1 repo and share it as part of your portfolio publicly, email me to let me know - Get started on HW 3 before lab on Monday so you can get questions answered --- ## Setup .midi[ ```r # load packages library(tidyverse) library(sf) library(lubridate) library(colorblindr) library(gghighlight) library(highcharter) library(fs) library(geosphere) library(rnaturalearth) library(rnaturalearthdata) library(ggspatial) library(dsbox) # set default theme for ggplot2 ggplot2::theme_set(ggplot2::theme_minimal(base_size = 16)) # set default figure parameters for knitr knitr::opts_chunk$set( fig.width = 8, fig.asp = 0.618, fig.retina = 3, dpi = 300, out.width = "60%" ) # dplyr print min and max options(dplyr.print_max = 6, dplyr.print_min = 6) ``` ] --- class: middle, inverse # Wrap up: Visualizing multiple trends --- ## Remember: Air Quality Index - The AQI is the Environmental Protection Agency's index for reporting air quality - Higher values of AQI indicate worse air quality <img src="images/aqi-levels.png" title="AQI Basics for Ozone and Particle Pollution" alt="AQI Basics for Ozone and Particle Pollution" width="100%" /> .footnote[ Source: https://www.airnow.gov/aqi-basics ] --- ## AQI data - Source: [EPA's Daily Air Quality Tracker](https://www.epa.gov/outdoor-air-quality-data/air-data-daily-air-quality-tracker) - 2016 - 2021 AQI (Ozone and PM2.5 combined) for Durham-Chapel Hill, NC core-based statistical area (CBSA), one file per year .midi[ ```r dch_files <- fs::dir_ls(here::here("data/durham-chapel-hill")) ``` ] .midi[ ```r dch <- read_csv(dch_files, na = c(".", "")) ``` ] - 2016 - 2021 AQI (Ozone and PM2.5 combined) for San Francisco-Oakland-Hayward, CA CBSA, one file per year .midi[ ```r sf_files <- fs::dir_ls(here::here("data/san-francisco")) ``` ] .midi[ ```r sf <- read_csv(sf_files, na = c(".", "")) ``` ] --- ## Data cleaning - Durham-Chapel Hill .midi[ ```r dch <- dch %>% janitor::clean_names() %>% mutate(date = mdy(date), aqi_value = as.numeric(aqi_value), good_aqi = if_else(aqi_value <= 50, 1, 0)) %>% filter(!is.na(aqi_value)) %>% arrange(date) %>% mutate(cum_good_aqi = cumsum(good_aqi), .after = aqi_value) ``` ] - San Francisco-Oakland-Hayward .midi[ ```r sf <- sf %>% janitor::clean_names() %>% mutate(date = mdy(date), aqi_value = as.numeric(aqi_value), good_aqi = if_else(aqi_value <= 50, 1, 0)) %>% filter(!is.na(aqi_value)) %>% arrange(date) %>% mutate(cum_good_aqi = cumsum(good_aqi), .after = aqi_value) ``` ] --- ## Visualizing annual trends ```r sf <- sf %>% mutate( year = year(date), month = month(date), day = day(date), fake_date = ymd(as.Date(paste0(2000, "-", month, "-", day))), .after = date ) sf ``` ``` ## # A tibble: 2,105 × 17 ## date year month day fake_date aqi_value cum_good_aqi main_pollutant ## <date> <dbl> <dbl> <int> <date> <dbl> <dbl> <chr> ## 1 2016-01-01 2016 1 1 2000-01-01 32 1 PM2.5 ## 2 2016-01-02 2016 1 2 2000-01-02 37 2 PM2.5 ## 3 2016-01-03 2016 1 3 2000-01-03 45 3 PM2.5 ## 4 2016-01-04 2016 1 4 2000-01-04 33 4 PM2.5 ## 5 2016-01-05 2016 1 5 2000-01-05 27 5 PM2.5 ## 6 2016-01-06 2016 1 6 2000-01-06 39 6 PM2.5 ## # … with 2,099 more rows, and 9 more variables: site_name <chr>, site_id <chr>, ## # source <chr>, x20_year_high_2000_2019 <dbl>, x20_year_low_2000_2019 <dbl>, ## # x5_year_average_2015_2019 <dbl>, date_of_20_year_high <chr>, ## # date_of_20_year_low <chr>, good_aqi <dbl> ``` --- ## Visualizing annual trends .panelset.sideways[ .panel[.panel-name[Code] ```r ggplot(sf, aes(x = fake_date, y = aqi_value, group = year, color = factor(year))) + geom_line() + scale_x_date(date_labels = "%b") + scale_color_OkabeIto() + labs( x = NULL, y = "AQI", color = "Year", title = "Ozone and PM2.5 Daily AQI Values", subtitle = "San Francisco-Oakland-Hayward, CA", caption = "\nSource: EPA Daily Air Quality Tracker" ) + theme( plot.title.position = "plot", panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank() ) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-12-1.png" width="100%" /> ] ] --- ## Highlighting Highlight a given year .panelset.sideways[ .panel[.panel-name[Code] ```r ggplot(sf, aes(x = fake_date, y = aqi_value, group = year)) + geom_line() + * gghighlight(year == 2018, label_key = year) + scale_x_date(date_labels = "%b") + labs( x = NULL, y = "AQI", color = "Year", title = "Ozone and PM2.5 Daily AQI Values", subtitle = "San Francisco-Oakland-Hayward, CA", caption = "\nSource: EPA Daily Air Quality Tracker" ) + theme( plot.title.position = "plot", panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank() ) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-13-1.png" width="100%" /> ] ] --- ## Highlighting Customizing highlighting style .panelset.sideways[ .panel[.panel-name[Code] ```r ggplot(sf, aes(x = fake_date, y = aqi_value, group = year)) + * geom_line(color = "firebrick4", size = 1) + gghighlight( year == 2018, label_key = year, * unhighlighted_params = list(size = 0.2, color = "darkgray") ) + scale_x_date(date_labels = "%b") + labs( x = NULL, y = "AQI", color = "Year", title = "Ozone and PM2.5 Daily AQI Values", subtitle = "San Francisco-Oakland-Hayward, CA", caption = "\nSource: EPA Daily Air Quality Tracker" ) + theme( plot.title.position = "plot", panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank() ) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-14-1.png" width="100%" /> ] ] --- ## Highlighting Customizing labels .small[ .panelset.sideways[ .panel[.panel-name[Code] ```r ggplot(sf, aes(x = fake_date, y = aqi_value, group = year)) + geom_line(color = "firebrick4", size = 1) + gghighlight( year == 2018, label_key = year, unhighlighted_params = list(size = 0.2, color = "darkgray"), * use_direct_label = FALSE ) + * annotate( * "label", x = ymd("2000-12-15"), y = 225, * label = "2018", fill = "firebrick4", color = "white" * ) + scale_x_date(date_labels = "%b") + labs( x = NULL, y = "AQI", color = "Year", title = "Ozone and PM2.5 Daily AQI Values", subtitle = "San Francisco-Oakland-Hayward, CA", caption = "\nSource: EPA Daily Air Quality Tracker" ) + theme( plot.title.position = "plot", panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank() ) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-15-1.png" width="100%" /> ] ] ] --- ## Highlighting Highlight any years where the average AQI is over 50 .midi[ .panelset.sideways[ .panel[.panel-name[Code] ```r ggplot(sf, aes(x = fake_date, y = aqi_value, group = year, color = factor(year))) + geom_line(show.legend = FALSE) + gghighlight( * mean(aqi_value) > 50, label_key = year, use_direct_label = FALSE ) + scale_x_date(date_labels = "%b") + scale_color_OkabeIto(darken = 0.5) + labs( x = NULL, y = "AQI", color = "Year", title = "Ozone and PM2.5 Daily AQI Values", subtitle = "San Francisco-Oakland-Hayward, CA", caption = "\nSource: EPA Daily Air Quality Tracker" ) + theme( plot.title.position = "plot", panel.grid.minor.y = element_blank(), panel.grid.minor.x = element_blank() ) + * facet_wrap(~ year) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-16-1.png" width="100%" /> ] ] ] --- class: middle, inverse # Wrap up: Interactive plots --- ## Interactive plots Using the **highcharter** package .panelset.sideways[ .panel[.panel-name[Code] ```r highchart(type = "stock") %>% hc_add_series( data = dch, hcaes(x = date, y = aqi_value), type = "line", color = "#4E72E3", showInLegend = FALSE ) %>% hc_title( text = "Ozone and PM2.5 Daily AQI Values", align = "left" ) %>% hc_subtitle( text = "Durham-Chapel Hill, NC", align = "left" ) %>% hc_rangeSelector(enabled = FALSE) ``` ] .panel[.panel-name[Plot]
] ] --- class: middle, inverse # Geospatial data in the real world --- ## Visualizing geographic areas Without any projection, on the cartesian coordinate system ```r world_map <- ggplot(map_data("world"), aes(long, lat, group = group)) + geom_polygon(fill = "white", color = "#3c3b6e", size = 0.3) + labs(x = NULL, y = NULL) world_map ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-18-1.png" width="60%" /> --- ## Mercator projection Meridians are equally spaced and vertical, parallels are horizontal lines whose spacing increases the further we move away from the equator ```r world_map + * coord_map(projection = "mercator") ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-19-1.png" width="60%" /> --- ## Mercator projection .hand[without the weird straight lines through the earth!] ```r world_map + coord_map( projection = "mercator", * xlim = c(-180, 180) ) ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-20-1.png" width="55%" /> --- ## Sinusoidal projection Parallels are equally spaced ```r world_map + * coord_map(projection = "sinusoidal", xlim = c(-180, 180)) ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-21-1.png" width="60%" /> --- ## Orthographic projection Viewed from infinity ```r world_map + * coord_map(projection = "orthographic") ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-22-1.png" width="60%" /> --- ## Mollweide projection Equal-area, hemisphere is a circle ```r world_map + * coord_map(projection = "mollweide", xlim = c(-180, 180)) ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-23-1.png" width="60%" /> --- ## Visualizing distances .task[ Draw a line between Istanbul and Los Angeles. ] ```r cities <- tribble( ~city, ~long, ~lat, "istanbul", 28.9784, 41.0082, "los angeles", -118.243, 34.0522, ) ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-25-1.png" width="50%" /> --- ## Visualizing distances As if the earth is flat .panelset.sideways[ .panel[.panel-name[Code] ```r world_map + geom_point( data = cities, aes(x = long, y = lat, group = NULL), size = 2, color = "red" ) + geom_line( data = cities, aes(x = long, y = lat, group = NULL), size = 1, color = "red" ) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-26-1.png" width="100%" /> ] ] --- ## Visualizing distances Based on a spherical model of the earth <img src="12-visualize-spatial-I_files/figure-html/both-distances-1.png" width="60%" /> --- ## Intermediate points on the great circle ```r gc <- geosphere::gcIntermediate( p1 = cities %>% filter(city == "istanbul") %>% select(-city), p2 = cities %>% filter(city == "los angeles") %>% select(-city), n = 100, addStartEnd = TRUE ) %>% as_tibble() ``` ```r gc ``` ``` ## # A tibble: 102 × 2 ## lon lat ## <dbl> <dbl> ## 1 29.0 41.0 ## 2 28.4 41.9 ## 3 27.8 42.8 ## 4 27.1 43.6 ## 5 26.5 44.5 ## 6 25.8 45.3 ## # … with 96 more rows ``` --- ## Plotting both distances .panelset.sideways[ .panel[.panel-name[Code] ```r world_map + geom_point( data = cities, aes(x = long, y = lat, group = NULL), size = 2, color = "red" ) + geom_line( data = cities, aes(x = long, y = lat, group = NULL), size = 1, color = "red" ) + geom_line( data = gc, aes(x = lon, y = lat, group = NULL), size = 1, color = "red", linetype = "dashed" ) ``` ] .panel[.panel-name[Plot] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-29-1.png" width="100%" /> ] ] --- ## Dateline .task[ How long does it take to fly from the Western most point in the US to the Eastern most point? ] <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-30-1.png" width="75%" /> --- class: middle, inverse # Using **sf** --- ## Simple Features for R <img src="images/sf.jpeg" title="Simple features for R" alt="Simple features for R" width="60%" /> .footnote[ Illustration by Allison Horst ] --- ## Simple features <img src="images/simple-features.png" title="Simple features" alt="Simple features" width="70%" /> .footnote[ Source: [Simple Features for R](https://r-spatial.github.io/sf/articles/sf1.html#sf-objects-with-simple-features-1) ] --- ## The sf package .pull-left[ A package that provides simple features access for R - represents simple features as records in a `data.frame` or `tibble` with a `geometry` list-column - represents natively in R all 17 simple feature types for all dimensions - ... <br> Learn more at [r-spatial.github.io/sf](https://r-spatial.github.io/sf). ] .pull-right[ <img src="images/sf-hex.gif" title="Hex logo for sf" alt="Hex logo for sf" width="50%" /> ] --- ## Get world data Using the **rnaturalearth** package ```r world <- ne_countries(scale = "medium", returnclass = "sf") class(world) ``` ``` ## [1] "sf" "data.frame" ``` --- ## sf geometry .small[ ```r world %>% select(geometry) ``` ``` ## Simple feature collection with 241 features and 0 fields ## Geometry type: MULTIPOLYGON ## Dimension: XY ## Bounding box: xmin: -180 ymin: -89.99893 xmax: 180 ymax: 83.59961 ## CRS: +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0 ## First 10 features: ## geometry ## 1 MULTIPOLYGON (((-69.89912 1... ## 2 MULTIPOLYGON (((74.89131 37... ## 3 MULTIPOLYGON (((14.19082 -5... ## 4 MULTIPOLYGON (((-63.00122 1... ## 5 MULTIPOLYGON (((20.06396 42... ## 6 MULTIPOLYGON (((20.61133 60... ## 7 MULTIPOLYGON (((1.706055 42... ## 8 MULTIPOLYGON (((53.92783 24... ## 9 MULTIPOLYGON (((-64.54917 -... ## 10 MULTIPOLYGON (((45.55234 40... ``` ] --- ## Map the world with sf ```r ggplot(data = world) + * geom_sf() ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-36-1.png" width="60%" /> --- ## Plays nicely with ggplot2 ```r ggplot(data = world) + geom_sf(fill = "cornsilk", size = 0.2) + labs(x = "Longitude", y = "Latitude", title = "World map") + theme(panel.background = element_rect("lightblue")) ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-37-1.png" width="60%" /> --- ## Plays nicely with ggplot2 ```r ggplot(data = world) + geom_sf(aes(fill = pop_est)) + scale_fill_viridis_c(option = "plasma", trans = "sqrt") ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-38-1.png" width="60%" /> --- ## Projections with sf ```r ggplot(data = world) + geom_sf() + * coord_sf( * crs = "+proj=laea +lat_0=52 +lon_0=10 +x_0=4321000 +y_0=3210000 +ellps=GRS80 +units=m +no_defs " * ) ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-39-1.png" width="60%" /> --- ## Scale bar and North arrow Using the **ggspatial** package ```r ggplot(data = world) + geom_sf(fill = "cornsilk") + annotation_scale(location = "bl", width_hint = 0.4) + annotation_north_arrow( location = "bl", which_north = "true", pad_x = unit(0.5, "in"), pad_y = unit(0.3, "in"), style = north_arrow_fancy_orienteering) + coord_sf(xlim = c(24, 45), ylim = c(32, 43)) ``` ``` ## Scale on map varies by more than 10%, scale bar may be inaccurate ``` <img src="12-visualize-spatial-I_files/figure-html/unnamed-chunk-40-1.png" width="60%" />