#Libraries
# SPDS
library(tidyverse) # data wrangling
library(readxl) # import xlsx data
library(sf) # Working with vector data
library(rmapshaper)# Simplify geometries
library(units) # manage your units
#Data
library(USAboundaries) # county boundaries
#Visualization
library(gghighlight) # ggplot conditional highlighting
library(knitr) # table generation
library(kableExtra) # making tables pretty
#Question 1
#1.1
CONUS = USAboundaries::us_counties(resolution = "low") %>%
filter(state_name != "Hawaii") %>%
filter(state_name != "Alaska") %>%
filter(state_name != "Puerto Rico") %>%
st_transform(5070)
CONUS_u = st_union(CONUS)
CONUS_simp = ms_simplify(CONUS_u, keep = .05)
CONUSpnts = mapview:: npts(CONUS)
simppts = mapview::npts(CONUS_simp)
#1.2
county_centroid = st_centroid(CONUS) %>%
st_combine()
#1.3 -1.5 Tessalations
#Voronoi
v_grid = st_voronoi(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#Triangulated
t_grid = st_triangulate(county_centroid) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#Grid Coverage
sq_grid = st_make_grid(CONUS_simp, n = c(70, 50)) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#Hexagonal Coverage
hex_grid = st_make_grid(CONUS_simp, n = c(50,70), square = FALSE) %>%
st_cast() %>%
st_as_sf() %>%
mutate(id = 1:n())
#1.6 Plot
plot_tess = function(data, title)
{ggplot() +
geom_sf(data = data, fill = "white", col = "navy", size = .2) +
theme_void() +
labs(title = title, caption = paste("This tesselation contains:", nrow(data), "tiles")) +
theme(plot.title = element_text(hjust = .5, color = "black", face = "bold"))}
#Original
plot_tess(data = CONUS_simp, "Original County Data")
#Voronoi
v_grid = st_intersection(v_grid, st_union(CONUS_simp))
plot_tess(v_grid, "Voronoi") +
geom_sf(data = county_centroid, col = "red", size = 0.2)
#Triangulated
t_grid = st_intersection(t_grid, st_union(CONUS_simp))
plot_tess(t_grid, "Triangulated") +
geom_sf(data = county_centroid, col = "red", size = 0.2)
#Gridded
plot_tess(sq_grid, "Square Coverage")
#Hexagonal
plot_tess(hex_grid, "Hexogonal Coverage")
#Question 2
#2.1
sf_to_df = function(sf_object, description) {
area_sf= st_area(sf_object) %>%
set_units('km^2') %>%
drop_units()
area_df = data.frame(tesselation = description, features = length(area_sf), mean_area = mean(area_sf), std = sd(area_sf), total_area = sum(area_sf))
return(area_df)
}
description= "test"
#2.2
voroni_df = sf_to_df(v_grid, "voroni tesselation")
tri_df = sf_to_df(t_grid, "triangulation tesselation")
sq_df = sf_to_df(sq_grid, "square grid tesselation")
hex_df = sf_to_df(hex_grid, "hexagonal tesselation")
counties_df = sf_to_df(CONUS, "county")
#2.3
tess_summary = bind_rows(
sf_to_df(t_grid, "triangulation"),
sf_to_df(v_grid, "voroni"),
sf_to_df(sq_grid, "square grid"),
sf_to_df(hex_grid, "hexagonal grid"),
sf_to_df(sf_object= CONUS, "county"))
#2.4
?mean()
knitr::kable(tess_summary, caption = "US county tesselations", col.names = c("Tesselations", "Number of features", "Mean Area", "Standard Deviation", "Total area"))
Tesselations | Number of features | Mean Area | Standard Deviation | Total area |
---|---|---|---|---|
triangulation | 6195 | 1253.691 | 1578.647 | 7766614 |
voroni | 3106 | 2525.425 | 2886.661 | 7843970 |
square grid | 2256 | 3761.443 | 0.000 | 8485816 |
hexagonal grid | 1175 | 7368.799 | 0.000 | 8658339 |
county | 3108 | 2521.745 | 3404.325 | 7837583 |
#2.5 Comment - As you move from triangulation at the top of the chart to the original county data atthe bottom, the area moves from smallest to largest. Each individual tesselation returns a specific part of the US and counties. The point in polygon will be affected by the different areas returned by each of the tesselations.
#Question 3
#3.1
NID2019_U = read_excel("/users/owner1/github/geog-176A-labs/data/NID2019_U.xlsx") %>%
filter(!is.na(LATITUDE)) %>%
st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>%
st_transform(5070)
#3.2
point_in_polygon = function(points, polygon, group){
st_join(polygon, points) %>%
st_drop_geometry() %>%
count(get(group)) %>%
setNames(c(group, "n")) %>%
left_join(polygon, by = group) %>%
st_as_sf()
}
#3.3
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")
#3.4
plot_pip = function(data, title){
ggplot() +
geom_sf(data = data, aes(fill = n), col = NA, alpha = .9, size= .2) +
scale_fill_gradient(low = "white", high = "blue") +
theme_void() +
labs(title = "dams in the US", caption = paste0(sum(data$n), "dams represented")) +
theme(legend.position = "none", plot.title = element_text(face = "bold", color = "blue", hjust = .5, size = 24))
}
plot_pip(voroni_pip, "Voroni Point in Polygon")
plot_pip(county_pip, "Raw County Data Point in Polygon")
#3.5
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")
plot_pip(voroni_pip)
plot_pip(triangulation_pip)
plot_pip(grid_pip)
plot_pip(hexagonal_pip)
plot_pip(county_pip)
The hex and square tesselations provided a different result than the others. This is because the others were plotted by county and the hex and squares were plotted under equal ratios of area. Moving forward I am only going to use the hexagonal tesselation. It was either that or square tesselation becasue of the fact that it takes into account the area and size rather than just the county.
#Question 4
#4.1
fireprot_dams = point_in_polygon(filter(NID2019_U, grepl("P", NID2019_U$PURPOSES)), hex_grid, "id")
watersup_dams = point_in_polygon(filter(NID2019_U, grepl("S", NID2019_U$PURPOSES)), hex_grid, "id")
hydroelec_dams = point_in_polygon(filter(NID2019_U, grepl("H", NID2019_U$PURPOSES)), hex_grid, "id")
irrig_dams = point_in_polygon(filter(NID2019_U, grepl("I", NID2019_U$PURPOSES)), hex_grid, "id")
#4.2
plot_pip2 = function(data, text){
ggplot() +
geom_sf(data = data, aes(fill = n), alpha = .9, size = .2) +
gghighlight(n > mean(n) +sd(n)) +
viridis::scale_fill_viridis() +
theme_void() +
theme(plot.title = element_text(face = "bold", color = "black", size = 24), plot.subtitle = element_text(size = 12),
plot.caption = element_text(face = "bold", size = 12), legend.title = element_text(face = "bold"),
legend.text = element_text(face = "bold")) +
labs(title = text,
subtitle = "Data Source: National Inventory of Dams",
fill = "Number of dams",
caption = paste0(sum(data$n), "dams")) +
theme(aspect.ratio = .5)
}
#4.3
plot_pip2(fireprot_dams, "Fire Protection Dams")
plot_pip2(watersup_dams, "Water Suppy Dams")
plot_pip2(hydroelec_dams, "Hydroelectric Dams")
plot_pip2(irrig_dams, "Irrigation Dams")
#Question 4 Comments Fire protection dams are mainly central US. Possibly due to the drier climates which allow for fires to start easier. Water supply dams are scattered throughout the east, central, and west US; located on bigger water supplys.Hydroelectric dams are also scattered but also seem to be most dense in the Northeastern US. Irrigation dams are found more on the west US; this oculd be due to the fact that the western US does not receive nearly as much rainfall than Eastern US