#Libraries

# SPDS
library(tidyverse) # data wrangling
library(readxl)    # import xlsx data
library(sf)        # Working with vector data
library(rmapshaper)# Simplify geometries
library(units)     # manage your units
#Data
library(USAboundaries) # county boundaries
#Visualization
library(gghighlight) # ggplot conditional highlighting
library(knitr) # table generation
library(kableExtra) # making tables pretty

#Question 1

#1.1
CONUS = USAboundaries::us_counties(resolution = "low") %>% 
  filter(state_name != "Hawaii") %>% 
  filter(state_name != "Alaska") %>% 
  filter(state_name != "Puerto Rico") %>% 
  st_transform(5070)

CONUS_u = st_union(CONUS)
CONUS_simp = ms_simplify(CONUS_u, keep = .05)

CONUSpnts = mapview:: npts(CONUS)
simppts = mapview::npts(CONUS_simp)
#1.2
county_centroid = st_centroid(CONUS) %>% 
  st_combine() 
#1.3 -1.5 Tessalations
#Voronoi
v_grid = st_voronoi(county_centroid) %>% 
  st_cast() %>% 
  st_as_sf()  %>% 
  mutate(id = 1:n())

#Triangulated
t_grid = st_triangulate(county_centroid) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())

#Grid Coverage
sq_grid = st_make_grid(CONUS_simp, n = c(70, 50)) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
  
#Hexagonal Coverage
hex_grid = st_make_grid(CONUS_simp, n = c(50,70), square = FALSE) %>% 
  st_cast() %>% 
  st_as_sf() %>% 
  mutate(id = 1:n())
#1.6 Plot
plot_tess = function(data, title)
{ggplot() +
    geom_sf(data = data, fill = "white", col = "navy", size = .2) +
    theme_void() +
    labs(title = title, caption = paste("This tesselation contains:", nrow(data), "tiles")) +
    theme(plot.title = element_text(hjust = .5, color = "black", face = "bold"))}

#Original
plot_tess(data = CONUS_simp, "Original County Data")

#Voronoi
v_grid = st_intersection(v_grid, st_union(CONUS_simp))
plot_tess(v_grid, "Voronoi") +
  geom_sf(data = county_centroid, col = "red", size = 0.2)

#Triangulated
t_grid = st_intersection(t_grid, st_union(CONUS_simp))
plot_tess(t_grid, "Triangulated") +
  geom_sf(data = county_centroid, col = "red", size = 0.2)

#Gridded
plot_tess(sq_grid, "Square Coverage")

#Hexagonal
plot_tess(hex_grid, "Hexogonal Coverage")

#Question 2

#2.1
sf_to_df = function(sf_object, description) {
  area_sf= st_area(sf_object) %>% 
    set_units('km^2') %>% 
    drop_units()
  area_df = data.frame(tesselation = description, features = length(area_sf), mean_area = mean(area_sf), std = sd(area_sf), total_area = sum(area_sf))

  return(area_df)
}

description= "test"
#2.2
voroni_df = sf_to_df(v_grid, "voroni tesselation")
tri_df = sf_to_df(t_grid, "triangulation tesselation")
sq_df = sf_to_df(sq_grid, "square grid tesselation")
hex_df = sf_to_df(hex_grid, "hexagonal tesselation")
counties_df = sf_to_df(CONUS, "county")
#2.3
tess_summary = bind_rows(
  sf_to_df(t_grid, "triangulation"),
  sf_to_df(v_grid, "voroni"),
  sf_to_df(sq_grid, "square grid"),
  sf_to_df(hex_grid, "hexagonal grid"),
  sf_to_df(sf_object= CONUS, "county"))
#2.4
?mean()

knitr::kable(tess_summary, caption = "US county tesselations", col.names = c("Tesselations", "Number of features", "Mean Area", "Standard Deviation", "Total area"))
US county tesselations
Tesselations Number of features Mean Area Standard Deviation Total area
triangulation 6195 1253.691 1578.647 7766614
voroni 3106 2525.425 2886.661 7843970
square grid 2256 3761.443 0.000 8485816
hexagonal grid 1175 7368.799 0.000 8658339
county 3108 2521.745 3404.325 7837583

#2.5 Comment - As you move from triangulation at the top of the chart to the original county data atthe bottom, the area moves from smallest to largest. Each individual tesselation returns a specific part of the US and counties. The point in polygon will be affected by the different areas returned by each of the tesselations.

#Question 3

#3.1
NID2019_U = read_excel("/users/owner1/github/geog-176A-labs/data/NID2019_U.xlsx") %>% 
  filter(!is.na(LATITUDE)) %>% 
  st_as_sf(coords = c("LONGITUDE", "LATITUDE"), crs = 4326) %>% 
  st_transform(5070)
#3.2

point_in_polygon = function(points, polygon, group){
  st_join(polygon, points) %>% 
    st_drop_geometry() %>% 
    count(get(group)) %>% 
    setNames(c(group, "n")) %>% 
    left_join(polygon, by = group) %>% 
    st_as_sf()
}
#3.3
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")
#3.4
plot_pip = function(data, title){
  ggplot() +
    geom_sf(data = data, aes(fill = n), col = NA, alpha = .9, size= .2) +
    scale_fill_gradient(low = "white", high = "blue") +
    theme_void() +
    labs(title = "dams in the US", caption = paste0(sum(data$n), "dams represented")) +
    theme(legend.position = "none", plot.title = element_text(face = "bold", color = "blue", hjust = .5, size = 24))
}

plot_pip(voroni_pip, "Voroni Point in Polygon")

plot_pip(county_pip, "Raw County Data Point in Polygon")

#3.5
voroni_pip = point_in_polygon(NID2019_U, polygon = v_grid, group = "id")
triangulation_pip = point_in_polygon(NID2019_U, t_grid, group = "id")
grid_pip = point_in_polygon(NID2019_U, sq_grid, group = "id")
hexagonal_pip = point_in_polygon(NID2019_U, hex_grid, group = "id")
county_pip = point_in_polygon(NID2019_U, CONUS, group = "geoid")

plot_pip(voroni_pip)

plot_pip(triangulation_pip)

plot_pip(grid_pip)

plot_pip(hexagonal_pip)

plot_pip(county_pip)

The hex and square tesselations provided a different result than the others. This is because the others were plotted by county and the hex and squares were plotted under equal ratios of area. Moving forward I am only going to use the hexagonal tesselation. It was either that or square tesselation becasue of the fact that it takes into account the area and size rather than just the county.

#Question 4

#4.1
fireprot_dams =  point_in_polygon(filter(NID2019_U, grepl("P", NID2019_U$PURPOSES)), hex_grid, "id")
watersup_dams =  point_in_polygon(filter(NID2019_U, grepl("S", NID2019_U$PURPOSES)), hex_grid, "id")
hydroelec_dams =  point_in_polygon(filter(NID2019_U, grepl("H", NID2019_U$PURPOSES)), hex_grid, "id")
irrig_dams =  point_in_polygon(filter(NID2019_U, grepl("I", NID2019_U$PURPOSES)), hex_grid, "id")
#4.2
plot_pip2 = function(data, text){
  ggplot() +
    geom_sf(data = data, aes(fill = n), alpha = .9, size = .2) +
    gghighlight(n > mean(n) +sd(n)) +
    viridis::scale_fill_viridis() +
    theme_void() +
    theme(plot.title = element_text(face = "bold", color = "black", size = 24), plot.subtitle = element_text(size = 12),
          plot.caption = element_text(face = "bold", size = 12), legend.title = element_text(face = "bold"),
          legend.text = element_text(face = "bold")) +
    labs(title = text,
         subtitle = "Data Source: National Inventory of Dams",
         fill = "Number of dams",
         caption = paste0(sum(data$n), "dams")) +
    theme(aspect.ratio = .5)
}
#4.3
plot_pip2(fireprot_dams, "Fire Protection Dams")

plot_pip2(watersup_dams, "Water Suppy Dams")

plot_pip2(hydroelec_dams, "Hydroelectric Dams")

plot_pip2(irrig_dams, "Irrigation Dams")

#Question 4 Comments Fire protection dams are mainly central US. Possibly due to the drier climates which allow for fires to start easier. Water supply dams are scattered throughout the east, central, and west US; located on bigger water supplys.Hydroelectric dams are also scattered but also seem to be most dense in the Northeastern US. Irrigation dams are found more on the west US; this oculd be due to the fact that the western US does not receive nearly as much rainfall than Eastern US