made some graphs and maps

2023-11-05 16:04:49 -06:00 · 2023-11-05 16:04:49 -06:00 · 9e3f866680
commit 9e3f866680
parent 0f0cc0b0a5
5 changed files with 173 additions and 37 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,3 +6,4 @@
 data/*

 figures/*
+Neighborhood_Indicator_project.docx
--- a/.~lock.Neighborhood_Indicator_project.docx#
+++ b/.~lock.Neighborhood_Indicator_project.docx#
@ -0,0 +1 @@
+,ben,pseudotsuga,05.11.2023 13:50,file:///home/ben/.config/libreoffice/4;
--- a/bay_creek_data.R
+++ b/bay_creek_data.R
@ -4,64 +4,79 @@ library(sf)
 library(ggmap)
 library(scales)
 library(ggrepel)
-
-setwd("~/Documents/Bay_Creek/bay_creek_data")
+library(ggpattern)
+library(RColorBrewer)

 # ---- load data
 block_data_2022 <- sf::read_sf("data/nip_bg_22/nip_bg_22.shp")
-metadata_2022 <- read_csv("data/nip_bg_22/nip_metadata_22.csv")
-extent <- st_bbox(block_data_2022)
+block_metadata_2022 <- read_csv("data/nip_bg_22/nip_metadata_22.csv")
+extent_madison <- st_bbox(block_data_2022)
+
+census_data_2022 <- sf::read_sf("data/nip_tr_22/nip_tr_22.shp")
+census_metadata_2022 <- read_csv("data/nip_tr_22/nip_metadata_22.csv")

 # ---- define areas of interest
-block_groups <- data.frame(name = c("Bay Creek 1", "Bay Creek 2"), geo_id = c("550250013001", "550250013002"))
+block_interest <- read_csv("block_interest.csv", col_types = "cc")
+
+block_interest_data <- block_data_2022 %>% 
+  filter(geo_id %in% block_interest$geo_id) %>% 
+  mutate(center_geom = st_centroid(geometry)) %>% 
+  mutate(lon = st_coordinates(center_geom)[,1],
+         lat = st_coordinates(center_geom)[,2])
+
+block_interest_data <- left_join(block_interest_data, block_interest, join_by(geo_id)) %>% 
+  select(geo_id, name, lon, lat, baycreek) %>%
+  mutate(interest = ifelse(baycreek, "baycreek", TRUE))
+  
+extent_blocks <- st_bbox(block_interest_data)
+
+block_data_2022 <- left_join(block_data_2022, block_interest_data %>%st_drop_geometry(), join_by(geo_id))
+#census_interest <- read_csv("census_interest.csv", col_types = "cc")
+# ---- data pivoting
+races <- c("pc_wht", "pc_afrm", "pc_asn", "pc_othm", "pc_hisp")
+races <- block_metadata_2022 %>% filter(variable %in% races) %>% select(variable, name)
+
+wealth <- c("pc_unem", "pc_fmpv")
+wealth <- block_metadata_2022 %>% filter(variable %in% wealth) %>% select(variable, name)
+
+

-centroids <- block_data_2022 %>% 
-  left_join(block_groups, by = "geo_id") %>% 
-  filter(geo_id %in% block_groups$geo_id) %>% 
-  st_centroid() %>% 
-  pull(geometry) %>%
-  transpose()

-block_groups["lon"] <- unlist(centroids[[1]])
-block_groups["lat"] <- unlist(centroids[[2]])

 # ---- download basemap
-zoom_level <- 12
+zoom_level <- 13
+buffer <- 0.01
+extent <- extent_blocks
 if(file.exists(paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))){
  load(file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))
 } else {
  register_stadiamaps(substr(read_file("data/stadia_api_key.txt"), 1, 36), 
                      write = FALSE)
-  basemap <- ggmap::get_stadiamap(bbox = c(left = as.double(extent[1]), 
-                                           bottom = as.double(extent[2]), 
-                                           right = as.double(extent[3]), 
-                                           top = as.double(extent[4])),
+  basemap <- ggmap::get_stadiamap(bbox = c(left = as.double(extent[1]) - buffer, 
+                                           bottom = as.double(extent[2]) - buffer, 
+                                           right = as.double(extent[3]) + buffer, 
+                                           top = as.double(extent[4])) + buffer,
                                  zoom = zoom_level,
                                  maptype = "alidade_smooth",
                                  color = "bw",
                                  force = TRUE)
  
-  save(basemap_raster, file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))
+  save(basemap, file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))
 }

 # ---- plot figures
 # --- plot maps
 ggmap(basemap) +
  geom_sf(data = block_data_2022,
-               aes(fill = medhhinc),
+          fill = NA,
+          color = "black",
          inherit.aes = FALSE,
          alpha = 0.6) +
-  geom_label_repel(data = block_groups,
-    aes(label = name, 
+  geom_label_repel(data = block_interest_data,
+    aes(label = paste(name), 
        y = lat,
        x = lon),
-    min.segment.length = 0,
-    nudge_y = -0.03) +
-  # geom_sf_label(data = block_data_2022 %>% left_join(block_groups, by = "geo_id"),
-  #               aes(label = name),
-  #               inherit.aes = FALSE,
-  #               nudge_x = 1,
-  #               size = 2) +
+    min.segment.length = 0.02) +
  theme(axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
@ -71,14 +86,15 @@ ggmap(basemap) +
                        type = "viridis") +
  labs(title = "Median Income",
       fill = NULL)
-ggsave(file = "figures/median_income_map.png",
+ggsave(file = "figures/block_map.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

 # ---- plot graphs
-ggplot(data = block_data_2022 %>% mutate(baycreek = geo_id %in% block_groups$geo_id) %>% left_join(block_groups, by = "geo_id") %>% arrange(medhhinc)) +
+ggplot(data = block_data_2022 %>% 
+         arrange(medhhinc)) +
  geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"),
             aes(yintercept = medhhinc),
             linetype = "dashed") +
@ -88,12 +104,16 @@ ggplot(data = block_data_2022 %>% mutate(baycreek = geo_id %in% block_groups$geo
             label = "Madison median") +
  geom_col(aes(x = reorder(geo_id, medhhinc, sum),
               y = medhhinc,
-               fill = baycreek)) +
-  geom_label(aes(x = reorder(geo_id, medhhinc, sum),
-                 y = medhhinc + 10000,
-                 label = name)) +
+               fill = baycreek),
+           color = "black",
+           size = 0.01,
+           position = position_dodge2(padding = 0)) +
+  geom_label_repel(aes(x = reorder(geo_id, medhhinc, sum),
+                 y = medhhinc,
+                 label = name),
+                 min.segment.length = 0) +
  scale_x_discrete(labels = NULL, breaks = NULL) +
-  scale_y_continuous(label = scales::label_dollar()) +
+  scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0,NA))) +
  scale_fill_discrete(guide="none") +
  theme(axis.text.x=element_blank(),
        axis.title.x=element_blank()) +
@ -104,4 +124,103 @@ ggsave(file = "figures/median_income.png",
       device = "png",
       width = 11,
       height = 8.5,
-       units = "in")
+       units = "in")
+
+ggplot() +
+  geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"),
+             aes(yintercept = medhhinc),
+             linetype = "dashed") +
+  geom_boxplot(data = block_data_2022,
+               aes(x = "Madison",
+                   y = medhhinc),
+               outlier.shape = NA) +
+  geom_col(data = block_data_2022 %>% filter(interest %in% c(TRUE, "baycreek")) %>% arrange(medhhinc),
+           aes(x = reorder(name, medhhinc, sum),
+               y = medhhinc,
+               fill = baycreek)) +
+  scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0,NA))) +
+  scale_fill_discrete(guide="none") +
+  theme(axis.text.x=element_text(angle = 30, vjust = 0.7),
+        axis.title.x=element_blank()) +
+  labs(title = "Median Income by Block",
+       x = NULL,
+       y = "Median Income")
+ggsave(file = "figures/median_income_boxplot.png",
+       device = "png",
+       width = 11,
+       height = 8.5,
+       units = "in")
+
+ggplot(data = block_data_2022 %>% filter(interest %in% c("baycreek", TRUE))) +
+  geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"),
+             aes(yintercept = medhhinc),
+             linetype = "dashed") +
+  geom_label(data = block_data_2022 %>% filter(geo_id == "Madison"),
+             aes(y = medhhinc,
+                 x = 0.1,
+                 label = "Madison Median")) +
+  geom_point(data = block_data_2022,
+             aes(x = pc_wht/100,
+                 y = medhhinc),
+             size = 2,
+             alpha = 0.5,
+             color = "grey") +
+  geom_point(aes(x = pc_wht/100,
+                 y = medhhinc,
+                 color = interest),
+             size = 5) +
+  geom_label_repel(aes(x = pc_wht/100,
+                       y = medhhinc,
+                       label = name),
+                   nudge_y = 5000,
+                   min.segment.length = 0) +
+  scale_x_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0)), limits = c(0, 1)) +
+  scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0.1, 0.1))) +
+  scale_color_discrete(guide="none") +
+  labs(title = "Median income by racial makup of neighborhood",
+       x = "Percent of residents that are white",
+       y = "Median income")
+ggsave(file = "figures/income_race.png",
+       device = "png",
+       width = 11,
+       height = 8.5,
+       units = "in")
+
+ggplot(data = block_data_2022 %>% 
+         filter(interest %in% c("baycreek", TRUE) | geo_id == "Madison") %>% 
+         pivot_longer(cols = races$variable, names_to = "race", values_to = "percent")) +
+  geom_col(aes(x = ifelse(geo_id == "Madison", "Madison average", name),
+              y = percent/100,
+              fill = race),
+           color = "black") +
+  scale_y_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0))) +
+  scale_fill_brewer(type = "qual", labels = deframe(races) %>% as.list()) +
+  theme(axis.text.x=element_text(angle = 30, vjust = 0.7),
+        axis.title.x=element_blank()) +
+  labs(title = "Racial makup of neighborhood",
+       x = NULL,
+       y = NULL,
+       fill = "Race")
+ggsave(file = "figures/race_percent.png",
+       device = "png",
+       width = 11,
+       height = 8.5,
+       units = "in")
+
+ggplot(data = block_data_2022 %>% 
+         filter(interest %in% c("baycreek", TRUE) | geo_id == "Madison")) +
+  geom_col(aes(x = ifelse(geo_id == "Madison", "Madison average", name),
+               y = pc_fmpv/100),
+           color = "black") +
+  scale_y_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0.1))) +
+  theme(axis.text.x=element_text(angle = 30, vjust = 0.7),
+        axis.title.x=element_blank()) +
+  labs(title = "Families below the poverty lines",
+       x = NULL,
+       y = NULL,
+       fill = NULL)
+ggsave(file = "figures/neighborhood_poverty.png",
+       device = "png",
+       width = 11,
+       height = 8.5,
+       units = "in")
--- a/block_interest.csv
+++ b/block_interest.csv
@ -0,0 +1,9 @@
+name,geo_id,baycreek
+Bay Creek 1,550250013001,TRUE
+Bay Creek 2,550250013002,TRUE
+Greenbush 4,550250012004,FALSE
+Greenbush 2,550250012002,FALSE
+Bram’s Addition,550250014011,FALSE
+Burr Oaks 2,550250014012,FALSE
+Burr Oaks 3,550250014013,FALSE
+Capital View,550250014014,FALSE
--- a/census_interest.csv
+++ b/census_interest.csv
@ -0,0 +1,6 @@
+name,geo_id
+Bay Creek,55025001300
+Greenbush,
+Bram’s Addition,
+Burr Oaks,
+Capital View Neighborhoods,
				`@ -0,0 +1 @@`
				`,ben,pseudotsuga,05.11.2023 13:50,file:///home/ben/.config/libreoffice/4;`