bay_creek_data/bay_creek_data.R

# ---- load libraries
library(tidyverse)
library(sf)
library(ggmap)
library(scales)
library(ggrepel)
library(ggpattern)
library(RColorBrewer)

# ---- load data
block_data_2022 <- sf::read_sf("data/nip_bg_22/nip_bg_22.shp")
block_metadata_2022 <- read_csv("data/nip_bg_22/nip_metadata_22.csv")
extent_madison <- st_bbox(block_data_2022)

census_data_2022 <- sf::read_sf("data/nip_tr_22/nip_tr_22.shp")
census_metadata_2022 <- read_csv("data/nip_tr_22/nip_metadata_22.csv")

# ---- define areas of interest
block_interest <- read_csv("block_interest.csv", col_types = "cc")

block_interest_data <- block_data_2022 %>%
  filter(geo_id %in% block_interest$geo_id) %>%
  mutate(center_geom = st_centroid(geometry)) %>%
  mutate(lon = st_coordinates(center_geom)[,1],
         lat = st_coordinates(center_geom)[,2])

block_interest_data <- left_join(block_interest_data, block_interest, join_by(geo_id)) %>%
  select(geo_id, name, lon, lat, baycreek) %>%
  mutate(interest = ifelse(baycreek, "baycreek", TRUE))

extent_blocks <- st_bbox(block_interest_data)

block_data_2022 <- left_join(block_data_2022, block_interest_data %>%st_drop_geometry(), join_by(geo_id))
#census_interest <- read_csv("census_interest.csv", col_types = "cc")
# ---- data pivoting
races <- c("pc_wht", "pc_afrm", "pc_asn", "pc_othm", "pc_hisp")
races <- block_metadata_2022 %>% filter(variable %in% races) %>% select(variable, name)

wealth <- c("pc_unem", "pc_fmpv")
wealth <- block_metadata_2022 %>% filter(variable %in% wealth) %>% select(variable, name)


# ---- download basemap
zoom_level <- 13
buffer <- 0.01
extent <- extent_blocks
if(file.exists(paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))){
  load(file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))
} else {
  register_stadiamaps(substr(read_file("data/stadia_api_key.txt"), 1, 36),
                      write = FALSE)
  basemap <- ggmap::get_stadiamap(bbox = c(left = as.double(extent[1]) - buffer,
                                           bottom = as.double(extent[2]) - buffer,
                                           right = as.double(extent[3]) + buffer,
                                           top = as.double(extent[4])) + buffer,
                                  zoom = zoom_level,
                                  maptype = "alidade_smooth",
                                  color = "bw",
                                  force = TRUE)

  save(basemap, file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))
}

# ---- plot figures
# --- plot maps
ggmap(basemap) +
  geom_sf(data = block_data_2022,
          fill = NA,
          color = "black",
          inherit.aes = FALSE,
          alpha = 0.6) +
  geom_label_repel(data = block_interest_data,
    aes(label = paste(name),
        y = lat,
        x = lon),
    min.segment.length = 0.02) +
  theme(axis.text.x=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks=element_blank(),
        axis.title.x=element_blank(),
        axis.title.y=element_blank()) +
  scale_fill_continuous(label = scales::label_dollar(),
                        type = "viridis") +
  labs(title = "Median Income",
       fill = NULL)
ggsave(file = "figures/block_map.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

# ---- plot graphs
ggplot(data = block_data_2022 %>%
         arrange(medhhinc)) +
  geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"),
             aes(yintercept = medhhinc),
             linetype = "dashed") +
  geom_label(data = block_data_2022 %>% filter(geo_id == "Madison"),
             aes(y = medhhinc,
                 x = 20),
             label = "Madison median") +
  geom_col(aes(x = reorder(geo_id, medhhinc, sum),
               y = medhhinc,
               fill = baycreek),
           color = "black",
           size = 0.01,
           position = position_dodge2(padding = 0)) +
  geom_label_repel(aes(x = reorder(geo_id, medhhinc, sum),
                 y = medhhinc,
                 label = name),
                 min.segment.length = 0) +
  scale_x_discrete(labels = NULL, breaks = NULL) +
  scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0,NA))) +
  scale_fill_discrete(guide="none") +
  theme(axis.text.x=element_blank(),
        axis.title.x=element_blank()) +
  labs(title = "Median Income by Block Group",
       x = NULL,
       y = NULL)
ggsave(file = "figures/median_income.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

ggplot() +
  geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"),
             aes(yintercept = medhhinc),
             linetype = "dashed") +
  geom_boxplot(data = block_data_2022,
               aes(x = "Madison",
                   y = medhhinc),
               outlier.shape = NA) +
  geom_col(data = block_data_2022 %>% filter(interest %in% c(TRUE, "baycreek")) %>% arrange(medhhinc),
           aes(x = reorder(name, medhhinc, sum),
               y = medhhinc,
               fill = baycreek)) +
  scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0,NA))) +
  scale_fill_discrete(guide="none") +
  theme(axis.text.x=element_text(angle = 30, vjust = 0.7),
        axis.title.x=element_blank()) +
  labs(title = "Median Income by Block",
       x = NULL,
       y = "Median Income")
ggsave(file = "figures/median_income_boxplot.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

ggplot(data = block_data_2022 %>% filter(interest %in% c("baycreek", TRUE))) +
  geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"),
             aes(yintercept = medhhinc),
             linetype = "dashed") +
  geom_label(data = block_data_2022 %>% filter(geo_id == "Madison"),
             aes(y = medhhinc,
                 x = 0.1,
                 label = "Madison Median")) +
  geom_point(data = block_data_2022,
             aes(x = pc_wht/100,
                 y = medhhinc),
             size = 2,
             alpha = 0.5,
             color = "grey") +
  geom_point(aes(x = pc_wht/100,
                 y = medhhinc,
                 color = interest),
             size = 5) +
  geom_label_repel(aes(x = pc_wht/100,
                       y = medhhinc,
                       label = name),
                   nudge_y = 5000,
                   min.segment.length = 0) +
  scale_x_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0)), limits = c(0, 1)) +
  scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0.1, 0.1))) +
  scale_color_discrete(guide="none") +
  labs(title = "Median income by racial makup of neighborhood",
       x = "Percent of residents that are white",
       y = "Median income")
ggsave(file = "figures/income_race.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

ggplot(data = block_data_2022 %>%
         filter(interest %in% c("baycreek", TRUE) | geo_id == "Madison") %>%
         pivot_longer(cols = races$variable, names_to = "race", values_to = "percent")) +
  geom_col(aes(x = ifelse(geo_id == "Madison", "Madison average", name),
              y = percent/100,
              fill = race),
           color = "black") +
  scale_y_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0))) +
  scale_fill_brewer(type = "qual", labels = deframe(races) %>% as.list()) +
  theme(axis.text.x=element_text(angle = 30, vjust = 0.7),
        axis.title.x=element_blank()) +
  labs(title = "Racial makup of neighborhood",
       x = NULL,
       y = NULL,
       fill = "Race")
ggsave(file = "figures/race_percent.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

ggplot(data = block_data_2022 %>%
         filter(interest %in% c("baycreek", TRUE) | geo_id == "Madison",
                !geo_id %in% c("550250014012", "550250012004"))) +
  geom_col(aes(x = ifelse(geo_id == "Madison", "Madison average", name),
               y = pc_fmpv/100),
           color = "black") +
  scale_y_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0.1))) +
  theme(axis.text.x=element_text(angle = 30, vjust = 0.7),
        axis.title.x=element_blank()) +
  labs(title = "Families with income below the poverty lines",
       x = NULL,
       y = NULL,
       fill = NULL)
ggsave(file = "figures/neighborhood_poverty.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

ggplot(data = block_data_2022 %>%
         st_drop_geometry() %>%
         filter(geo_id != "Madison") %>%
         mutate(baycreek = ifelse(interest == "baycreek", "Bay Creek", NA)) %>%
         mutate(baycreek = ifelse(is.na(baycreek), "The rest of Madison", "Bay Creek")) %>%
         group_by(baycreek) %>%
         summarise(percentage = sum(tot_pop)/274622)) +
  geom_bar(aes(x="", y=percentage, fill=baycreek),
           stat="identity",
           width=1) +
  coord_polar("y", start=0) +
  geom_label_repel(aes(x="",
                y=percentage,
                label = paste(baycreek, round(percentage * 100, 2), "%"),
                fill = baycreek),
            position = position_stack(vjust=0.5)) +
  scale_y_continuous(label = NULL, breaks = NULL) +
  scale_fill_brewer(type = "qual", palette = "Pastel1", guide = NULL) +
  theme(axis.title.x=element_blank(),
        axis.title.y=element_blank()) +
  labs(title = NULL,
       x = NULL,
       y = NULL,
       fill = NULL)
ggsave(file = "figures/baycreek_pie.png",
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")