From 9e3f8666809ee3ef7059a61adbad21243faddf40 Mon Sep 17 00:00:00 2001 From: Ben Varick Date: Sun, 5 Nov 2023 16:04:49 -0600 Subject: [PATCH] made some graphs and maps --- .gitignore | 1 + .~lock.Neighborhood_Indicator_project.docx# | 1 + bay_creek_data.R | 193 ++++++++++++++++---- block_interest.csv | 9 + census_interest.csv | 6 + 5 files changed, 173 insertions(+), 37 deletions(-) create mode 100644 .~lock.Neighborhood_Indicator_project.docx# create mode 100644 block_interest.csv create mode 100644 census_interest.csv diff --git a/.gitignore b/.gitignore index bbc4004..1b316e1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ data/* figures/* +Neighborhood_Indicator_project.docx diff --git a/.~lock.Neighborhood_Indicator_project.docx# b/.~lock.Neighborhood_Indicator_project.docx# new file mode 100644 index 0000000..0b11e40 --- /dev/null +++ b/.~lock.Neighborhood_Indicator_project.docx# @@ -0,0 +1 @@ +,ben,pseudotsuga,05.11.2023 13:50,file:///home/ben/.config/libreoffice/4; \ No newline at end of file diff --git a/bay_creek_data.R b/bay_creek_data.R index 8c23715..7e4ae5b 100644 --- a/bay_creek_data.R +++ b/bay_creek_data.R @@ -4,64 +4,79 @@ library(sf) library(ggmap) library(scales) library(ggrepel) - -setwd("~/Documents/Bay_Creek/bay_creek_data") +library(ggpattern) +library(RColorBrewer) # ---- load data block_data_2022 <- sf::read_sf("data/nip_bg_22/nip_bg_22.shp") -metadata_2022 <- read_csv("data/nip_bg_22/nip_metadata_22.csv") -extent <- st_bbox(block_data_2022) +block_metadata_2022 <- read_csv("data/nip_bg_22/nip_metadata_22.csv") +extent_madison <- st_bbox(block_data_2022) + +census_data_2022 <- sf::read_sf("data/nip_tr_22/nip_tr_22.shp") +census_metadata_2022 <- read_csv("data/nip_tr_22/nip_metadata_22.csv") # ---- define areas of interest -block_groups <- data.frame(name = c("Bay Creek 1", "Bay Creek 2"), geo_id = c("550250013001", "550250013002")) +block_interest <- read_csv("block_interest.csv", col_types = "cc") + +block_interest_data <- block_data_2022 %>% + filter(geo_id %in% block_interest$geo_id) %>% + mutate(center_geom = st_centroid(geometry)) %>% + mutate(lon = st_coordinates(center_geom)[,1], + lat = st_coordinates(center_geom)[,2]) + +block_interest_data <- left_join(block_interest_data, block_interest, join_by(geo_id)) %>% + select(geo_id, name, lon, lat, baycreek) %>% + mutate(interest = ifelse(baycreek, "baycreek", TRUE)) + +extent_blocks <- st_bbox(block_interest_data) + +block_data_2022 <- left_join(block_data_2022, block_interest_data %>%st_drop_geometry(), join_by(geo_id)) +#census_interest <- read_csv("census_interest.csv", col_types = "cc") +# ---- data pivoting +races <- c("pc_wht", "pc_afrm", "pc_asn", "pc_othm", "pc_hisp") +races <- block_metadata_2022 %>% filter(variable %in% races) %>% select(variable, name) + +wealth <- c("pc_unem", "pc_fmpv") +wealth <- block_metadata_2022 %>% filter(variable %in% wealth) %>% select(variable, name) + + -centroids <- block_data_2022 %>% - left_join(block_groups, by = "geo_id") %>% - filter(geo_id %in% block_groups$geo_id) %>% - st_centroid() %>% - pull(geometry) %>% - transpose() -block_groups["lon"] <- unlist(centroids[[1]]) -block_groups["lat"] <- unlist(centroids[[2]]) # ---- download basemap -zoom_level <- 12 +zoom_level <- 13 +buffer <- 0.01 +extent <- extent_blocks if(file.exists(paste0("data/basemap_cache/basemap_", zoom_level, ".RData"))){ load(file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData")) } else { register_stadiamaps(substr(read_file("data/stadia_api_key.txt"), 1, 36), write = FALSE) - basemap <- ggmap::get_stadiamap(bbox = c(left = as.double(extent[1]), - bottom = as.double(extent[2]), - right = as.double(extent[3]), - top = as.double(extent[4])), + basemap <- ggmap::get_stadiamap(bbox = c(left = as.double(extent[1]) - buffer, + bottom = as.double(extent[2]) - buffer, + right = as.double(extent[3]) + buffer, + top = as.double(extent[4])) + buffer, zoom = zoom_level, maptype = "alidade_smooth", color = "bw", force = TRUE) - save(basemap_raster, file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData")) + save(basemap, file = paste0("data/basemap_cache/basemap_", zoom_level, ".RData")) } # ---- plot figures # --- plot maps ggmap(basemap) + geom_sf(data = block_data_2022, - aes(fill = medhhinc), + fill = NA, + color = "black", inherit.aes = FALSE, alpha = 0.6) + - geom_label_repel(data = block_groups, - aes(label = name, + geom_label_repel(data = block_interest_data, + aes(label = paste(name), y = lat, x = lon), - min.segment.length = 0, - nudge_y = -0.03) + - # geom_sf_label(data = block_data_2022 %>% left_join(block_groups, by = "geo_id"), - # aes(label = name), - # inherit.aes = FALSE, - # nudge_x = 1, - # size = 2) + + min.segment.length = 0.02) + theme(axis.text.x=element_blank(), axis.text.y=element_blank(), axis.ticks=element_blank(), @@ -71,14 +86,15 @@ ggmap(basemap) + type = "viridis") + labs(title = "Median Income", fill = NULL) -ggsave(file = "figures/median_income_map.png", +ggsave(file = "figures/block_map.png", device = "png", width = 11, height = 8.5, units = "in") # ---- plot graphs -ggplot(data = block_data_2022 %>% mutate(baycreek = geo_id %in% block_groups$geo_id) %>% left_join(block_groups, by = "geo_id") %>% arrange(medhhinc)) + +ggplot(data = block_data_2022 %>% + arrange(medhhinc)) + geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"), aes(yintercept = medhhinc), linetype = "dashed") + @@ -88,12 +104,16 @@ ggplot(data = block_data_2022 %>% mutate(baycreek = geo_id %in% block_groups$geo label = "Madison median") + geom_col(aes(x = reorder(geo_id, medhhinc, sum), y = medhhinc, - fill = baycreek)) + - geom_label(aes(x = reorder(geo_id, medhhinc, sum), - y = medhhinc + 10000, - label = name)) + + fill = baycreek), + color = "black", + size = 0.01, + position = position_dodge2(padding = 0)) + + geom_label_repel(aes(x = reorder(geo_id, medhhinc, sum), + y = medhhinc, + label = name), + min.segment.length = 0) + scale_x_discrete(labels = NULL, breaks = NULL) + - scale_y_continuous(label = scales::label_dollar()) + + scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0,NA))) + scale_fill_discrete(guide="none") + theme(axis.text.x=element_blank(), axis.title.x=element_blank()) + @@ -104,4 +124,103 @@ ggsave(file = "figures/median_income.png", device = "png", width = 11, height = 8.5, - units = "in") \ No newline at end of file + units = "in") + +ggplot() + + geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"), + aes(yintercept = medhhinc), + linetype = "dashed") + + geom_boxplot(data = block_data_2022, + aes(x = "Madison", + y = medhhinc), + outlier.shape = NA) + + geom_col(data = block_data_2022 %>% filter(interest %in% c(TRUE, "baycreek")) %>% arrange(medhhinc), + aes(x = reorder(name, medhhinc, sum), + y = medhhinc, + fill = baycreek)) + + scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0,NA))) + + scale_fill_discrete(guide="none") + + theme(axis.text.x=element_text(angle = 30, vjust = 0.7), + axis.title.x=element_blank()) + + labs(title = "Median Income by Block", + x = NULL, + y = "Median Income") +ggsave(file = "figures/median_income_boxplot.png", + device = "png", + width = 11, + height = 8.5, + units = "in") + +ggplot(data = block_data_2022 %>% filter(interest %in% c("baycreek", TRUE))) + + geom_hline(data = block_data_2022 %>% filter(geo_id == "Madison"), + aes(yintercept = medhhinc), + linetype = "dashed") + + geom_label(data = block_data_2022 %>% filter(geo_id == "Madison"), + aes(y = medhhinc, + x = 0.1, + label = "Madison Median")) + + geom_point(data = block_data_2022, + aes(x = pc_wht/100, + y = medhhinc), + size = 2, + alpha = 0.5, + color = "grey") + + geom_point(aes(x = pc_wht/100, + y = medhhinc, + color = interest), + size = 5) + + geom_label_repel(aes(x = pc_wht/100, + y = medhhinc, + label = name), + nudge_y = 5000, + min.segment.length = 0) + + scale_x_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0)), limits = c(0, 1)) + + scale_y_continuous(label = scales::label_dollar(), expand = expansion(mult = c(0.1, 0.1))) + + scale_color_discrete(guide="none") + + labs(title = "Median income by racial makup of neighborhood", + x = "Percent of residents that are white", + y = "Median income") +ggsave(file = "figures/income_race.png", + device = "png", + width = 11, + height = 8.5, + units = "in") + +ggplot(data = block_data_2022 %>% + filter(interest %in% c("baycreek", TRUE) | geo_id == "Madison") %>% + pivot_longer(cols = races$variable, names_to = "race", values_to = "percent")) + + geom_col(aes(x = ifelse(geo_id == "Madison", "Madison average", name), + y = percent/100, + fill = race), + color = "black") + + scale_y_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0))) + + scale_fill_brewer(type = "qual", labels = deframe(races) %>% as.list()) + + theme(axis.text.x=element_text(angle = 30, vjust = 0.7), + axis.title.x=element_blank()) + + labs(title = "Racial makup of neighborhood", + x = NULL, + y = NULL, + fill = "Race") +ggsave(file = "figures/race_percent.png", + device = "png", + width = 11, + height = 8.5, + units = "in") + +ggplot(data = block_data_2022 %>% + filter(interest %in% c("baycreek", TRUE) | geo_id == "Madison")) + + geom_col(aes(x = ifelse(geo_id == "Madison", "Madison average", name), + y = pc_fmpv/100), + color = "black") + + scale_y_continuous(label = scales::label_percent(), expand = expansion(mult = c(0, 0.1))) + + theme(axis.text.x=element_text(angle = 30, vjust = 0.7), + axis.title.x=element_blank()) + + labs(title = "Families below the poverty lines", + x = NULL, + y = NULL, + fill = NULL) +ggsave(file = "figures/neighborhood_poverty.png", + device = "png", + width = 11, + height = 8.5, + units = "in") diff --git a/block_interest.csv b/block_interest.csv new file mode 100644 index 0000000..84d03f2 --- /dev/null +++ b/block_interest.csv @@ -0,0 +1,9 @@ +name,geo_id,baycreek +Bay Creek 1,550250013001,TRUE +Bay Creek 2,550250013002,TRUE +Greenbush 4,550250012004,FALSE +Greenbush 2,550250012002,FALSE +Bram’s Addition,550250014011,FALSE +Burr Oaks 2,550250014012,FALSE +Burr Oaks 3,550250014013,FALSE +Capital View,550250014014,FALSE diff --git a/census_interest.csv b/census_interest.csv new file mode 100644 index 0000000..5fb63c3 --- /dev/null +++ b/census_interest.csv @@ -0,0 +1,6 @@ +name,geo_id +Bay Creek,55025001300 +Greenbush, +Bram’s Addition, +Burr Oaks, +Capital View Neighborhoods,