diff --git a/city_compare.Rmd b/city_compare.Rmd index cd88bb1..90c14db 100644 --- a/city_compare.Rmd +++ b/city_compare.Rmd @@ -15,7 +15,7 @@ editor_options: ## Libraries -```{r libs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r libs, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} date() rm(list=ls()) library(tidyverse) @@ -28,21 +28,21 @@ library(scales) ``` ## API keys -```{r api_keys, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r api_keys, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} # load census api key census_api_key(key = substr(read_file(file = "api_keys/census_api_key"), 1, 40)) ``` ## Date ranges -```{r date_range, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r date_range, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} date_start <- "2014-01-01" date_end <- "2024-12-31" ``` ## Cities to compare -```{r cities, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r cities, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} cities <- read_csv(file = "cities.csv") cities <- cities |> mutate(city_name = paste0(City, " ", Type)) @@ -52,29 +52,33 @@ cities <- cities |> # Data ## Census data -```{r census, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r census, eval = TRUE, echo = FALSE, results = "hide", warning = FALSE, error = TRUE, message = FALSE} +all_cbsas <- get_acs( + geography = "cbsa", + variables = "B01003_001", + year = 2023, + geometry = TRUE +) populations <- list(NULL) -for(city in cities$city_name){ - state <- cities |> filter(city_name == city) |> pull(State) - populations[[city]] <- get_acs( - geography = "place", - variables = "B01003_001", - state = state, - year = 2023, - geometry = TRUE - ) |> - filter(str_detect(NAME, city)) +for(i in seq_len(nrow(cities))){ + city <- cities$City[i] + state <- cities$State[i] + + populations[[city]] <- all_cbsas |> + filter( + str_detect(NAME, city) & + str_detect(NAME, paste0(", .*", state)) # State appears after the comma + ) } - populations <- bind_rows(populations) city_center <- populations |> st_centroid() |> - st_transform(4326) %>% - mutate( - lon = st_coordinates(.)[,1], - lat = st_coordinates(.)[,2] - ) |> + st_transform(4326) |> + (\(x) mutate(x, + lon = st_coordinates(x)[,1], + lat = st_coordinates(x)[,2] + ))() |> st_drop_geometry() |> select(lat, lon) @@ -82,42 +86,49 @@ cities <- bind_cols(cities, populations, city_center) cities <- cities |> mutate( density = estimate/as.double(st_area(geometry))*1000000 ) +``` +```{r plotPop, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} ggplot(cities) + geom_col(aes(x = City, - y = estimate)) + - labs(title = "City Population", - x = "City", + y = estimate, + fill = City)) + + labs(title = "City Population - Metro Areas", + x = NULL, y = NULL) + - scale_y_continuous(label = unit_format(unit = "K", scale = 1e-3, sep = "")) + scale_y_continuous(label = unit_format(unit = "K", scale = 1e-3, sep = ""), expand = expansion(mult = c(0,0.05))) + + scale_fill_brewer(type = "qual", guide = NULL) ggplot(cities) + geom_col(aes(x = City, - y = density)) + - labs(title = "City Density", - x = "City", + y = density, + fill = City)) + + labs(title = "City Density - Metro Areas", + x = NULL, y = "people/km^2") + - scale_y_continuous() + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + + scale_fill_brewer(type = "qual", guide = NULL) ``` ## Map cities -```{r cities_map, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r cities_map, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} ggplot(data = cities) + geom_polygon(data = map_data(map = "state"), aes(long, lat, group = group), fill = "white", colour = "grey50") + geom_point(aes(x = lon, - y = lat), + y = lat, + fill = City), shape = 21, - fill = "lightgreen", color = "black", - size = 4) + size = 4) + + scale_fill_brewer(type = "qual") ``` ## weather -```{r weather, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r weather, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} weather <- list(NULL) for(city in cities$City){ city_info <- cities |> filter(City == city) @@ -143,10 +154,13 @@ weather |> summarise(median_days_above_80 = median(days_above_80)) |> ggplot() + geom_col(aes(x = city, - y = median_days_above_80)) + + y = median_days_above_80, + fill = city)) + + scale_fill_brewer(type = "qual", guide = NULL) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + labs(title = "Days above 80°F (apparent temperature)", y = "Median days per year", - x = "City", + x = NULL, fill = NULL) weather |> @@ -162,7 +176,7 @@ ggplot() + values = c("firebrick", "dodgerblue")) + labs(title = "Apparent Temperature", y = "°F", - x = "City", + x = NULL, fill = NULL) weather |> @@ -174,10 +188,13 @@ weather |> summarise(median_days_above_4hr = median(days_above_4hr)) |> ggplot() + geom_col(aes(x = city, - y = median_days_above_4hr)) + + y = median_days_above_4hr, + fill = city)) + + scale_fill_brewer(type = "qual", guide = NULL) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + labs(title = "Days with more than 4 hrs of rain", y = "Median days per year", - x = "City", + x = NULL, fill = NULL) weather |> @@ -188,10 +205,13 @@ weather |> group_by(city, month) |> summarise(median_days_above_4hr = median(days_above_4hr)) |> ggplot() + - scale_x_continuous(breaks = seq(1,12,1)) + + scale_x_continuous(breaks = seq(1,12,1), expand = expansion(mult = c(0,0))) + + scale_y_continuous(expand = expansion(mult = c(0,0))) + geom_line(aes(x = month, y = median_days_above_4hr, - color = city)) + + color = city), + size = 2) + + scale_color_brewer(type = "qual") + labs(title = "Days with more than 4 hrs of rain", y = "Median days per month", x = "Month", @@ -201,7 +221,7 @@ weather |> ## Air Quality -```{r air_quality, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +```{r airQualityData, eval = TRUE, echo = FALSE, results = "hide", warning = FALSE, error = TRUE, message = FALSE} # download data # Create data directory if it doesn't exist if (!dir.exists("data")) { @@ -272,56 +292,68 @@ aqi_data <- bind_rows(aqi_list) aqi_data <- aqi_data |> filter(CBSA.Code %in% (cities |> pull(`CBSA-Code`))) |> mutate(year = year(ymd(Date)), - month = month(ymd(Date))) + month = month(ymd(Date))) |> + left_join(cities, by = join_by("CBSA.Code" == "CBSA-Code")) +``` +```{r airQualityPlots, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} aqi_threshhold <- 50 aqi_data |> - group_by(year, CBSA) |> + group_by(year, City) |> summarise(hours_above = sum(AQI >= aqi_threshhold, na.rm = TRUE)) |> - group_by(CBSA) |> + group_by(City) |> summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |> ggplot() + - geom_col(aes(x = CBSA, - y = mean_hours_above)) + + geom_col(aes(x = City, + y = mean_hours_above, + fill = City)) + + scale_fill_brewer(type = "qual", guide = NULL) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + labs(title = "Hours above 50 AQI", y = "Mean hours per year", - x = "City", + x = NULL, fill = NULL) aqi_data |> - group_by(year, CBSA) |> + group_by(year, City) |> summarise(hours_above = sum(AQI >= 2*aqi_threshhold, na.rm = TRUE)) |> - group_by(CBSA) |> + group_by(City) |> summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |> ggplot() + - geom_col(aes(x = CBSA, - y = mean_hours_above)) + + geom_col(aes(x = City, + y = mean_hours_above, + fill = City)) + + scale_fill_brewer(type = "qual", guide = NULL) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + labs(title = paste0("Hours above ", aqi_threshhold * 2," AQI"), y = "Mean hours per year", - x = "City", + x = NULL, fill = NULL) aqi_data |> ggplot() + - geom_violin(aes(x = CBSA, - y = AQI)) + + geom_violin(aes(x = City, + y = AQI, + fill = City)) + + scale_fill_brewer(type = "qual", guide = NULL) + + coord_cartesian(ylim = c(0, 150)) + labs(title = "AQI", y = "AQI", - x = "City", - fill = NULL) + - coord_cartesian(ylim = (c(0,500))) + x = NULL, + fill = NULL) aqi_data |> - group_by(year, month, CBSA) |> + group_by(year, month, City) |> summarise(hours_above = sum(AQI >= aqi_threshhold, na.rm = TRUE)) |> - group_by(CBSA, month) |> + group_by(City, month) |> summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |> ggplot() + geom_line(aes(x = month, y = mean_hours_above, - color = CBSA)) + - scale_x_continuous(breaks = seq(1,12,1)) + + color = City)) + + scale_x_continuous(breaks = seq(1,12,1), expand = expansion(mult = c(0,0))) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + scale_color_brewer(type = "qual") + labs(title = paste0("Hours with an AQI of greater than or equal to ", aqi_threshhold), y = "Mean days per month", @@ -329,3 +361,123 @@ aqi_data |> color = "City") ``` + +## Transportation +```{r transportation, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +# Define commute mode variables +commute_vars <- c( + commute_total = "B08301_001", + drove_alone = "B08301_003", + carpooled = "B08301_004", + transit = "B08301_010", + bicycle = "B08301_018", + walked = "B08301_019", + work_from_home = "B08301_021" +) + +# Define vehicle ownership variables +vehicle_vars <- c( + households_total = "B08201_001", + vehicles_0 = "B08201_002", + vehicles_1 = "B08201_003", + vehicles_2 = "B08201_004", + vehicles_3 = "B08201_005", + vehicles_4plus = "B08201_006", + vehicles_aggregate = "B25046_001" +) + +# Fetch commute mode data +commute_data <- list() +for(i in seq_len(nrow(cities))){ + city <- cities$city_name[i] + state <- cities$State[i] + + get_acs( + geography = "place", + variables = commute_vars, + state = state, + year = 2023 + ) |> + filter(str_detect(NAME, city)) |> + select(NAME, variable, estimate) |> + pivot_wider(names_from = variable, values_from = estimate) |> + mutate( + city = city, + pct_drove_alone = drove_alone / commute_total * 100, + pct_carpooled = carpooled / commute_total * 100, + pct_transit = transit / commute_total * 100, + pct_bicycle = bicycle / commute_total * 100, + pct_walked = walked / commute_total * 100, + pct_work_from_home = work_from_home / commute_total * 100 + ) -> commute_data[[city]] +} + +commute_data <- bind_rows(commute_data) + +# Fetch vehicle ownership data +vehicle_data <- list() +for(i in seq_len(nrow(cities))){ + city <- cities$city_name[i] + state <- cities$State[i] + + get_acs( + geography = "place", + variables = vehicle_vars, + state = state, + year = 2023 + ) |> + filter(str_detect(NAME, city)) |> + select(NAME, variable, estimate) |> + pivot_wider(names_from = variable, values_from = estimate) |> + mutate( + city = city, + avg_vehicles_per_hh = vehicles_aggregate / households_total, + pct_no_vehicle = vehicles_0 / households_total * 100, + pct_1_vehicle = vehicles_1 / households_total * 100, + pct_2_vehicle = vehicles_2 / households_total * 100, + pct_3plus_vehicle = (vehicles_3 + vehicles_4plus) / households_total * 100 + ) -> vehicle_data[[city]] +} + +vehicle_data <- bind_rows(vehicle_data) +``` + + +```{r transportationPlots, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +vehicle_data |> + ggplot() + + geom_col(aes(x = str_remove(city, " city"), + y = pct_no_vehicle / 100, + fill = city)) + + scale_y_continuous(expand = expansion(mult = c(0,0.05)), labels = percent_format()) + + scale_fill_brewer(type = "qual", guide = NULL) + + labs(title = "Percent of households without a car", + y = NULL, + x = NULL) + +vehicle_data |> + ggplot() + + geom_col(aes(x = str_remove(city, " city"), + y = avg_vehicles_per_hh, + fill = city)) + + scale_y_continuous(expand = expansion(mult = c(0,0.05))) + + scale_fill_brewer(type = "qual", guide = NULL) + + labs(title = "Average Vehicle per Household", + x = NULL, + y = NULL) + +commute_data |> + pivot_longer(cols = c("pct_walked","pct_bicycle","pct_transit"), + names_to = "transit_type", + values_to = "pct") |> + ggplot() + + geom_col(aes(x = str_remove(city, " city"), + y = pct / 100, + fill = transit_type)) + + scale_y_continuous(expand = expansion(mult = c(0,0.05)), labels = percent_format()) + + labs(title = "Commute mode share - non car", + y = NULL, + x = NULL, + fill = "Commute mode") + +```