added transportation
This commit is contained in:
parent
891eca1a00
commit
5898cd14fc
1 changed files with 214 additions and 62 deletions
276
city_compare.Rmd
276
city_compare.Rmd
|
|
@ -15,7 +15,7 @@ editor_options:
|
|||
|
||||
## Libraries
|
||||
|
||||
```{r libs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r libs, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
date()
|
||||
rm(list=ls())
|
||||
library(tidyverse)
|
||||
|
|
@ -28,21 +28,21 @@ library(scales)
|
|||
```
|
||||
|
||||
## API keys
|
||||
```{r api_keys, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r api_keys, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
# load census api key
|
||||
census_api_key(key = substr(read_file(file = "api_keys/census_api_key"), 1, 40))
|
||||
|
||||
```
|
||||
|
||||
## Date ranges
|
||||
```{r date_range, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r date_range, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
date_start <- "2014-01-01"
|
||||
date_end <- "2024-12-31"
|
||||
|
||||
```
|
||||
## Cities to compare
|
||||
|
||||
```{r cities, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r cities, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
cities <- read_csv(file = "cities.csv")
|
||||
cities <- cities |>
|
||||
mutate(city_name = paste0(City, " ", Type))
|
||||
|
|
@ -52,29 +52,33 @@ cities <- cities |>
|
|||
# Data
|
||||
|
||||
## Census data
|
||||
```{r census, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r census, eval = TRUE, echo = FALSE, results = "hide", warning = FALSE, error = TRUE, message = FALSE}
|
||||
all_cbsas <- get_acs(
|
||||
geography = "cbsa",
|
||||
variables = "B01003_001",
|
||||
year = 2023,
|
||||
geometry = TRUE
|
||||
)
|
||||
populations <- list(NULL)
|
||||
for(city in cities$city_name){
|
||||
state <- cities |> filter(city_name == city) |> pull(State)
|
||||
populations[[city]] <- get_acs(
|
||||
geography = "place",
|
||||
variables = "B01003_001",
|
||||
state = state,
|
||||
year = 2023,
|
||||
geometry = TRUE
|
||||
) |>
|
||||
filter(str_detect(NAME, city))
|
||||
for(i in seq_len(nrow(cities))){
|
||||
city <- cities$City[i]
|
||||
state <- cities$State[i]
|
||||
|
||||
populations[[city]] <- all_cbsas |>
|
||||
filter(
|
||||
str_detect(NAME, city) &
|
||||
str_detect(NAME, paste0(", .*", state)) # State appears after the comma
|
||||
)
|
||||
}
|
||||
|
||||
populations <- bind_rows(populations)
|
||||
|
||||
city_center <- populations |>
|
||||
st_centroid() |>
|
||||
st_transform(4326) %>%
|
||||
mutate(
|
||||
lon = st_coordinates(.)[,1],
|
||||
lat = st_coordinates(.)[,2]
|
||||
) |>
|
||||
st_transform(4326) |>
|
||||
(\(x) mutate(x,
|
||||
lon = st_coordinates(x)[,1],
|
||||
lat = st_coordinates(x)[,2]
|
||||
))() |>
|
||||
st_drop_geometry() |>
|
||||
select(lat, lon)
|
||||
|
||||
|
|
@ -82,42 +86,49 @@ cities <- bind_cols(cities, populations, city_center)
|
|||
cities <- cities |> mutate(
|
||||
density = estimate/as.double(st_area(geometry))*1000000
|
||||
)
|
||||
```
|
||||
|
||||
```{r plotPop, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
ggplot(cities) +
|
||||
geom_col(aes(x = City,
|
||||
y = estimate)) +
|
||||
labs(title = "City Population",
|
||||
x = "City",
|
||||
y = estimate,
|
||||
fill = City)) +
|
||||
labs(title = "City Population - Metro Areas",
|
||||
x = NULL,
|
||||
y = NULL) +
|
||||
scale_y_continuous(label = unit_format(unit = "K", scale = 1e-3, sep = ""))
|
||||
scale_y_continuous(label = unit_format(unit = "K", scale = 1e-3, sep = ""), expand = expansion(mult = c(0,0.05))) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL)
|
||||
|
||||
ggplot(cities) +
|
||||
geom_col(aes(x = City,
|
||||
y = density)) +
|
||||
labs(title = "City Density",
|
||||
x = "City",
|
||||
y = density,
|
||||
fill = City)) +
|
||||
labs(title = "City Density - Metro Areas",
|
||||
x = NULL,
|
||||
y = "people/km^2") +
|
||||
scale_y_continuous()
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL)
|
||||
```
|
||||
|
||||
## Map cities
|
||||
```{r cities_map, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r cities_map, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
ggplot(data = cities) +
|
||||
geom_polygon(data = map_data(map = "state"),
|
||||
aes(long, lat, group = group),
|
||||
fill = "white", colour = "grey50") +
|
||||
geom_point(aes(x = lon,
|
||||
y = lat),
|
||||
y = lat,
|
||||
fill = City),
|
||||
shape = 21,
|
||||
fill = "lightgreen",
|
||||
color = "black",
|
||||
size = 4)
|
||||
size = 4) +
|
||||
scale_fill_brewer(type = "qual")
|
||||
|
||||
|
||||
```
|
||||
|
||||
## weather
|
||||
```{r weather, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r weather, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
weather <- list(NULL)
|
||||
for(city in cities$City){
|
||||
city_info <- cities |> filter(City == city)
|
||||
|
|
@ -143,10 +154,13 @@ weather |>
|
|||
summarise(median_days_above_80 = median(days_above_80)) |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = city,
|
||||
y = median_days_above_80)) +
|
||||
y = median_days_above_80,
|
||||
fill = city)) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
labs(title = "Days above 80°F (apparent temperature)",
|
||||
y = "Median days per year",
|
||||
x = "City",
|
||||
x = NULL,
|
||||
fill = NULL)
|
||||
|
||||
weather |>
|
||||
|
|
@ -162,7 +176,7 @@ ggplot() +
|
|||
values = c("firebrick", "dodgerblue")) +
|
||||
labs(title = "Apparent Temperature",
|
||||
y = "°F",
|
||||
x = "City",
|
||||
x = NULL,
|
||||
fill = NULL)
|
||||
|
||||
weather |>
|
||||
|
|
@ -174,10 +188,13 @@ weather |>
|
|||
summarise(median_days_above_4hr = median(days_above_4hr)) |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = city,
|
||||
y = median_days_above_4hr)) +
|
||||
y = median_days_above_4hr,
|
||||
fill = city)) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
labs(title = "Days with more than 4 hrs of rain",
|
||||
y = "Median days per year",
|
||||
x = "City",
|
||||
x = NULL,
|
||||
fill = NULL)
|
||||
|
||||
weather |>
|
||||
|
|
@ -188,10 +205,13 @@ weather |>
|
|||
group_by(city, month) |>
|
||||
summarise(median_days_above_4hr = median(days_above_4hr)) |>
|
||||
ggplot() +
|
||||
scale_x_continuous(breaks = seq(1,12,1)) +
|
||||
scale_x_continuous(breaks = seq(1,12,1), expand = expansion(mult = c(0,0))) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0))) +
|
||||
geom_line(aes(x = month,
|
||||
y = median_days_above_4hr,
|
||||
color = city)) +
|
||||
color = city),
|
||||
size = 2) +
|
||||
scale_color_brewer(type = "qual") +
|
||||
labs(title = "Days with more than 4 hrs of rain",
|
||||
y = "Median days per month",
|
||||
x = "Month",
|
||||
|
|
@ -201,7 +221,7 @@ weather |>
|
|||
|
||||
|
||||
## Air Quality
|
||||
```{r air_quality, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
```{r airQualityData, eval = TRUE, echo = FALSE, results = "hide", warning = FALSE, error = TRUE, message = FALSE}
|
||||
# download data
|
||||
# Create data directory if it doesn't exist
|
||||
if (!dir.exists("data")) {
|
||||
|
|
@ -272,56 +292,68 @@ aqi_data <- bind_rows(aqi_list)
|
|||
aqi_data <- aqi_data |>
|
||||
filter(CBSA.Code %in% (cities |> pull(`CBSA-Code`))) |>
|
||||
mutate(year = year(ymd(Date)),
|
||||
month = month(ymd(Date)))
|
||||
month = month(ymd(Date))) |>
|
||||
left_join(cities, by = join_by("CBSA.Code" == "CBSA-Code"))
|
||||
```
|
||||
|
||||
```{r airQualityPlots, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
aqi_threshhold <- 50
|
||||
|
||||
aqi_data |>
|
||||
group_by(year, CBSA) |>
|
||||
group_by(year, City) |>
|
||||
summarise(hours_above = sum(AQI >= aqi_threshhold, na.rm = TRUE)) |>
|
||||
group_by(CBSA) |>
|
||||
group_by(City) |>
|
||||
summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = CBSA,
|
||||
y = mean_hours_above)) +
|
||||
geom_col(aes(x = City,
|
||||
y = mean_hours_above,
|
||||
fill = City)) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
labs(title = "Hours above 50 AQI",
|
||||
y = "Mean hours per year",
|
||||
x = "City",
|
||||
x = NULL,
|
||||
fill = NULL)
|
||||
|
||||
aqi_data |>
|
||||
group_by(year, CBSA) |>
|
||||
group_by(year, City) |>
|
||||
summarise(hours_above = sum(AQI >= 2*aqi_threshhold, na.rm = TRUE)) |>
|
||||
group_by(CBSA) |>
|
||||
group_by(City) |>
|
||||
summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = CBSA,
|
||||
y = mean_hours_above)) +
|
||||
geom_col(aes(x = City,
|
||||
y = mean_hours_above,
|
||||
fill = City)) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
labs(title = paste0("Hours above ", aqi_threshhold * 2," AQI"),
|
||||
y = "Mean hours per year",
|
||||
x = "City",
|
||||
x = NULL,
|
||||
fill = NULL)
|
||||
|
||||
aqi_data |>
|
||||
ggplot() +
|
||||
geom_violin(aes(x = CBSA,
|
||||
y = AQI)) +
|
||||
geom_violin(aes(x = City,
|
||||
y = AQI,
|
||||
fill = City)) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
coord_cartesian(ylim = c(0, 150)) +
|
||||
labs(title = "AQI",
|
||||
y = "AQI",
|
||||
x = "City",
|
||||
fill = NULL) +
|
||||
coord_cartesian(ylim = (c(0,500)))
|
||||
x = NULL,
|
||||
fill = NULL)
|
||||
|
||||
aqi_data |>
|
||||
group_by(year, month, CBSA) |>
|
||||
group_by(year, month, City) |>
|
||||
summarise(hours_above = sum(AQI >= aqi_threshhold, na.rm = TRUE)) |>
|
||||
group_by(CBSA, month) |>
|
||||
group_by(City, month) |>
|
||||
summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |>
|
||||
ggplot() +
|
||||
geom_line(aes(x = month,
|
||||
y = mean_hours_above,
|
||||
color = CBSA)) +
|
||||
scale_x_continuous(breaks = seq(1,12,1)) +
|
||||
color = City)) +
|
||||
scale_x_continuous(breaks = seq(1,12,1), expand = expansion(mult = c(0,0))) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
scale_color_brewer(type = "qual") +
|
||||
labs(title = paste0("Hours with an AQI of greater than or equal to ", aqi_threshhold),
|
||||
y = "Mean days per month",
|
||||
|
|
@ -329,3 +361,123 @@ aqi_data |>
|
|||
color = "City")
|
||||
|
||||
```
|
||||
|
||||
## Transportation
|
||||
```{r transportation, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
# Define commute mode variables
|
||||
commute_vars <- c(
|
||||
commute_total = "B08301_001",
|
||||
drove_alone = "B08301_003",
|
||||
carpooled = "B08301_004",
|
||||
transit = "B08301_010",
|
||||
bicycle = "B08301_018",
|
||||
walked = "B08301_019",
|
||||
work_from_home = "B08301_021"
|
||||
)
|
||||
|
||||
# Define vehicle ownership variables
|
||||
vehicle_vars <- c(
|
||||
households_total = "B08201_001",
|
||||
vehicles_0 = "B08201_002",
|
||||
vehicles_1 = "B08201_003",
|
||||
vehicles_2 = "B08201_004",
|
||||
vehicles_3 = "B08201_005",
|
||||
vehicles_4plus = "B08201_006",
|
||||
vehicles_aggregate = "B25046_001"
|
||||
)
|
||||
|
||||
# Fetch commute mode data
|
||||
commute_data <- list()
|
||||
for(i in seq_len(nrow(cities))){
|
||||
city <- cities$city_name[i]
|
||||
state <- cities$State[i]
|
||||
|
||||
get_acs(
|
||||
geography = "place",
|
||||
variables = commute_vars,
|
||||
state = state,
|
||||
year = 2023
|
||||
) |>
|
||||
filter(str_detect(NAME, city)) |>
|
||||
select(NAME, variable, estimate) |>
|
||||
pivot_wider(names_from = variable, values_from = estimate) |>
|
||||
mutate(
|
||||
city = city,
|
||||
pct_drove_alone = drove_alone / commute_total * 100,
|
||||
pct_carpooled = carpooled / commute_total * 100,
|
||||
pct_transit = transit / commute_total * 100,
|
||||
pct_bicycle = bicycle / commute_total * 100,
|
||||
pct_walked = walked / commute_total * 100,
|
||||
pct_work_from_home = work_from_home / commute_total * 100
|
||||
) -> commute_data[[city]]
|
||||
}
|
||||
|
||||
commute_data <- bind_rows(commute_data)
|
||||
|
||||
# Fetch vehicle ownership data
|
||||
vehicle_data <- list()
|
||||
for(i in seq_len(nrow(cities))){
|
||||
city <- cities$city_name[i]
|
||||
state <- cities$State[i]
|
||||
|
||||
get_acs(
|
||||
geography = "place",
|
||||
variables = vehicle_vars,
|
||||
state = state,
|
||||
year = 2023
|
||||
) |>
|
||||
filter(str_detect(NAME, city)) |>
|
||||
select(NAME, variable, estimate) |>
|
||||
pivot_wider(names_from = variable, values_from = estimate) |>
|
||||
mutate(
|
||||
city = city,
|
||||
avg_vehicles_per_hh = vehicles_aggregate / households_total,
|
||||
pct_no_vehicle = vehicles_0 / households_total * 100,
|
||||
pct_1_vehicle = vehicles_1 / households_total * 100,
|
||||
pct_2_vehicle = vehicles_2 / households_total * 100,
|
||||
pct_3plus_vehicle = (vehicles_3 + vehicles_4plus) / households_total * 100
|
||||
) -> vehicle_data[[city]]
|
||||
}
|
||||
|
||||
vehicle_data <- bind_rows(vehicle_data)
|
||||
```
|
||||
|
||||
|
||||
```{r transportationPlots, eval = TRUE, echo = FALSE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
|
||||
vehicle_data |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = str_remove(city, " city"),
|
||||
y = pct_no_vehicle / 100,
|
||||
fill = city)) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05)), labels = percent_format()) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
labs(title = "Percent of households without a car",
|
||||
y = NULL,
|
||||
x = NULL)
|
||||
|
||||
vehicle_data |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = str_remove(city, " city"),
|
||||
y = avg_vehicles_per_hh,
|
||||
fill = city)) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05))) +
|
||||
scale_fill_brewer(type = "qual", guide = NULL) +
|
||||
labs(title = "Average Vehicle per Household",
|
||||
x = NULL,
|
||||
y = NULL)
|
||||
|
||||
commute_data |>
|
||||
pivot_longer(cols = c("pct_walked","pct_bicycle","pct_transit"),
|
||||
names_to = "transit_type",
|
||||
values_to = "pct") |>
|
||||
ggplot() +
|
||||
geom_col(aes(x = str_remove(city, " city"),
|
||||
y = pct / 100,
|
||||
fill = transit_type)) +
|
||||
scale_y_continuous(expand = expansion(mult = c(0,0.05)), labels = percent_format()) +
|
||||
labs(title = "Commute mode share - non car",
|
||||
y = NULL,
|
||||
x = NULL,
|
||||
fill = "Commute mode")
|
||||
|
||||
```
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue