added MilWAUKeeWalks.Rmd

This commit is contained in:
Ben Varick 2025-09-19 15:22:08 -07:00
parent 486ad2533c
commit e71f197046
Signed by: ben
SSH key fingerprint: SHA256:jWnpFDAcacYM5aPFpYRqlsamlDyKNpSj3jj+k4ojtUo
2 changed files with 350 additions and 1 deletions

348
R/MilWALKeeWalks.Rmd Normal file
View file

@ -0,0 +1,348 @@
---
title: "MilWALKeeWalks"
output:
html_document:
toc: true
toc_depth: 5
toc_float:
collapsed: false
smooth_scroll: true
editor_options:
chunk_output_type: console
---
# Input Data & Configuration
## Libraries
```{r libs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
date()
rm(list=ls())
library(tidyverse)
library(ggmap)
library(sf)
library(osrm)
library(smoothr)
library(ggnewscale)
library(RColorBrewer)
library(magick)
library(rsvg)
library(parallel)
library(tidycensus)
```
## Load TOPS data
```{r loadTOPS, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
load(file = "data/TOPS/TOPS_data.Rda")
load(file = "data/TOPS/vuln_roles.Rda")
load(file = "data/TOPS/retrieve_date.Rda")
load(file = "data/TOPS/injury_severity.Rda")
```
## filter to county
```{r filterTOPS, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
focus_county <- "MILWAUKEE"
TOPS_data_filtered <- TOPS_data %>% filter(CNTYNAME == focus_county)
```
## identify start and end dates
```{r startenddates, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
year_min <- min(year(TOPS_data_filtered$date))
year_max <- max(year(TOPS_data_filtered$date))
```
## intro charts
```{r introCharts, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
ggplot() +
geom_col(data = TOPS_data_filtered %>%
filter(year != year_max) %>%
filter(!is.na(vulnerable_role)) %>%
group_by(month, vulnerable_role) %>%
summarize(total = n()),
aes(x = month,
y = total/((year_max - 1) - year_min + 1),
fill = vulnerable_role),
position = position_dodge()) +
geom_line(data = TOPS_data_filtered %>%
filter(year == year_max) %>%
filter(!is.na(vulnerable_role)) %>%
group_by(month, vulnerable_role) %>%
summarize(total = n()),
aes(x = month,
y = total,
color = vulnerable_role,
group = vulnerable_role),
linewidth = 1) +
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
scale_fill_manual(values = c("sienna3", "deepskyblue3")) +
scale_color_manual(values = c("sienna4", "deepskyblue4")) +
labs(title = paste0("Crashes involved pedestrians and bicyclists"),
subtitle = paste0(str_to_title(focus_county), " County"),
x = "Month",
y = "Average crashes per year",
fill = paste0("Yearly average\n", year_min, " - ", year_max - 1),
color = year_max,
caption = paste0("crash data from UW TOPS lab - retrieved ",
strftime(retrieve_date, format = "%m/%Y"),
"\nper direction of the WisDOT Bureau of Transportation Safety")) +
theme(plot.caption = element_text(color = "grey"))
ggsave(filename = paste0("figures/MilWALKee_Walks/", "month_role.png"),
device = png,
height = 8.5,
width = 11,
units = "in",
create.dir = TRUE)
ggplot() +
geom_col(data = TOPS_data_filtered %>%
filter(vulnerable_role == "Pedestrian",
!is.na(ped_age)) %>%
filter(year != year_max) %>%
mutate(age = ifelse(ped_age <= 18, "child", "adult")) %>%
group_by(month, age) %>%
summarize(total = n()/((year_max - 1) - year_min + 1)),
aes(x = month,
y = total,
fill = age),
position = position_dodge()) +
geom_line(data = TOPS_data_filtered %>%
filter(year == year_max) %>%
filter(vulnerable_role == "Pedestrian",
!is.na(ped_age)) %>%
mutate(age = ifelse(ped_age <= 18, "child", "adult")) %>%
group_by(month, age, year) %>%
summarize(total = n()),
aes(x = month,
y = total,
color = age,
group = age),
linewidth = 1) +
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
scale_fill_manual(values = c("deeppink1", "darkgoldenrod1")) +
scale_color_manual(values = c("deeppink3", "darkgoldenrod3")) +
labs(title = paste0("Crashes involved pedestrians"),
subtitle = paste0(str_to_title(focus_county), " County"),
x = "Month",
y = "Crashes",
fill = paste0("Yearly average\n", year_min, " - ", year_max - 1),
color = year_max,
caption = paste0("crash data from UW TOPS lab - retrieved ",
strftime(retrieve_date, format = "%m/%Y"),
"\nper direction of the WisDOT Bureau of Transportation Safety")) +
theme(plot.caption = element_text(color = "grey"))
ggsave(filename = paste0("figures/MilWALKee_Walks/", "month_age.png"),
device = png,
height = 8.5,
width = 11,
units = "in",
create.dir = TRUE)
ggplot(data = TOPS_data_filtered %>%
filter(vulnerable_role == "Pedestrian",
month(date) <= 8) %>%
group_by(year) %>%
summarize(total = n())) +
geom_col(aes(x = year,
y = total),
fill = "lightblue4") +
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
labs(title = paste0("Crashes involved pedestrians"),
subtitle = paste0(str_to_title(focus_county), " County | ", "January - August"),
x = NULL,
y = "Crashes",
caption = paste0("crash data from UW TOPS lab - retrieved ",
strftime(retrieve_date, format = "%m/%Y"),
"\nper direction of the WisDOT Bureau of Transportation Safety")) +
theme(plot.caption = element_text(color = "grey"))
ggsave(filename = paste0("figures/MilWALKee_Walks/", "ped_years.png"),
device = png,
height = 8.5,
width = 11,
units = "in",
create.dir = TRUE)
```
## Milwaukee maps
## Load API keys from StadiaMaps
```{r APIkeys, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
# register stadia API key ----
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))
```
## add county census data ----
```{r countycensus, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
census_api_key(key = substr(read_file(file = "api_keys/census_api_key"), 1, 40))
county_populations <- get_estimates(geography = "county",
year = 2022,
product = "population",
state = "Wisconsin",
geometry = TRUE) %>%
filter(variable == "POPESTIMATE") %>%
mutate(County = str_to_upper(str_replace(NAME, " County, Wisconsin", "")))
county_populations <- st_transform(county_populations, crs = 4326) %>% filter(County %in% focus_county)
census_tract_populations <- st_transform(get_decennial(
year = 2020,
geography = "block",
variables = "P1_001N",
state = "WI",
county = focus_county,
geometry = TRUE
), crs = 4326)
census_tract_crashes <- st_join(census_tract_populations, st_as_sf(TOPS_data_filtered %>% filter(!is.na(latitude)), coords = c("longitude", "latitude"), crs = 4326), join = st_contains) %>%
group_by(GEOID) %>%
summarise(count = n(), .groups = 'drop')
hexgrid <- rowid_to_column(st_transform(st_as_sf(st_make_grid(st_transform(county_populations, crs = 32616),
cellsize = 3000,
what = 'polygons',
square = FALSE
)), crs = 4326), "ID")
hex_crashes <- st_join(hexgrid, st_as_sf(TOPS_data_filtered %>% filter(!is.na(latitude)), coords = c("longitude", "latitude"), crs = 4326), join = st_contains) %>%
filter(!is.na(year)) %>%
filter(date >= (max(date) - (365 * 5))) %>%
mutate(lastyear = ifelse((date <= max(date) - 365),
"priorfive",
"lastyear")) %>%
group_by(ID, lastyear) %>%
summarise(count = n(), .groups = 'drop') %>%
st_drop_geometry() %>%
pivot_wider(id_cols = ID, names_from = lastyear, values_from = count) %>%
mutate(across(-ID, ~ replace_na(., 0))) %>%
mutate(total = rowSums(dplyr::select(., -ID), na.rm = TRUE))
hex_crashes <- st_as_sf(left_join(hexgrid, hex_crashes), crs = 4326)
hex_crashes <- hex_crashes %>%
mutate(lastyearchange = (lastyear - priorfive/5)/(priorfive/5))
hex_crashes_points <- st_centroid(hex_crashes)
```
```{r MilwaukeeMaps, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
# get basemap
bbox <- st_bbox(county_populations)
bbox <- c(left = as.double(bbox[1]),
bottom = as.double(bbox[2]),
right = as.double(bbox[3]),
top = as.double(bbox[4]))
basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite")
# generate map with bubbles
ggmap(basemap) +
labs(title = paste0("Crashes between cars and pedestrians"),
subtitle = paste0(str_to_title(focus_county),
" County | ",
min(year(TOPS_data$date), na.rm = TRUE),
" - ",
max(year(TOPS_data$date), na.rm = TRUE)),
caption = paste0("crash data from UW TOPS lab - retrieved ",
strftime(retrieve_date, format = "%m/%Y"),
"\nper direction of the WisDOT Bureau of Transportation Safety",
"\nbasemap from StadiaMaps and OpenStreetMap Contributers"),
x = NULL,
y = NULL,
size = paste0("Total crashes"),
fill = "last 12 months\ncompared to previous") +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey", size = 8)) +
# add crash locations
geom_sf(data = hex_crashes_points %>% filter(is.double(total), !is.na(total)),
inherit.aes = FALSE,
aes(size = total,
fill = lastyearchange),
linewidth = 0,
shape = 21,
color = "black") +
scale_size_area() +
scale_fill_gradient2(
low = "darkgreen",
mid = "white",
high = "red",
midpoint = 0,
limits = c(-2, 2),
oob = scales::squish,
labels = scales::percent
)
ggsave(file = paste0("figures/MilWALKee_Walks/",
"milwaukee_map.png"),
device = png,
height = 8.5,
width = 11,
units = "in",
create.dir = TRUE)
```
## identify Halloween trick-or-treating days
```{r trickortreatdays, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
trickortreatdays <- data_frame(year = seq(year(min(TOPS_data$date)), year(max(TOPS_data$date)), 1))
trickortreatdays <- trickortreatdays %>%
mutate(halloween = ymd(paste(year, "10, 31"))) %>%
mutate(wday = wday(halloween, label = TRUE)) %>%
mutate(satbefore = floor_date(halloween, "week", week_start = 6),
sunbefore = floor_date(halloween, "week"))
trickortreatdays <- c(trickortreatdays$halloween, trickortreatdays$satbefore, trickortreatdays$sunbefore)
TOPS_data_filtered <- TOPS_data_filtered %>% mutate(trickortreat = ifelse(date %in% trickortreatdays, TRUE, FALSE))
TOPS_data <- TOPS_data %>% mutate(trickortreat = ifelse(date %in% trickortreatdays, TRUE, FALSE))
```
## explore graphs
```{r exploreGraphs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
ggplot(data = TOPS_data_filtered %>%
# filter(ped_inj %in% c("K", "A", "B")) %>%
# filter(ped_age <=18) %>%
# filter(vulnerable_role == "Pedestrian") %>%
mutate(mday = mday(date)) %>%
mutate(date_yearagnostic = ymd(paste("2025", month, mday))) %>%
group_by(date_yearagnostic, year, trickortreat) %>%
summarize(total = n())) +
geom_col(aes(x = date_yearagnostic,
y = total,
fill = trickortreat)) +
scale_x_date(minor_breaks = "month", date_labels = "%b", expand = expansion(mult = c(0,0))) +
scale_fill_manual(values = c("black", "orange")) +
facet_grid(year ~ .) +
labs(title = paste0("Crashes involved pedestrians - Halloween"),
subtitle = paste0(str_to_title(focus_county), " County | ", year_min, " - ", year_max),
x = NULL,
y = "Crashes",
caption = paste0("crash data from UW TOPS lab - retrieved ",
strftime(retrieve_date, format = "%m/%Y"),
"\nper direction of the WisDOT Bureau of Transportation Safety")) +
theme(plot.caption = element_text(color = "grey"))
ggplot(data = TOPS_data_filtered %>%
# filter(ped_inj %in% c("K", "A", "B")) %>%
mutate(age = ifelse(ped_age <= 18, "child", "adult"))) +
geom_bar(aes(x = month,
fill = age),
position = "fill")
ggplot(data = TOPS_data_filtered %>%
# filter(ped_age <=18) %>%
# filter(vulnerable_role == "Pedestrian") %>%
mutate(age = ifelse(ped_age <= 18, "child", "adult")) %>%
mutate(date_yearagnostic = ymd(paste("2025", month, mday(date)))) %>%
group_by(date_yearagnostic, year, age, trickortreat) %>%
summarize(total = n())) +
# geom_vline(aes(xintercept = ymd("2025-10-31")),
# linetype = "dashed",
# alpha = 0.5) +
geom_col(aes(x = date_yearagnostic,
y = total,
fill = trickortreat)) +
scale_x_date(minor_breaks = "month", date_labels = "%b", expand = expansion(mult = c(0,0))) +
scale_fill_manual(values = c("black", "orange")) +
facet_grid(year ~ .)
```

View file

@ -86,7 +86,8 @@ TOPS_data <- TOPS_data %>% mutate(ped_inj = ifelse(ROLE1 %in% vuln_roles,
INJSVR1, INJSVR1,
ifelse(ROLE2 %in% vuln_roles, ifelse(ROLE2 %in% vuln_roles,
INJSVR2, INJSVR2,
NA))) NA))) %>%
mutate(ped_age = ifelse(ROLE1 %in% vuln_roles, age1, age2))
TOPS_data <- left_join(TOPS_data, injury_severity %>% select(InjSevName, code), join_by(ped_inj == code)) %>% TOPS_data <- left_join(TOPS_data, injury_severity %>% select(InjSevName, code), join_by(ped_inj == code)) %>%
mutate(InjSevName = factor(InjSevName, levels = injury_severity$InjSevName)) %>% mutate(InjSevName = factor(InjSevName, levels = injury_severity$InjSevName)) %>%