diff --git a/R/MilWALKeeWalks.Rmd b/R/MilWALKeeWalks.Rmd new file mode 100644 index 0000000..0d26d6e --- /dev/null +++ b/R/MilWALKeeWalks.Rmd @@ -0,0 +1,348 @@ +--- +title: "MilWALKeeWalks" +output: + html_document: + toc: true + toc_depth: 5 + toc_float: + collapsed: false + smooth_scroll: true +editor_options: + chunk_output_type: console +--- + + +# Input Data & Configuration + +## Libraries +```{r libs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +date() +rm(list=ls()) +library(tidyverse) +library(ggmap) +library(sf) +library(osrm) +library(smoothr) +library(ggnewscale) +library(RColorBrewer) +library(magick) +library(rsvg) +library(parallel) +library(tidycensus) +``` + + +## Load TOPS data +```{r loadTOPS, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +load(file = "data/TOPS/TOPS_data.Rda") +load(file = "data/TOPS/vuln_roles.Rda") +load(file = "data/TOPS/retrieve_date.Rda") +load(file = "data/TOPS/injury_severity.Rda") +``` + +## filter to county +```{r filterTOPS, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +focus_county <- "MILWAUKEE" +TOPS_data_filtered <- TOPS_data %>% filter(CNTYNAME == focus_county) +``` + +## identify start and end dates +```{r startenddates, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +year_min <- min(year(TOPS_data_filtered$date)) +year_max <- max(year(TOPS_data_filtered$date)) +``` + +## intro charts +```{r introCharts, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +ggplot() + + geom_col(data = TOPS_data_filtered %>% + filter(year != year_max) %>% + filter(!is.na(vulnerable_role)) %>% + group_by(month, vulnerable_role) %>% + summarize(total = n()), + aes(x = month, + y = total/((year_max - 1) - year_min + 1), + fill = vulnerable_role), + position = position_dodge()) + + geom_line(data = TOPS_data_filtered %>% + filter(year == year_max) %>% + filter(!is.na(vulnerable_role)) %>% + group_by(month, vulnerable_role) %>% + summarize(total = n()), + aes(x = month, + y = total, + color = vulnerable_role, + group = vulnerable_role), + linewidth = 1) + + scale_y_continuous(expand = expansion(mult = c(0,0.1))) + + scale_fill_manual(values = c("sienna3", "deepskyblue3")) + + scale_color_manual(values = c("sienna4", "deepskyblue4")) + + labs(title = paste0("Crashes involved pedestrians and bicyclists"), + subtitle = paste0(str_to_title(focus_county), " County"), + x = "Month", + y = "Average crashes per year", + fill = paste0("Yearly average\n", year_min, " - ", year_max - 1), + color = year_max, + caption = paste0("crash data from UW TOPS lab - retrieved ", + strftime(retrieve_date, format = "%m/%Y"), + "\nper direction of the WisDOT Bureau of Transportation Safety")) + + theme(plot.caption = element_text(color = "grey")) +ggsave(filename = paste0("figures/MilWALKee_Walks/", "month_role.png"), + device = png, + height = 8.5, + width = 11, + units = "in", + create.dir = TRUE) + +ggplot() + + geom_col(data = TOPS_data_filtered %>% + filter(vulnerable_role == "Pedestrian", + !is.na(ped_age)) %>% + filter(year != year_max) %>% + mutate(age = ifelse(ped_age <= 18, "child", "adult")) %>% + group_by(month, age) %>% + summarize(total = n()/((year_max - 1) - year_min + 1)), + aes(x = month, + y = total, + fill = age), + position = position_dodge()) + + geom_line(data = TOPS_data_filtered %>% + filter(year == year_max) %>% + filter(vulnerable_role == "Pedestrian", + !is.na(ped_age)) %>% + mutate(age = ifelse(ped_age <= 18, "child", "adult")) %>% + group_by(month, age, year) %>% + summarize(total = n()), + aes(x = month, + y = total, + color = age, + group = age), + linewidth = 1) + + scale_y_continuous(expand = expansion(mult = c(0,0.1))) + + scale_fill_manual(values = c("deeppink1", "darkgoldenrod1")) + + scale_color_manual(values = c("deeppink3", "darkgoldenrod3")) + + labs(title = paste0("Crashes involved pedestrians"), + subtitle = paste0(str_to_title(focus_county), " County"), + x = "Month", + y = "Crashes", + fill = paste0("Yearly average\n", year_min, " - ", year_max - 1), + color = year_max, + caption = paste0("crash data from UW TOPS lab - retrieved ", + strftime(retrieve_date, format = "%m/%Y"), + "\nper direction of the WisDOT Bureau of Transportation Safety")) + + theme(plot.caption = element_text(color = "grey")) +ggsave(filename = paste0("figures/MilWALKee_Walks/", "month_age.png"), + device = png, + height = 8.5, + width = 11, + units = "in", + create.dir = TRUE) + +ggplot(data = TOPS_data_filtered %>% + filter(vulnerable_role == "Pedestrian", + month(date) <= 8) %>% + group_by(year) %>% + summarize(total = n())) + + geom_col(aes(x = year, + y = total), + fill = "lightblue4") + + scale_y_continuous(expand = expansion(mult = c(0,0.1))) + + labs(title = paste0("Crashes involved pedestrians"), + subtitle = paste0(str_to_title(focus_county), " County | ", "January - August"), + x = NULL, + y = "Crashes", + caption = paste0("crash data from UW TOPS lab - retrieved ", + strftime(retrieve_date, format = "%m/%Y"), + "\nper direction of the WisDOT Bureau of Transportation Safety")) + + theme(plot.caption = element_text(color = "grey")) +ggsave(filename = paste0("figures/MilWALKee_Walks/", "ped_years.png"), + device = png, + height = 8.5, + width = 11, + units = "in", + create.dir = TRUE) +``` + +## Milwaukee maps + +## Load API keys from StadiaMaps +```{r APIkeys, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +# register stadia API key ---- +register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) +``` + +## add county census data ---- +```{r countycensus, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +census_api_key(key = substr(read_file(file = "api_keys/census_api_key"), 1, 40)) +county_populations <- get_estimates(geography = "county", + year = 2022, + product = "population", + state = "Wisconsin", + geometry = TRUE) %>% + filter(variable == "POPESTIMATE") %>% + mutate(County = str_to_upper(str_replace(NAME, " County, Wisconsin", ""))) +county_populations <- st_transform(county_populations, crs = 4326) %>% filter(County %in% focus_county) +census_tract_populations <- st_transform(get_decennial( + year = 2020, + geography = "block", + variables = "P1_001N", + state = "WI", + county = focus_county, + geometry = TRUE +), crs = 4326) + +census_tract_crashes <- st_join(census_tract_populations, st_as_sf(TOPS_data_filtered %>% filter(!is.na(latitude)), coords = c("longitude", "latitude"), crs = 4326), join = st_contains) %>% + group_by(GEOID) %>% + summarise(count = n(), .groups = 'drop') + +hexgrid <- rowid_to_column(st_transform(st_as_sf(st_make_grid(st_transform(county_populations, crs = 32616), + cellsize = 3000, + what = 'polygons', + square = FALSE + )), crs = 4326), "ID") + +hex_crashes <- st_join(hexgrid, st_as_sf(TOPS_data_filtered %>% filter(!is.na(latitude)), coords = c("longitude", "latitude"), crs = 4326), join = st_contains) %>% + filter(!is.na(year)) %>% + filter(date >= (max(date) - (365 * 5))) %>% + mutate(lastyear = ifelse((date <= max(date) - 365), + "priorfive", + "lastyear")) %>% + group_by(ID, lastyear) %>% + summarise(count = n(), .groups = 'drop') %>% + st_drop_geometry() %>% + pivot_wider(id_cols = ID, names_from = lastyear, values_from = count) %>% + mutate(across(-ID, ~ replace_na(., 0))) %>% + mutate(total = rowSums(dplyr::select(., -ID), na.rm = TRUE)) + +hex_crashes <- st_as_sf(left_join(hexgrid, hex_crashes), crs = 4326) + +hex_crashes <- hex_crashes %>% + mutate(lastyearchange = (lastyear - priorfive/5)/(priorfive/5)) + +hex_crashes_points <- st_centroid(hex_crashes) + +``` + +```{r MilwaukeeMaps, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +# get basemap +bbox <- st_bbox(county_populations) +bbox <- c(left = as.double(bbox[1]), + bottom = as.double(bbox[2]), + right = as.double(bbox[3]), + top = as.double(bbox[4])) +basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") + +# generate map with bubbles +ggmap(basemap) + + labs(title = paste0("Crashes between cars and pedestrians"), + subtitle = paste0(str_to_title(focus_county), + " County | ", + min(year(TOPS_data$date), na.rm = TRUE), + " - ", + max(year(TOPS_data$date), na.rm = TRUE)), + caption = paste0("crash data from UW TOPS lab - retrieved ", + strftime(retrieve_date, format = "%m/%Y"), + "\nper direction of the WisDOT Bureau of Transportation Safety", + "\nbasemap from StadiaMaps and OpenStreetMap Contributers"), + x = NULL, + y = NULL, + size = paste0("Total crashes"), + fill = "last 12 months\ncompared to previous") + + theme(axis.text=element_blank(), + axis.ticks=element_blank(), + plot.caption = element_text(color = "grey", size = 8)) + + # add crash locations + geom_sf(data = hex_crashes_points %>% filter(is.double(total), !is.na(total)), + inherit.aes = FALSE, + aes(size = total, + fill = lastyearchange), + linewidth = 0, + shape = 21, + color = "black") + + scale_size_area() + + scale_fill_gradient2( + low = "darkgreen", + mid = "white", + high = "red", + midpoint = 0, + limits = c(-2, 2), + oob = scales::squish, + labels = scales::percent + ) + +ggsave(file = paste0("figures/MilWALKee_Walks/", + "milwaukee_map.png"), + device = png, + height = 8.5, + width = 11, + units = "in", + create.dir = TRUE) +``` + + +## identify Halloween trick-or-treating days +```{r trickortreatdays, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +trickortreatdays <- data_frame(year = seq(year(min(TOPS_data$date)), year(max(TOPS_data$date)), 1)) +trickortreatdays <- trickortreatdays %>% + mutate(halloween = ymd(paste(year, "10, 31"))) %>% + mutate(wday = wday(halloween, label = TRUE)) %>% + mutate(satbefore = floor_date(halloween, "week", week_start = 6), + sunbefore = floor_date(halloween, "week")) + +trickortreatdays <- c(trickortreatdays$halloween, trickortreatdays$satbefore, trickortreatdays$sunbefore) +TOPS_data_filtered <- TOPS_data_filtered %>% mutate(trickortreat = ifelse(date %in% trickortreatdays, TRUE, FALSE)) +TOPS_data <- TOPS_data %>% mutate(trickortreat = ifelse(date %in% trickortreatdays, TRUE, FALSE)) + +``` + +## explore graphs +```{r exploreGraphs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +ggplot(data = TOPS_data_filtered %>% +# filter(ped_inj %in% c("K", "A", "B")) %>% +# filter(ped_age <=18) %>% +# filter(vulnerable_role == "Pedestrian") %>% + mutate(mday = mday(date)) %>% + mutate(date_yearagnostic = ymd(paste("2025", month, mday))) %>% + group_by(date_yearagnostic, year, trickortreat) %>% + summarize(total = n())) + + geom_col(aes(x = date_yearagnostic, + y = total, + fill = trickortreat)) + + scale_x_date(minor_breaks = "month", date_labels = "%b", expand = expansion(mult = c(0,0))) + + scale_fill_manual(values = c("black", "orange")) + + facet_grid(year ~ .) + + labs(title = paste0("Crashes involved pedestrians - Halloween"), + subtitle = paste0(str_to_title(focus_county), " County | ", year_min, " - ", year_max), + x = NULL, + y = "Crashes", + caption = paste0("crash data from UW TOPS lab - retrieved ", + strftime(retrieve_date, format = "%m/%Y"), + "\nper direction of the WisDOT Bureau of Transportation Safety")) + + theme(plot.caption = element_text(color = "grey")) + + +ggplot(data = TOPS_data_filtered %>% + # filter(ped_inj %in% c("K", "A", "B")) %>% + mutate(age = ifelse(ped_age <= 18, "child", "adult"))) + + geom_bar(aes(x = month, + fill = age), + position = "fill") + +ggplot(data = TOPS_data_filtered %>% +# filter(ped_age <=18) %>% +# filter(vulnerable_role == "Pedestrian") %>% + mutate(age = ifelse(ped_age <= 18, "child", "adult")) %>% + mutate(date_yearagnostic = ymd(paste("2025", month, mday(date)))) %>% + group_by(date_yearagnostic, year, age, trickortreat) %>% + summarize(total = n())) + +# geom_vline(aes(xintercept = ymd("2025-10-31")), +# linetype = "dashed", +# alpha = 0.5) + + geom_col(aes(x = date_yearagnostic, + y = total, + fill = trickortreat)) + + scale_x_date(minor_breaks = "month", date_labels = "%b", expand = expansion(mult = c(0,0))) + + scale_fill_manual(values = c("black", "orange")) + + facet_grid(year ~ .) + +``` \ No newline at end of file diff --git a/R/TOPS_data_process.Rmd b/R/TOPS_data_process.Rmd index fc5e7ca..f7b0288 100644 --- a/R/TOPS_data_process.Rmd +++ b/R/TOPS_data_process.Rmd @@ -86,7 +86,8 @@ TOPS_data <- TOPS_data %>% mutate(ped_inj = ifelse(ROLE1 %in% vuln_roles, INJSVR1, ifelse(ROLE2 %in% vuln_roles, INJSVR2, - NA))) + NA))) %>% + mutate(ped_age = ifelse(ROLE1 %in% vuln_roles, age1, age2)) TOPS_data <- left_join(TOPS_data, injury_severity %>% select(InjSevName, code), join_by(ped_inj == code)) %>% mutate(InjSevName = factor(InjSevName, levels = injury_severity$InjSevName)) %>%