From ceb3fc3896c0b22f208771a6a7ef8b4c5e3d1527 Mon Sep 17 00:00:00 2001 From: Ben Varick Date: Mon, 27 Jan 2025 13:57:54 -0600 Subject: [PATCH] added TOPS_data_process.Rmd --- Makefile | 8 +- R/TOPS_data_process.Rmd | 105 ++++++++++++++++++++ crash_data.Rproj => wisconsin_crashes.Rproj | 3 + 3 files changed, 113 insertions(+), 3 deletions(-) create mode 100644 R/TOPS_data_process.Rmd rename crash_data.Rproj => wisconsin_crashes.Rproj (74%) diff --git a/Makefile b/Makefile index 00cc984..835a1e4 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,11 @@ osrm: osrm-data osrm-containers -crashmaps_pdf: route_analysis.Rmd - R -e 'library("rmarkdown"); old_path <- Sys.getenv("PATH"); Sys.setenv(PATH = paste(old_path, "/usr/local/bin", sep = ":")); rmarkdown::render(knit_root_dir = "./", output_dir = "./html", input = "./R/route_analysis.Rmd", output_file = "./html/route_analysis.html")' +TOPS_data_process: R/TOPS_data_process.Rmd + R -e 'library("rmarkdown"); old_path <- Sys.getenv("PATH"); Sys.setenv(PATH = paste(old_path, "/usr/local/bin", sep = ":")); rmarkdown::render(knit_root_dir = "../", output_dir = "./html", input = "./R/TOPS_data_process.Rmd", output_file = "./html/TOPS_data_process.html")' -crashmaps_dynamic: cycle_brouter +crashmaps_PDFs: + +crashmaps_dynamic: osrm-data: cd ./docker/osrm/; wget https://download.geofabrik.de/north-america/us/wisconsin-latest.osm.pbf -O ./data-raw/wisconsin-latest.osm.pbf diff --git a/R/TOPS_data_process.Rmd b/R/TOPS_data_process.Rmd new file mode 100644 index 0000000..5e79fab --- /dev/null +++ b/R/TOPS_data_process.Rmd @@ -0,0 +1,105 @@ +--- +title: "TOPS data process" +output: + html_document: + toc: true + toc_depth: 5 + toc_float: + collapsed: false + smooth_scroll: true +editor_options: + chunk_output_type: console +--- + + +# Input Data & Configuration + +## Libraries +```{r libs, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +date() +rm(list=ls()) +library(tidyverse) +``` + +## Compile TOPS data from multiple years +```{r topsdata, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +## add data from WiscTransPortal Crash Data Retrieval Facility ---- +## query: SELECT * +## FROM DTCRPRD.SUMMARY_COMBINED C +## WHERE C.CRSHDATE BETWEEN TO_DATE('2022-JAN','YYYY-MM') AND +## LAST_DAY(TO_DATE('2022-DEC','YYYY-MM')) AND +## (C.BIKEFLAG = 'Y' OR C.PEDFLAG = 'Y') +## ORDER BY C.DOCTNMBR + +## Load TOPS data ---- +## load TOPS data for the whole state (crashes involving bikes and pedestrians), +TOPS_data <- as.list(NULL) +for (file in list.files(path = "data/TOPS/", pattern = "crash-data-download")) { + message(paste("importing data from file: ", file)) + year <- substr(file, 21, 24) + csv_run <- read_csv(file = paste0("data/TOPS/",file), col_types = cols(.default = "c")) + csv_run["retreive_date"] <- file.info(file = paste0("data/TOPS/",file))$mtime + TOPS_data[[file]] <- csv_run +} +rm(csv_run, file, year) +TOPS_data <- bind_rows(TOPS_data) +``` + +## Clean up data +```{r cleandata, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +TOPS_data <- TOPS_data %>% + mutate(date = ymd(CRSHDATE), + age1 = as.double(AGE1), + age2 = as.double(AGE2), + latitude = as.double(LATDECDG), + longitude = as.double(LONDECDG)) %>% + mutate(month = month(date, label = TRUE), + year = as.factor(year(date))) + +retrieve_date <- max(TOPS_data %>% filter(year %in% max(year(TOPS_data$date), na.rm = TRUE)) %>% pull(retreive_date)) +``` + +## Add injury severity index and assign bike/ped roles +```{r injuryseverity, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +# Injury Severity Index and Color ------------------------------------------- +# injury severity index +injury_severity <- data.frame(InjSevName = c("Injury severity unknown", "No apparent injury", "Possible Injury", "Suspected Minor Injury","Suspected Serious Injury","Fatality"), + code = c(NA, "O", "C", "B", "A", "K"), + color = c("grey", "#fafa6e", "#edc346", "#d88d2d", "#bd5721", "#9b1c1c")) + +#injury_severity_pal <- colorFactor(palette = injury_severity$color, levels = injury_severity$InjSevName) + +TOPS_data <- left_join(TOPS_data, injury_severity %>% select(InjSevName, code), join_by(INJSVR1 == code)) %>% + mutate(InjSevName = factor(InjSevName, levels = injury_severity$InjSevName)) %>% + rename(InjSevName1 = InjSevName) +TOPS_data <- left_join(TOPS_data, injury_severity %>% select(InjSevName, code), join_by(INJSVR2 == code)) %>% + mutate(InjSevName = factor(InjSevName, levels = injury_severity$InjSevName)) %>% + rename(InjSevName2 = InjSevName) +# add bike or pedestrian roles ---- + +bike_roles <- c("BIKE", "O BIKE") +ped_roles <- c("PED", "O PED", "PED NO") +vuln_roles <- c(bike_roles, ped_roles) + +TOPS_data <- TOPS_data %>% mutate(ped_inj = ifelse(ROLE1 %in% vuln_roles, + INJSVR1, + ifelse(ROLE2 %in% vuln_roles, + INJSVR2, + NA))) + +TOPS_data <- left_join(TOPS_data, injury_severity %>% select(InjSevName, code), join_by(ped_inj == code)) %>% + mutate(InjSevName = factor(InjSevName, levels = injury_severity$InjSevName)) %>% + rename(ped_inj_name = InjSevName) + +# bike or ped +TOPS_data <- TOPS_data %>% mutate(vulnerable_role = ifelse(ROLE1 %in% bike_roles | ROLE2 %in% bike_roles, + "Bicyclist", + ifelse(ROLE1 %in% ped_roles | ROLE2 %in% ped_roles, + "Pedestrian", + NA))) +``` + +## Save resulting data table as an Rda file for use in other documents +```{r savecleaneddata, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE} +save(TOPS_data, file = "data/TOPS/TOPS_data.Rda") +``` diff --git a/crash_data.Rproj b/wisconsin_crashes.Rproj similarity index 74% rename from crash_data.Rproj rename to wisconsin_crashes.Rproj index 8e3c2eb..5dea28e 100644 --- a/crash_data.Rproj +++ b/wisconsin_crashes.Rproj @@ -1,4 +1,5 @@ Version: 1.0 +ProjectId: 0da73295-ef24-454f-bd60-31bef147eca9 RestoreWorkspace: Default SaveWorkspace: Default @@ -11,3 +12,5 @@ Encoding: UTF-8 RnwWeave: Sweave LaTeX: pdfLaTeX + +BuildType: Makefile