library(tidyverse)

## Load TOPS data ----
## To load TOPS data for the whole state for crashes involving bikes and
## pedestrians:
## Step 1 - download csv from the TOPS Data Retrieval Tool with the query:
##   SELECT *
##   FROM DTCRPRD.SUMMARY_COMBINED C
##   WHERE C.CRSHDATE BETWEEN TO_DATE('2023-JAN','YYYY-MM')
##     AND LAST_DAY(TO_DATE('2023-DEC','YYYY-MM'))
##     AND (C.BIKEFLAG = 'Y' OR C.PEDFLAG = 'Y')
##   ORDER BY C.DOCTNMBR
## Step 2 - include RACE1 and RACE2 for download in preferences
## Step 3 - save the csv in the "data/TOPS" directory as
##          crash-data-download_2023.csv

tops_dir <- "data/TOPS/"
tops_files <- list.files(path = tops_dir, pattern = "crash-data-download")

# Fail early with a clear message instead of an obscure "column not found"
# error further down when nothing has been downloaded yet.
if (length(tops_files) == 0) {
  stop(
    "no files matching 'crash-data-download' found in ", tops_dir,
    call. = FALSE
  )
}

# Every column is read as character; the typed columns needed later are
# derived explicitly in the clean-up step below.
TOPS_data <- tops_files %>%
  map(function(file) {
    message(paste("importing data from file: ", file))
    read_csv(file = paste0(tops_dir, file), col_types = cols(.default = "c"))
  }) %>%
  bind_rows()

## clean up data ----
# lubridate functions are namespace-qualified so the script does not depend
# on lubridate being attached by library(tidyverse).
TOPS_data <- TOPS_data %>%
  mutate(
    date = lubridate::mdy(CRSHDATE),
    age1 = as.double(AGE1),
    age2 = as.double(AGE2),
    latitude = as.double(LATDECDG),
    longitude = as.double(LONDECDG),
    month = lubridate::month(date, label = TRUE),
    year = as.factor(lubridate::year(date))
  )

# Injury severity index and color ----
injury_severity <- data.frame(
  InjSevName = c(
    "No apparent injury", "Possible Injury", "Suspected Minor Injury",
    "Suspected Serious Injury", "Fatality"
  ),
  code = c("O", "C", "B", "A", "K"),
  color = c("#fafa6e", "#edc346", "#d88d2d", "#bd5721", "#9b1c1c")
)

# Translate the one-letter severity codes of both units into factors whose
# levels follow the order of `injury_severity`; unknown codes become NA.
TOPS_data <- TOPS_data %>%
  mutate(
    InjSevName1 = factor(
      INJSVR1,
      levels = injury_severity$code,
      labels = injury_severity$InjSevName
    ),
    InjSevName2 = factor(
      INJSVR2,
      levels = injury_severity$code,
      labels = injury_severity$InjSevName
    )
  )

# Race names ----
race <- data.frame(
  race_name = c("Asian", "Black", "Indian", "Hispanic", "White"),
  code = c("A", "B", "I", "H", "W")
)

# Look up the race label of both units; codes not listed in `race` become NA.
TOPS_data <- TOPS_data %>%
  mutate(
    race_name1 = race$race_name[match(RACE1, race$code)],
    race_name2 = race$race_name[match(RACE2, race$code)]
  )

## set parameters ----
county_focus <- c("MILWAUKEE")
municipality_focus <- c("MILWAUKEE")

## helpers ----

# Return, row by row, `value1` when unit 1 has one of `roles`, otherwise
# `value2` when unit 2 has one of `roles`, otherwise NA. Keeping the role test
# and the value of a unit in one call prevents reading an attribute from the
# wrong unit.
pick_by_role <- function(roles, role1, value1, role2, value2) {
  case_when(
    role1 %in% roles ~ value1,
    role2 %in% roles ~ value2
  )
}

# Count distinct crash documents (DOCTNMBR) per combination of `group_cols`
# (a character vector of column names). Rows come back sorted by the grouping
# columns and ungrouped.
count_crashes <- function(data, group_cols) {
  data %>%
    group_by(across(all_of(group_cols))) %>%
    summarise(count = n_distinct(DOCTNMBR), .groups = "drop")
}

# Build the named list of summary tables for one geography.
#   data       - the cleaned TOPS data frame
#   area_col   - name of the column holding the geography ("MUNINAME" or
#                "CNTYNAME")
#   area_focus - character vector of geography values to keep
# Only crashes where a bicyclist/pedestrian unit has injury code "A" or "K"
# are kept.
build_data_summaries <- function(data, area_col, area_focus) {
  vru_roles <- c("BIKE", "PED")
  driver_roles <- c("DR")
  severe_codes <- c("A", "K")

  # NOTE(review): when both units are BIKE/PED, unit 1 is always the one
  # reported, even if only unit 2 was severely injured - confirm whether that
  # case occurs in the data.
  # NOTE(review): assumes the download contains a SEX2 column alongside SEX1
  # - confirm.
  severe_crashes <- data %>%
    filter(.data[[area_col]] %in% area_focus) %>%
    filter(
      (ROLE1 %in% vru_roles & INJSVR1 %in% severe_codes) |
        (ROLE2 %in% vru_roles & INJSVR2 %in% severe_codes)
    ) %>%
    mutate(
      ped_type = pick_by_role(vru_roles, ROLE1, ROLE1, ROLE2, ROLE2),
      ped_inj = pick_by_role(
        vru_roles,
        ROLE1, as.character(InjSevName1),
        ROLE2, as.character(InjSevName2)
      ),
      ped_race = pick_by_role(vru_roles, ROLE1, race_name1, ROLE2, race_name2),
      ped_age = pick_by_role(vru_roles, ROLE1, age1, ROLE2, age2),
      ped_sex = pick_by_role(vru_roles, ROLE1, SEX1, ROLE2, SEX2),
      driver_race = pick_by_role(
        driver_roles, ROLE1, race_name1, ROLE2, race_name2
      ),
      driver_age = pick_by_role(driver_roles, ROLE1, age1, ROLE2, age2),
      driver_sex = pick_by_role(driver_roles, ROLE1, SEX1, ROLE2, SEX2)
    )

  base_cols <- c(area_col, "year", "ped_type", "ped_inj")

  list(
    # crashes by year that resulted in a pedestrian fatality or severe injury
    crash_by_year = count_crashes(severe_crashes, base_cols),
    # crashes by race of pedestrian/bicyclist; counts are pooled across every
    # loaded year (this table has no `year` grouping)
    crash_by_race = count_crashes(
      severe_crashes, c(area_col, "ped_type", "ped_inj", "ped_race")
    ),
    # crashes by race of driver
    crash_by_driver_race = count_crashes(
      severe_crashes, c(base_cols, "driver_race")
    ),
    # crashes by age of pedestrian/bicyclist
    crash_by_age = count_crashes(severe_crashes, c(base_cols, "ped_age")),
    # crashes by age of driver
    crash_by_driver_age = count_crashes(
      severe_crashes, c(base_cols, "driver_age")
    ),
    # crashes by sex of pedestrian/bicyclist
    crash_by_sex = count_crashes(severe_crashes, c(base_cols, "ped_sex")),
    # crashes by sex of driver
    crash_by_driver_sex = count_crashes(
      severe_crashes, c(base_cols, "driver_sex")
    )
  )
}

# Write every table of `summaries` to "<out_dir>/<table name>.csv", creating
# `out_dir` first if needed. Returns `summaries` invisibly.
export_data_summaries <- function(summaries, out_dir) {
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
  for (table_name in names(summaries)) {
    write_csv(
      summaries[[table_name]],
      file = file.path(out_dir, paste0(table_name, ".csv"))
    )
  }
  invisible(summaries)
}

## build data summaries for city ----
data_summary <- build_data_summaries(
  TOPS_data, "MUNINAME", municipality_focus
)

## export csv files for city ----
export_data_summaries(data_summary, "data_summaries/city")

## build data summaries for county ----
data_summary <- build_data_summaries(TOPS_data, "CNTYNAME", county_focus)

## export csv files for county ----
export_data_summaries(data_summary, "data_summaries/county")