2024-11-19 11:14:45 -06:00
library ( tidyverse )
library ( influxdbclient )
library ( glue )
library ( ggmap )
library ( sf )
# parameters needed to make connection to Database
2024-11-22 12:22:43 -06:00
token <- substr ( read_file ( file = ' api_keys/influxdb_madison-metro' ) , 1 , 88 )
2024-11-20 21:39:17 -06:00
org <- " 32b7fde0efd8a3b3"
bucket <- " metro_vehicles"
2024-11-19 11:14:45 -06:00
2024-11-23 21:39:14 -06:00
days <- 3
2024-11-19 11:14:45 -06:00
influx_connection <- InfluxDBClient $ new ( url = " https://influxdb.dendroalsia.net" ,
token = token ,
org = org )
2024-11-23 21:39:14 -06:00
#--- Query influxdb for data
time_start <- Sys.time ( )
2024-11-19 11:14:45 -06:00
2024-11-23 23:14:49 -06:00
query_string <- glue ( ' from(bucket: "{bucket}") ' ,
' |> range(start: -{days}d) ' ,
' |> filter(fn: (r) => r["_measurement"] == "vehicle_data")' ,
' |> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")' )
results <- influx_connection $ query ( query_string )
rm ( influx_connection , query_string , bucket , org , token )
metro_raw <- results %>%
bind_rows ( ) %>%
distinct ( `_time` , pid , vid , lat , lon , spd , .keep_all = TRUE )
nrow ( metro_raw )
2024-11-23 21:39:14 -06:00
time_end <- Sys.time ( )
2024-11-23 23:14:49 -06:00
difftime ( time_end , time_start )
rm ( time_end , time_start , results )
2024-11-19 11:14:45 -06:00
2024-11-23 23:14:49 -06:00
#-----
2024-11-20 16:20:48 -06:00
routes_categorized <- read_csv ( file = " routes_categorized.csv" , col_types = " cc" )
2024-11-19 11:14:45 -06:00
metro_data <- metro_raw %>%
mutate ( time = with_tz ( time , " America/Chicago" ) ,
spd = as.double ( spd ) ,
pdist = as.double ( pdist ) ,
lon = as.double ( lon ) ,
lat = as.double ( lat ) ) %>%
2024-11-20 16:20:48 -06:00
mutate ( date = date ( time ) ) %>%
2024-11-20 09:46:23 -06:00
group_by ( pid , vid ) %>%
2024-11-19 11:14:45 -06:00
arrange ( time ) %>%
2024-11-20 16:20:48 -06:00
mutate ( pdist_lag = lag ( pdist ) ,
time_lag = lag ( time ) ) %>%
2024-11-21 17:24:51 -06:00
ungroup ( ) %>%
2024-11-20 16:20:48 -06:00
mutate ( spd_calc = case_when ( pdist_lag > pdist ~ NA ,
pdist_lag <= pdist ~ ( pdist - pdist_lag ) / as.double ( difftime ( time , time_lag , units = " hours" ) ) / 5280 ) ) %>%
left_join ( routes_categorized , by = " pid" )
2024-11-19 11:14:45 -06:00
2024-11-19 13:37:11 -06:00
2024-11-20 16:20:48 -06:00
bucket_feet <- 500
2024-11-20 09:46:23 -06:00
lat_round <- bucket_feet / 364481.35
lon_round <- bucket_feet / 267203.05
2024-11-19 13:37:11 -06:00
2024-11-19 11:14:45 -06:00
metro_summary <- metro_data %>%
2024-11-20 09:46:23 -06:00
mutate ( lat_bucket = round ( lat / lat_round ) * lat_round ,
lon_bucket = round ( lon / lon_round ) * lon_round ) %>%
2024-11-20 16:20:48 -06:00
group_by ( rt , name , pid , lat_bucket , lon_bucket ) %>%
summarise ( spd = median ( spd , na.rm = TRUE ) ,
2024-11-20 09:46:23 -06:00
spd_calc = median ( spd_calc , na.rm = TRUE ) ,
pdist = median ( pdist ) ,
2024-11-19 11:14:45 -06:00
trip_count = length ( unique ( origtatripno ) ) )
2024-11-21 17:24:51 -06:00
metro_data_sf <- st_as_sf ( metro_data %>% filter ( ! is.na ( lon ) , ! is.na ( lat ) ) , coords = c ( " lon" , " lat" ) , remove = FALSE )
metro_summary_sf <- st_as_sf ( metro_summary %>% filter ( ! is.na ( lon_bucket ) , ! is.na ( lat_bucket ) ) , coords = c ( " lon_bucket" , " lat_bucket" ) , remove = FALSE )
2024-11-19 11:14:45 -06:00
2024-11-20 16:20:48 -06:00
metro_segments <- metro_summary %>%
2024-11-20 09:46:23 -06:00
group_by ( rt , pid ) %>%
2024-11-20 16:20:48 -06:00
arrange ( pdist ) %>%
mutate ( lat_bucket_lag = lag ( lat_bucket ) ,
lon_bucket_lag = lag ( lon_bucket ) ) %>%
filter ( ! is.na ( lat_bucket ) & ! is.na ( lon_bucket ) & ! is.na ( lat_bucket_lag ) & ! is.na ( lon_bucket_lag ) ) %>%
2024-11-19 11:14:45 -06:00
mutate (
2024-11-20 16:20:48 -06:00
geometry = pmap ( list ( lat_bucket , lon_bucket , lat_bucket_lag , lon_bucket_lag ) ,
~ st_linestring ( matrix ( c ( ..2 , ..1 , ..4 , ..3 ) , ncol = 2 , byrow = TRUE ) ) )
2024-11-19 11:14:45 -06:00
) %>%
2024-11-20 16:20:48 -06:00
st_as_sf ( sf_column_name = " geometry" ) %>%
group_by ( rt , name , lat_bucket , lon_bucket ) %>%
summarise ( spd_calc = weighted.mean ( spd_calc , trip_count ) )
2024-11-19 11:14:45 -06:00
# get counts of routes
route_counts <- metro_data %>% group_by ( pid , rt , des ) %>% summarise ( route_count = length ( unique ( origtatripno ) ) )
# make charts
2024-11-23 23:14:49 -06:00
ggplot ( data = metro_data %>% filter ( pid %in% ( routes_categorized %>% filter ( name %in% c ( " B_North" , " B_South" ) ) %>% pull ( pid ) ) ) ,
2024-11-20 09:46:23 -06:00
aes ( x = pdist ,
2024-11-23 23:14:49 -06:00
y = spd ) ) +
geom_point ( size = 0.1 ) +
2024-11-19 11:14:45 -06:00
geom_smooth ( ) +
2024-11-20 16:20:48 -06:00
facet_grid ( name ~ .)
2024-11-23 23:14:49 -06:00
ggplot ( data = metro_data %>% filter ( ! is.na ( name ) ) ,
2024-11-20 16:20:48 -06:00
aes ( x = name ,
y = spd_calc ) ) +
2024-11-23 23:14:49 -06:00
geom_boxplot ( outlier.shape = NA )
2024-11-19 11:14:45 -06:00
register_stadiamaps ( key = substr ( read_file ( file = " api_keys/stadia_api_key" ) , 1 , 36 ) )
2024-11-21 17:24:51 -06:00
bbox <- c ( left = min ( metro_data $ lon , na.rm = TRUE ) ,
bottom = min ( metro_data $ lat , na.rm = TRUE ) ,
right = max ( metro_data $ lon , na.rm = TRUE ) ,
top = max ( metro_data $ lat , na.rm = TRUE ) )
2024-11-19 11:14:45 -06:00
#get basemap
basemap <- get_stadiamap ( bbox = bbox , zoom = 13 , maptype = " stamen_toner_lite" )
2024-11-20 16:20:48 -06:00
quantile ( metro_segments %>% filter ( name %in% c ( " A_West" ) ) %>% pull ( spd_calc ) , c ( 0 , 0.25 , 0.5 , 0.75 , 1 ) , na.rm = TRUE )
2024-11-19 11:14:45 -06:00
2024-11-19 13:20:20 -06:00
for ( route in unique ( routes_categorized $ name ) ) {
route_focus <- routes_categorized %>% filter ( name == route ) %>% pull ( pid )
ggmap ( basemap ) +
labs ( title = paste0 ( " Metro Route Speed - " , route ) ,
subtitle = paste0 ( " averaged between " ,
sum ( route_counts %>% filter ( pid %in% route_focus ) %>% pull ( route_count ) ) ,
" bus trips - " ,
min ( date ( metro_data $ time ) ) ,
" to " ,
max ( date ( metro_data $ time ) ) ) ,
x = NULL ,
y = NULL ) +
theme ( axis.text = element_blank ( ) ,
axis.ticks = element_blank ( ) ,
plot.caption = element_text ( color = " grey" ) ) +
2024-11-21 17:33:44 -06:00
geom_tile ( data = metro_summary %>%
filter ( name %in% route ) %>%
group_by ( lon_bucket , lat_bucket ) %>%
summarise ( spd_calc = weighted.mean ( spd_calc , trip_count ) ) ,
2024-11-19 13:20:20 -06:00
inherit.aes = FALSE ,
2024-11-21 17:33:44 -06:00
aes ( x = lon_bucket ,
y = lat_bucket ,
fill = spd_calc ,
height = lat_round ,
width = lon_round ) ) +
scale_fill_distiller ( palette = " RdYlGn" , direction = " reverse" , limits = c ( 0 , 70 ) , name = " Average speed or segment\n(calculated with locations, not reported speed)" )
2024-11-19 13:20:20 -06:00
ggsave ( file = paste0 ( " figures/" ,
route ,
2024-11-20 16:20:48 -06:00
" _map.pdf" ) ,
title = paste0 ( " Metro Route Speed - " , route ) ,
device = pdf ,
height = 8.5 ,
width = 11 ,
units = " in" ,
create.dir = TRUE )
ggplot ( data = metro_data %>% filter ( name %in% route ) ) +
geom_boxplot ( aes ( x = date ,
2024-11-21 17:24:51 -06:00
y = spd_calc ,
group = date ) )
2024-11-20 16:20:48 -06:00
ggsave ( file = paste0 ( " figures/" ,
route ,
" _date.pdf" ) ,
2024-11-19 13:20:20 -06:00
title = paste0 ( " Metro Route Speed - " , route ) ,
device = pdf ,
height = 8.5 ,
width = 11 ,
units = " in" ,
create.dir = TRUE )
2024-11-23 23:14:49 -06:00
}