2024-11-19 11:14:45 -06:00
library ( tidyverse )
library ( influxdbclient )
library ( glue )
library ( ggmap )
library ( sf )
# parameters needed to make connection to Database
token <- substr ( read_file ( file = ' api_keys/influxdb_madison-metro' ) , 1 , 88 )
org <- " e2581d54779b077f"
bucket <- " madison-metro"
days <- 1
influx_connection <- InfluxDBClient $ new ( url = " https://influxdb.dendroalsia.net" ,
token = token ,
org = org )
#---
# Fields you want to query
fields <- c ( " spd" , " pdist" , " pid" , " lon" , " lat" , " vid" , " dly" , " origtatripno" )
# Creating an empty list to store results for each field
results <- vector ( " list" , length ( fields ) )
# Loop through each field, get data, and coerce types if needed
for ( i in seq_along ( fields ) ) {
field <- fields [i ]
query_string <- glue ( ' from(bucket: "{bucket}") ' ,
' |> range(start: -{days}d) ' ,
' |> filter(fn: (r) => r["_measurement"] == "vehicle_data")' ,
' |> filter(fn: (r) => r["_field"] == "{field}")' )
data <- influx_connection $ query ( query_string )
# Ensure the columns are coerced to consistent types
# (Optionally add coercion based on your expected types)
data <- bind_rows ( data ) %>%
mutate ( value = as.character ( `_value` ) ,
field = `_field` ) %>%
select ( time , rt , des , value , field )
results [ [i ] ] <- data
}
# Bind all results together
metro_raw <- bind_rows ( results )
metro_raw <- pivot_wider ( metro_raw , values_from = value , names_from = field ) %>%
distinct ( pid , vid , lat , lon , spd , .keep_all = TRUE )
metro_data <- metro_raw %>%
mutate ( time = with_tz ( time , " America/Chicago" ) ,
spd = as.double ( spd ) ,
pdist = as.double ( pdist ) ,
lon = as.double ( lon ) ,
lat = as.double ( lat ) ) %>%
group_by ( origtatripno ) %>%
arrange ( time ) %>%
mutate ( lag_pdist = lag ( pdist ) ,
lag_time = lag ( time ) ) %>%
mutate ( lag_spd = ( pdist - lag_pdist ) / as.double ( difftime ( time , lag_time , units = " hours" ) ) / 5280 )
metro_summary <- metro_data %>%
2024-11-19 13:20:20 -06:00
mutate ( pdist_bucket = round ( pdist / 200 ) * 100 ) %>%
2024-11-19 11:14:45 -06:00
group_by ( pdist_bucket , rt , des , pid ) %>%
summarise ( lat = median ( lat , na.rm = TRUE ) ,
lon = median ( lon , na.rm = TRUE ) ,
spd = median ( spd , na.rm = TRUE ) ,
lag_spd = median ( lag_spd , na.rm = TRUE ) ,
trip_count = length ( unique ( origtatripno ) ) )
metro_data_sf <- st_as_sf ( metro_data %>% filter ( ! is.na ( lon ) ) , coords = c ( " lon" , " lat" ) , remove = FALSE )
metro_summary_sf <- st_as_sf ( metro_summary %>% filter ( ! is.na ( lon ) ) , coords = c ( " lon" , " lat" ) , remove = FALSE )
segments_sf <- metro_summary_sf %>%
group_by ( rt , pid , des ) %>%
arrange ( pid , pdist_bucket ) %>% # Ensure points within each route are sorted if needed
mutate (
lead_geom = lead ( geometry ) ,
lead_spd = lead ( spd )
) %>%
filter ( ! is.na ( lead_geom ) ) %>%
# Create a segment for each pair of points
rowwise ( ) %>%
mutate (
segment = st_cast ( st_union ( geometry , lead_geom ) , " LINESTRING" )
) %>%
ungroup ( ) %>%
as.data.frame ( ) %>%
select ( rt , pid , des , pdist_bucket , spd , segment , lag_spd ) %>%
st_as_sf ( )
# get counts of routes
route_counts <- metro_data %>% group_by ( pid , rt , des ) %>% summarise ( route_count = length ( unique ( origtatripno ) ) )
# make charts
ggplot ( data = metro_summary %>% filter ( pid %in% c ( " 421" , " 422" ) ) ,
aes ( x = pdist_bucket ,
y = lag_spd ) ) +
geom_point ( ) +
geom_smooth ( ) +
facet_grid ( paste0 ( rt , " -" , des ) ~ .)
register_stadiamaps ( key = substr ( read_file ( file = " api_keys/stadia_api_key" ) , 1 , 36 ) )
bbox <- c ( left = min ( metro_data $ lon ) ,
bottom = min ( metro_data $ lat ) ,
right = max ( metro_data $ lon ) ,
top = max ( metro_data $ lat ) )
#get basemap
basemap <- get_stadiamap ( bbox = bbox , zoom = 13 , maptype = " stamen_toner_lite" )
2024-11-19 13:20:20 -06:00
# A West
routes_categorized <- read_csv ( file = " routes_categorized.csv" , col_types = " cc" )
2024-11-19 11:14:45 -06:00
quantile ( segments_sf %>% filter ( pid %in% c ( " 469" ) ) %>% pull ( lag_spd ) , c ( 0 , 0.25 , 0.5 , 0.75 , 1 ) )
2024-11-19 13:20:20 -06:00
for ( route in unique ( routes_categorized $ name ) ) {
route_focus <- routes_categorized %>% filter ( name == route ) %>% pull ( pid )
ggmap ( basemap ) +
labs ( title = paste0 ( " Metro Route Speed - " , route ) ,
subtitle = paste0 ( " averaged between " ,
sum ( route_counts %>% filter ( pid %in% route_focus ) %>% pull ( route_count ) ) ,
" bus trips - " ,
min ( date ( metro_data $ time ) ) ,
" to " ,
max ( date ( metro_data $ time ) ) ) ,
x = NULL ,
y = NULL ) +
theme ( axis.text = element_blank ( ) ,
axis.ticks = element_blank ( ) ,
plot.caption = element_text ( color = " grey" ) ) +
geom_sf ( data = segments_sf %>% filter ( pid %in% route_focus ) ,
inherit.aes = FALSE ,
aes ( color = lag_spd ) ,
linewidth = 1 ) +
scale_color_distiller ( palette = " RdYlGn" , direction = " reverse" , limits = c ( 0 , 70 ) , name = " Average speed or segment\n(calculated with locations, not reported speed)" )
ggsave ( file = paste0 ( " figures/" ,
route ,
" .pdf" ) ,
title = paste0 ( " Metro Route Speed - " , route ) ,
device = pdf ,
height = 8.5 ,
width = 11 ,
units = " in" ,
create.dir = TRUE )
}