# madison-metro/madison-metro.R
# Madison Metro bus-speed analysis: pull vehicle telemetry from InfluxDB,
# derive per-segment speeds, and render per-route maps and charts.
library(tidyverse)
library(influxdbclient)
library(glue)
library(ggmap)
library(sf)

# Parameters needed to make the connection to the database.
# The key file may end with a newline; the token itself is 88 characters.
token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88)
org <- "32b7fde0efd8a3b3"
bucket <- "metro_vehicles"

# How far back to query, in days.
days <- 7

influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net",
                                        token = token,
                                        org = org)
#---
# Fields to query from the "vehicle_data" measurement.
fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno")

# An empty list to store results for each field (preallocated).
results <- vector("list", length(fields))

# Query each field separately, then coerce values to character so the
# per-field tables can be row-bound without type conflicts.
for (i in seq_along(fields)) {
  field <- fields[i]
  query_string <- glue('from(bucket: "{bucket}") ',
                       '|> range(start: -{days}d) ',
                       '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
                       '|> filter(fn: (r) => r["_field"] == "{field}")')
  data <- influx_connection$query(query_string)
  # query() returns a list of data frames; bind them and keep only the
  # identifying columns plus the value (as character for now).
  data <- bind_rows(data) %>%
    mutate(value = as.character(`_value`),
           field = `_field`) %>%
    select(time, rt, pid, vid, value, field)
  results[[i]] <- data
}

# Bind all per-field results into one long table.
metro_raw <- bind_rows(results)
# Widen to one row per observation, then drop duplicated position reports
# (same pattern/vehicle at the same location and speed).
metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
  distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)

# Manual categorization of pattern ids (pid) into named route variants.
routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc")
# Coerce types, compute lagged distance/time per pattern-vehicle pair, and
# derive a speed from consecutive position reports.
metro_data <- metro_raw %>%
  mutate(time = with_tz(time, "America/Chicago"),
         spd = as.double(spd),
         pdist = as.double(pdist),
         lon = as.double(lon),
         lat = as.double(lat)) %>%
  mutate(date = date(time)) %>%
  group_by(pid, vid) %>%
  arrange(time) %>%
  mutate(pdist_lag = lag(pdist),
         time_lag = lag(time)) %>%
  ungroup() %>%
  # Speed in mph: change in pattern distance (feet) over elapsed hours / 5280.
  # A decrease in pdist indicates a new trip started, so speed is undefined.
  mutate(spd_calc = case_when(pdist_lag > pdist ~ NA_real_,
                              pdist_lag <= pdist ~ (pdist - pdist_lag) /
                                as.double(difftime(time, time_lag, units = "hours")) / 5280)) %>%
  left_join(routes_categorized, by = "pid")
# Spatially bucket observations into ~500 ft cells and summarize speeds.
bucket_feet <- 500
# Approximate degrees per foot of latitude/longitude near Madison —
# presumably derived for ~43°N; TODO confirm the conversion constants.
lat_round <- bucket_feet/364481.35
lon_round <- bucket_feet/267203.05

metro_summary <- metro_data %>%
  mutate(lat_bucket = round(lat / lat_round) * lat_round,
         lon_bucket = round(lon / lon_round) * lon_round) %>%
  group_by(rt, name, pid, lat_bucket, lon_bucket) %>%
  # na.rm on every median so a single missing report doesn't NA the bucket.
  summarise(spd = median(spd, na.rm = TRUE),
            spd_calc = median(spd_calc, na.rm = TRUE),
            pdist = median(pdist, na.rm = TRUE),
            trip_count = n_distinct(origtatripno),
            .groups = "drop")
# Convert the point tables to sf objects. Coordinates come from the bus
# feed's lon/lat fields — assumed WGS84 GPS, so tag them with EPSG:4326.
metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon), !is.na(lat)),
                          coords = c("lon", "lat"), remove = FALSE, crs = 4326)
metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket), !is.na(lat_bucket)),
                             coords = c("lon_bucket", "lat_bucket"), remove = FALSE, crs = 4326)
# Build a line segment between each pair of consecutive buckets along a
# pattern (ordered by pdist), then average the calculated speed per cell.
metro_segments <- metro_summary %>%
  group_by(rt, pid) %>%
  arrange(pdist) %>%
  mutate(lat_bucket_lag = lag(lat_bucket),
         lon_bucket_lag = lag(lon_bucket)) %>%
  filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>%
  mutate(
    # One linestring per consecutive bucket pair; matrix columns are (x, y)
    # so longitude comes first (..2/..4) and latitude second (..1/..3).
    geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag),
                    ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE)))
  ) %>%
  st_as_sf(sf_column_name = "geometry") %>%
  st_set_crs(4326) %>%
  # Grouped summarise on an sf object also unions the segment geometries
  # within each group into a single feature.
  group_by(rt, name, lat_bucket, lon_bucket) %>%
  summarise(spd_calc = weighted.mean(spd_calc, trip_count))
# Count distinct observed trips for each pattern / route / destination.
route_counts <- metro_data %>%
  group_by(pid, rt, des) %>%
  summarise(route_count = n_distinct(origtatripno))
# Make charts ------------------------------------------------------------

# Calculated speed along the pattern distance for the B route variants.
ggplot(data = metro_summary %>%
         filter(pid %in% (routes_categorized %>%
                            filter(name %in% c("B_North", "B_South")) %>%
                            pull(pid))),
       aes(x = pdist,
           y = spd_calc)) +
  geom_point() +
  geom_smooth() +
  facet_grid(name ~ .)

# Distribution of calculated speeds by named route.
ggplot(data = metro_summary %>% filter(!is.na(name)),
       aes(x = name,
           y = spd_calc)) +
  geom_boxplot()
# Stadia Maps API key (36 characters; the file may end with a newline).
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))

# Bounding box covering every observed vehicle position.
bbox <- c(left = min(metro_data$lon, na.rm = TRUE),
          bottom = min(metro_data$lat, na.rm = TRUE),
          right = max(metro_data$lon, na.rm = TRUE),
          top = max(metro_data$lat, na.rm = TRUE))

# Get the basemap tiles for that extent.
basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite")

# Quick look at the calculated-speed distribution on one route.
quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc),
         c(0, 0.25, 0.5, 0.75, 1), na.rm = TRUE)
# One speed map and one by-date boxplot per named route.
#
# NOTE: each plot is assigned and passed to ggsave(plot = ...) explicitly.
# Plots created inside a for loop are never auto-printed, so relying on
# ggsave()'s default of last_plot() would save a stale (or missing) plot.
for (route in unique(routes_categorized$name)) {
  # Pattern ids belonging to this route variant.
  route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid)

  map_plot <- ggmap(basemap) +
    labs(title = paste0("Metro Route Speed - ", route),
         subtitle = paste0("averaged between ",
                           sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)),
                           " bus trips - ",
                           min(date(metro_data$time)),
                           " to ",
                           max(date(metro_data$time))),
         x = NULL,
         y = NULL) +
    theme(axis.text = element_blank(),
          axis.ticks = element_blank(),
          plot.caption = element_text(color = "grey")) +
    # Average the per-pattern bucket speeds down to one tile per cell,
    # weighting by how many trips were observed in each bucket.
    geom_tile(data = metro_summary %>%
                filter(name %in% route) %>%
                group_by(lon_bucket, lat_bucket) %>%
                summarise(spd_calc = weighted.mean(spd_calc, trip_count)),
              inherit.aes = FALSE,
              aes(x = lon_bucket,
                  y = lat_bucket,
                  fill = spd_calc,
                  height = lat_round,
                  width = lon_round)) +
    scale_fill_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)")
  # title = ... is forwarded to the pdf device as document metadata.
  ggsave(file = paste0("figures/", route, "_map.pdf"),
         plot = map_plot,
         title = paste0("Metro Route Speed - ", route),
         device = pdf,
         height = 8.5,
         width = 11,
         units = "in",
         create.dir = TRUE)

  # Daily distribution of calculated speeds for this route.
  date_plot <- ggplot(data = metro_data %>% filter(name %in% route)) +
    geom_boxplot(aes(x = date,
                     y = spd_calc,
                     group = date))
  ggsave(file = paste0("figures/", route, "_date.pdf"),
         plot = date_plot,
         title = paste0("Metro Route Speed - ", route),
         device = pdf,
         height = 8.5,
         width = 11,
         units = "in",
         create.dir = TRUE)
}
# ggmap(basemap) +
# labs(title = paste0("Metro Route Speed - ", route),
# subtitle = paste0("averaged between ",
# sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)),
# " bus trips - ",
# min(date(metro_data$time)),
# " to ",
# max(date(metro_data$time))),
# x = NULL,
# y = NULL) +
# theme(axis.text=element_blank(),
# axis.ticks=element_blank(),
# plot.caption = element_text(color = "grey")) +
# geom_sf(data = metro_segments %>% filter(name %in% route),
# inherit.aes = FALSE,
# aes(color = spd_calc),
# linewidth = 1) +
# scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)")