diff --git a/.Rhistory b/.Rhistory index 16d0144..a9ea7e0 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,338 +1,35 @@ -mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) -routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") -#--- -# Fields you want to query -fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno", "tmstmp") -# Creating an empty list to store results for each field -results <- vector("list", length(fields)) -# Loop through each field, get data, and coerce types if needed -for (i in seq_along(fields)) { -field <- fields[i] -query_string <- glue('from(bucket: "{bucket}") ', -'|> range(start: -{days}d) ', -'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', -'|> filter(fn: (r) => r["_field"] == "{field}")') -data <- influx_connection$query(query_string) -# Ensure the columns are coerced to consistent types -# (Optionally add coercion based on your expected types) -data <- bind_rows(data) %>% -mutate(value = as.character(`_value`), -field = `_field`) %>% -select(time, rt, pid, vid, value, field) -results[[i]] <- data -} -query_string <- glue('from(bucket: "{bucket}") ', -'|> range(start: -{days}d) ', -'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', -'|> filter(fn: (r) => r["_field"] == "{field}")') -data <- influx_connection$query(query_string) -field <- spd -field <- "spd" -query_string <- glue('from(bucket: "{bucket}") ', -'|> range(start: -{days}d) ', -'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', -'|> filter(fn: (r) => r["_field"] == "{field}")') -data <- influx_connection$query(query_string) -View(data) -#--- -# Fields you want to query -fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") -# Creating an empty list to store results for each field -results <- vector("list", length(fields)) -# Loop through each field, get data, and coerce types if needed -for (i in seq_along(fields)) { -field <- fields[i] -query_string <- glue('from(bucket: "{bucket}") ', -'|> range(start: -{days}d) ', -'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', -'|> filter(fn: (r) => r["_field"] == "{field}")') -data <- influx_connection$query(query_string) -# Ensure the columns are coerced to consistent types -# (Optionally add coercion based on your expected types) -data <- bind_rows(data) %>% -mutate(value = as.character(`_value`), -field = `_field`) %>% -select(time, rt, pid, vid, value, field) -results[[i]] <- data -} -# Bind all results together -metro_raw <- bind_rows(results) -metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% -distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) -metro_data <- metro_raw %>% -mutate(time = with_tz(time, "America/Chicago"), -spd = as.double(spd), -pdist = as.double(pdist), -lon = as.double(lon), -lat = as.double(lat)) %>% -group_by(origtatripno) %>% -arrange(time) %>% -mutate(lag_pdist = lag(pdist), -lag_time = lag(time)) %>% -mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) -1/364481.35 -bucket_feet <- 200 -bucket_lat <- bucket_feet/364481.35 -bucket_lon <- bucket_feet/267203.05 -bucket_feet <- 200 +~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) +) %>% +st_as_sf(sf_column_name = "geometry") +bucket_feet <- 500 lat_round <- bucket_feet/364481.35 lon_round <- bucket_feet/267203.05 metro_summary <- metro_data %>% left_join(routes_categorized, by = "pid") %>% mutate(lat_bucket = round(lat / lat_round) * lat_round, lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(pdist_bucket, rt, des, pid) %>% -summarise(lat = median(lat, na.rm = TRUE), -lon = median(lon, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), -lag_spd = median(lag_spd, na.rm = TRUE), -trip_count = length(unique(origtatripno))) -metro_summary <- metro_data %>% -left_join(routes_categorized, by = "pid") %>% -mutate(lat_bucket = round(lat / lat_round) * lat_round, -lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(lat_bucket, lon_bucket, rt, des, pid) %>% -summarise(lat = median(lat, na.rm = TRUE), -lon = median(lon, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), -lag_spd = median(lag_spd, na.rm = TRUE), -trip_count = length(unique(origtatripno))) -View(metro_summary) -metro_summary <- metro_data %>% -left_join(routes_categorized, by = "pid") %>% -mutate(lat_bucket = round(lat / lat_round) * lat_round, -lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(lat_bucket, lon_bucket, rt, des, pid) %>% -summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), -lon_bucket = median(lon_bucket, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), -lag_spd = median(lag_spd, na.rm = TRUE), -trip_count = length(unique(origtatripno))) -metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) -metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des) %>% -arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des) %>% -arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des) %>% -arrange(vid, time) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des, vid) %>% -arrange(vid, time) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des, vid) %>% -arrange(pid, time) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des) %>% -arrange(pid, time) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des) %>% -arrange(pid, time) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) -metro_summary <- metro_data %>% -left_join(routes_categorized, by = "pid") %>% -mutate(lat_bucket = round(lat / lat_round) * lat_round, -lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(lat_bucket, lon_bucket, rt, des, pid) %>% -summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), -lon_bucket = median(lon_bucket, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), -lag_spd = median(lag_spd, na.rm = TRUE), -trip_count = length(unique(origtatripno))) -metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) -metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -segments_sf <- metro_summary_sf %>% -group_by(rt, pid, des) %>% -arrange(pid, time) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) -View(metro_data_sf) -# get counts of routes -route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) -metro_summary <- metro_data %>% -left_join(routes_categorized, by = "pid") %>% -mutate(lat_bucket = round(lat / lat_round) * lat_round, -lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(lat_bucket, lon_bucket, rt, des, pid) %>% -summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), -lon_bucket = median(lon_bucket, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), -lag_spd = median(lag_spd, na.rm = TRUE), -pdist = median(pdist), -trip_count = length(unique(origtatripno))) -metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) -metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -# make charts -ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), -aes(x = pdist, -y = lag_spd)) + -geom_point() + -geom_smooth() + -facet_grid(paste0(rt, "-", des) ~ .) -metro_data <- metro_raw %>% -mutate(time = with_tz(time, "America/Chicago"), -spd = as.double(spd), -pdist = as.double(pdist), -lon = as.double(lon), -lat = as.double(lat)) %>% -group_by(origtatripno) %>% -arrange(time) %>% -mutate(lag_pdist = lag(pdist), -lag_time = lag(time)) %>% -mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) -routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") -bucket_feet <- 200 -lat_round <- bucket_feet/364481.35 -lon_round <- bucket_feet/267203.05 -metro_summary <- metro_data %>% -left_join(routes_categorized, by = "pid") %>% -mutate(lat_bucket = round(lat / lat_round) * lat_round, -lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(lat_bucket, lon_bucket, rt, des, pid) %>% -summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), -lon_bucket = median(lon_bucket, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), +group_by(rt, des, pid, lat_bucket, lon_bucket) %>% +summarise(spd = median(spd, na.rm = TRUE), spd_calc = median(spd_calc, na.rm = TRUE), pdist = median(pdist), trip_count = length(unique(origtatripno))) metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -segments_sf <- metro_summary_sf %>% +metro_segments <- metro_summary %>% group_by(rt, pid) %>% -arrange(pid, time) %>% # Ensure points within each route are sorted if needed +arrange(pdist) %>% +mutate(lat_bucket_lag = lag(lat_bucket), +lon_bucket_lag = lag(lon_bucket)) %>% +filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) +geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), +~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) ) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% -st_as_sf() -segments_sf <- metro_summary_sf %>% -group_by(rt, pid) -segments_sf <- metro_summary_sf %>% -group_by(rt, pid) %>% -arrange(pid, time) -segments_sf <- metro_summary_sf %>% -group_by(rt, pid) %>% -arrange(pid, pdist) %>% # Ensure points within each route are sorted if needed -mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) -) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% -st_as_sf() +st_as_sf(sf_column_name = "geometry") # get counts of routes route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) # make charts -ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), +ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), aes(x = pdist, y = spd_calc)) + geom_point() + @@ -346,9 +43,7 @@ top = max(metro_data$lat)) #get basemap basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") # A West -quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1)) -# A West -quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) +quantile(metro_segments %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) for (route in unique(routes_categorized$name)){ route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) ggmap(basemap) + @@ -364,7 +59,7 @@ y = NULL) + theme(axis.text=element_blank(), axis.ticks=element_blank(), plot.caption = element_text(color = "grey")) + -geom_sf(data = segments_sf %>% filter(pid %in% route_focus), +geom_sf(data = metro_segments %>% filter(pid %in% route_focus), inherit.aes = FALSE, aes(color = spd_calc), linewidth = 1) + @@ -379,6 +74,189 @@ width = 11, units = "in", create.dir = TRUE) } +View(metro_data) +View(metro_summary) +metro_summary <- metro_data %>% +left_join(routes_categorized, by = "pid") %>% +mutate(lat_bucket = round(lat / lat_round) * lat_round, +lon_bucket = round(lon / lon_round) * lon_round) +View(metro_summary) +metro_summary <- metro_data %>% +left_join(routes_categorized, by = "pid") %>% +mutate(lat_bucket = round(lat / lat_round) * lat_round, +lon_bucket = round(lon / lon_round) * lon_round) %>% +group_by(rt, name, pid, lat_bucket, lon_bucket) %>% +summarise(spd = median(spd, na.rm = TRUE), +spd_calc = median(spd_calc, na.rm = TRUE), +pdist = median(pdist), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) +metro_segments <- metro_summary %>% +group_by(rt, pid) %>% +arrange(pdist) %>% +mutate(lat_bucket_lag = lag(lat_bucket), +lon_bucket_lag = lag(lon_bucket)) %>% +filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% +mutate( +geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), +~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) +) %>% +st_as_sf(sf_column_name = "geometry") %>% +group_by(rt, name, lat_bucket, lon_bucket) %>% +summarise(weighted.mean(spd_calc, trip_count)) +View(metro_segments) +metro_segments <- metro_summary %>% +group_by(rt, pid) %>% +arrange(pdist) %>% +mutate(lat_bucket_lag = lag(lat_bucket), +lon_bucket_lag = lag(lon_bucket)) %>% +filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% +mutate( +geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), +~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) +) %>% +st_as_sf(sf_column_name = "geometry") %>% +group_by(rt, name, lat_bucket, lon_bucket) %>% +summarise(spd_calc = weighted.mean(spd_calc, trip_count)) +View(metro_segments) +for (route in unique(routes_categorized$name)){ +route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) +ggmap(basemap) + +labs(title = paste0("Metro Route Speed - ", route), +subtitle = paste0("averaged between ", +sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = metro_segments %>% filter(pid %in% route_focus), +inherit.aes = FALSE, +aes(color = spd_calc), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") +ggsave(file = paste0("figures/", +route, +".pdf"), +title = paste0("Metro Route Speed - ", route), +device = pdf, +height = 8.5, +width = 11, +units = "in", +create.dir = TRUE) +} +for (route in unique(routes_categorized$name)){ +route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) +ggmap(basemap) + +labs(title = paste0("Metro Route Speed - ", route), +subtitle = paste0("averaged between ", +sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = metro_segments %>% filter(name %in route), +ggmap(basemap) + +labs(title = paste0("Metro Route Speed - ", route), +subtitle = paste0("averaged between ", +sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = metro_segments %>% filter(name %in% route), +inherit.aes = FALSE, +aes(color = spd_calc), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") +for (route in unique(routes_categorized$name)){ +route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) +ggmap(basemap) + +labs(title = paste0("Metro Route Speed - ", route), +subtitle = paste0("averaged between ", +sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = metro_segments %>% filter(name %in% route), +inherit.aes = FALSE, +aes(color = spd_calc), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") +ggsave(file = paste0("figures/", +route, +".pdf"), +title = paste0("Metro Route Speed - ", route), +device = pdf, +height = 8.5, +width = 11, +units = "in", +create.dir = TRUE) +} +# A West +quantile(metro_segments %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) +quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) +metro_data <- metro_raw %>% +mutate(time = with_tz(time, "America/Chicago"), +spd = as.double(spd), +pdist = as.double(pdist), +lon = as.double(lon), +lat = as.double(lat)) %>% +group_by(pid, vid) %>% +arrange(time) %>% +mutate(pdist_lag = lag(pdist), +time_lag = lag(time)) %>% +mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, +pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% +left_join(routes_categorized, by = "pid") +ggplot(data = metro_data %>% filter(name %in% route)) + +geom_violin(aes(x = time, +y = spd_calc)) +metro_data <- metro_raw %>% +mutate(time = with_tz(time, "America/Chicago"), +spd = as.double(spd), +pdist = as.double(pdist), +lon = as.double(lon), +lat = as.double(lat)) %>% +mutate(date = date(time)) %>% +group_by(pid, vid) %>% +arrange(time) %>% +mutate(pdist_lag = lag(pdist), +time_lag = lag(time)) %>% +mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, +pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% +left_join(routes_categorized, by = "pid") +ggplot(data = metro_data %>% filter(name %in% route)) + +geom_violin(aes(x = time, +y = spd_calc, +group = date)) +ggplot(data = metro_data %>% filter(name %in% route)) + +geom_violin(aes(x = date, +y = spd_calc)) +ggplot(data = metro_data %>% filter(name %in% route)) + +geom_boxplot(aes(x = date, +y = spd_calc)) library(tidyverse) library(influxdbclient) library(glue) @@ -417,60 +295,151 @@ results[[i]] <- data metro_raw <- bind_rows(results) metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) +routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") metro_data <- metro_raw %>% mutate(time = with_tz(time, "America/Chicago"), spd = as.double(spd), pdist = as.double(pdist), lon = as.double(lon), lat = as.double(lat)) %>% +mutate(date = date(time)) %>% group_by(pid, vid) %>% arrange(time) %>% -mutate(lag_pdist = lag(pdist), -lag_time = lag(time)) %>% -mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) -routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") -bucket_feet <- 200 +mutate(pdist_lag = lag(pdist), +time_lag = lag(time)) %>% +mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, +pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% +left_join(routes_categorized, by = "pid") +bucket_feet <- 500 lat_round <- bucket_feet/364481.35 lon_round <- bucket_feet/267203.05 metro_summary <- metro_data %>% -left_join(routes_categorized, by = "pid") %>% mutate(lat_bucket = round(lat / lat_round) * lat_round, lon_bucket = round(lon / lon_round) * lon_round) %>% -group_by(lat_bucket, lon_bucket, rt, des, pid) %>% -summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), -lon_bucket = median(lon_bucket, na.rm = TRUE), -spd = median(spd, na.rm = TRUE), +group_by(rt, name, pid, lat_bucket, lon_bucket) %>% +summarise(spd = median(spd, na.rm = TRUE), spd_calc = median(spd_calc, na.rm = TRUE), pdist = median(pdist), trip_count = length(unique(origtatripno))) metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -segments_sf <- metro_summary_sf %>% +metro_segments <- metro_summary %>% group_by(rt, pid) %>% -arrange(pid, pdist) %>% # Ensure points within each route are sorted if needed +arrange(pdist) %>% +mutate(lat_bucket_lag = lag(lat_bucket), +lon_bucket_lag = lag(lon_bucket)) %>% +filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% mutate( -lead_geom = lead(geometry), -lead_spd = lead(spd) +geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), +~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) ) %>% -filter(!is.na(lead_geom)) %>% -# Create a segment for each pair of points -rowwise() %>% -mutate( -segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") -) %>% -ungroup() %>% -as.data.frame() %>% -select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% -st_as_sf() +st_as_sf(sf_column_name = "geometry") %>% +group_by(rt, name, lat_bucket, lon_bucket) %>% +summarise(spd_calc = weighted.mean(spd_calc, trip_count)) # get counts of routes route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) # make charts -ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), +ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), aes(x = pdist, y = spd_calc)) + geom_point() + geom_smooth() + facet_grid(paste0(rt, "-", des) ~ .) +# make charts +ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), +aes(x = pdist, +y = spd_calc)) + +geom_point() + +geom_smooth() + +facet_grid(name ~ .) +library(tidyverse) +library(influxdbclient) +library(glue) +library(ggmap) +library(sf) +# parameters needed to make connection to Database +token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) +org <- "e2581d54779b077f" +bucket <- "madison-metro" +days <- 1 +influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", +token = token, +org = org) +#--- +# Fields you want to query +fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") +# Creating an empty list to store results for each field +results <- vector("list", length(fields)) +# Loop through each field, get data, and coerce types if needed +for (i in seq_along(fields)) { +field <- fields[i] +query_string <- glue('from(bucket: "{bucket}") ', +'|> range(start: -{days}d) ', +'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', +'|> filter(fn: (r) => r["_field"] == "{field}")') +data <- influx_connection$query(query_string) +# Ensure the columns are coerced to consistent types +# (Optionally add coercion based on your expected types) +data <- bind_rows(data) %>% +mutate(value = as.character(`_value`), +field = `_field`) %>% +select(time, rt, pid, vid, value, field) +results[[i]] <- data +} +# Bind all results together +metro_raw <- bind_rows(results) +metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% +distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) +routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") +metro_data <- metro_raw %>% +mutate(time = with_tz(time, "America/Chicago"), +spd = as.double(spd), +pdist = as.double(pdist), +lon = as.double(lon), +lat = as.double(lat)) %>% +mutate(date = date(time)) %>% +group_by(pid, vid) %>% +arrange(time) %>% +mutate(pdist_lag = lag(pdist), +time_lag = lag(time)) %>% +mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, +pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% +left_join(routes_categorized, by = "pid") +bucket_feet <- 500 +lat_round <- bucket_feet/364481.35 +lon_round <- bucket_feet/267203.05 +metro_summary <- metro_data %>% +mutate(lat_bucket = round(lat / lat_round) * lat_round, +lon_bucket = round(lon / lon_round) * lon_round) %>% +group_by(rt, name, pid, lat_bucket, lon_bucket) %>% +summarise(spd = median(spd, na.rm = TRUE), +spd_calc = median(spd_calc, na.rm = TRUE), +pdist = median(pdist), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) +metro_segments <- metro_summary %>% +group_by(rt, pid) %>% +arrange(pdist) %>% +mutate(lat_bucket_lag = lag(lat_bucket), +lon_bucket_lag = lag(lon_bucket)) %>% +filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% +mutate( +geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), +~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) +) %>% +st_as_sf(sf_column_name = "geometry") %>% +group_by(rt, name, lat_bucket, lon_bucket) %>% +summarise(spd_calc = weighted.mean(spd_calc, trip_count)) +# get counts of routes +route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) +# make charts +ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), +aes(x = pdist, +y = spd_calc)) + +geom_point() + +geom_smooth() + +facet_grid(name ~ .) register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) bbox <- c(left = min(metro_data$lon), bottom = min(metro_data$lat), @@ -478,8 +447,7 @@ right = max(metro_data$lon), top = max(metro_data$lat)) #get basemap basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") -# A West -quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) +quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) for (route in unique(routes_categorized$name)){ route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) ggmap(basemap) + @@ -495,14 +463,26 @@ y = NULL) + theme(axis.text=element_blank(), axis.ticks=element_blank(), plot.caption = element_text(color = "grey")) + -geom_sf(data = segments_sf %>% filter(pid %in% route_focus), +geom_sf(data = metro_segments %>% filter(name %in% route), inherit.aes = FALSE, aes(color = spd_calc), linewidth = 1) + scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") ggsave(file = paste0("figures/", route, -".pdf"), +"_map.pdf"), +title = paste0("Metro Route Speed - ", route), +device = pdf, +height = 8.5, +width = 11, +units = "in", +create.dir = TRUE) +ggplot(data = metro_data %>% filter(name %in% route)) + +geom_boxplot(aes(x = date, +y = spd_calc)) +ggsave(file = paste0("figures/", +route, +"_date.pdf"), title = paste0("Metro Route Speed - ", route), device = pdf, height = 8.5, @@ -510,3 +490,23 @@ width = 11, units = "in", create.dir = TRUE) } +ggplot(data = metro_summary %>% filter(!is.blank(name)), +aes(x = pdist, +y = spd_calc)) + +geom_boxplot() +ggplot(data = metro_summary %>% filter(!is.na(name)), +aes(x = pdist, +y = spd_calc)) + +geom_boxplot() +ggplot(data = metro_summary %>% filter(!is.na(name)), +aes(x = name, +y = spd_calc)) + +geom_boxplot() +ggplot(data = metro_summary %>% filter(!is.na(name)), +aes(x = name, +y = spd_calc)) + +geom_violin() +ggplot(data = metro_summary %>% filter(!is.na(name)), +aes(x = name, +y = spd_calc)) + +geom_boxplot() diff --git a/madison-metro.R b/madison-metro.R index 10edc74..aad0eae 100644 --- a/madison-metro.R +++ b/madison-metro.R @@ -15,10 +15,10 @@ influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net" token = token, org = org) #--- -# Fields you want to query +# Fields to query fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") -# Creating an empty list to store results for each field +# An empty list to store results for each field results <- vector("list", length(fields)) # Loop through each field, get data, and coerce types if needed @@ -48,33 +48,34 @@ metro_raw <- bind_rows(results) metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) +routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") + metro_data <- metro_raw %>% mutate(time = with_tz(time, "America/Chicago"), spd = as.double(spd), pdist = as.double(pdist), lon = as.double(lon), lat = as.double(lat)) %>% + mutate(date = date(time)) %>% group_by(pid, vid) %>% arrange(time) %>% - mutate(lag_pdist = lag(pdist), - lag_time = lag(time)) %>% - mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) + mutate(pdist_lag = lag(pdist), + time_lag = lag(time)) %>% + mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, + pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% + left_join(routes_categorized, by = "pid") -routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") -bucket_feet <- 200 +bucket_feet <- 500 lat_round <- bucket_feet/364481.35 lon_round <- bucket_feet/267203.05 metro_summary <- metro_data %>% - left_join(routes_categorized, by = "pid") %>% mutate(lat_bucket = round(lat / lat_round) * lat_round, lon_bucket = round(lon / lon_round) * lon_round) %>% - group_by(lat_bucket, lon_bucket, rt, des, pid) %>% - summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), - lon_bucket = median(lon_bucket, na.rm = TRUE), - spd = median(spd, na.rm = TRUE), + group_by(rt, name, pid, lat_bucket, lon_bucket) %>% + summarise(spd = median(spd, na.rm = TRUE), spd_calc = median(spd_calc, na.rm = TRUE), pdist = median(pdist), trip_count = length(unique(origtatripno))) @@ -82,34 +83,35 @@ metro_summary <- metro_data %>% metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) -segments_sf <- metro_summary_sf %>% +metro_segments <- metro_summary %>% group_by(rt, pid) %>% - arrange(pid, pdist) %>% # Ensure points within each route are sorted if needed + arrange(pdist) %>% + mutate(lat_bucket_lag = lag(lat_bucket), + lon_bucket_lag = lag(lon_bucket)) %>% + filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% mutate( - lead_geom = lead(geometry), - lead_spd = lead(spd) + geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), + ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) ) %>% - filter(!is.na(lead_geom)) %>% - # Create a segment for each pair of points - rowwise() %>% - mutate( - segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") - ) %>% - ungroup() %>% - as.data.frame() %>% - select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% - st_as_sf() + st_as_sf(sf_column_name = "geometry") %>% + group_by(rt, name, lat_bucket, lon_bucket) %>% + summarise(spd_calc = weighted.mean(spd_calc, trip_count)) # get counts of routes route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) # make charts -ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), +ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), aes(x = pdist, y = spd_calc)) + geom_point() + geom_smooth() + - facet_grid(paste0(rt, "-", des) ~ .) + facet_grid(name ~ .) + +ggplot(data = metro_summary %>% filter(!is.na(name)), + aes(x = name, + y = spd_calc)) + + geom_boxplot() register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) @@ -121,8 +123,7 @@ bbox <- c(left = min(metro_data$lon), #get basemap basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") -# A West -quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) +quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) for (route in unique(routes_categorized$name)){ route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) @@ -139,14 +140,27 @@ for (route in unique(routes_categorized$name)){ theme(axis.text=element_blank(), axis.ticks=element_blank(), plot.caption = element_text(color = "grey")) + - geom_sf(data = segments_sf %>% filter(pid %in% route_focus), + geom_sf(data = metro_segments %>% filter(name %in% route), inherit.aes = FALSE, aes(color = spd_calc), linewidth = 1) + scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") ggsave(file = paste0("figures/", route, - ".pdf"), + "_map.pdf"), + title = paste0("Metro Route Speed - ", route), + device = pdf, + height = 8.5, + width = 11, + units = "in", + create.dir = TRUE) + + ggplot(data = metro_data %>% filter(name %in% route)) + + geom_boxplot(aes(x = date, + y = spd_calc)) + ggsave(file = paste0("figures/", + route, + "_date.pdf"), title = paste0("Metro Route Speed - ", route), device = pdf, height = 8.5,