From eab18f7e05f12cfcc8e2d0800791eafbdb314fcf Mon Sep 17 00:00:00 2001 From: Ben Varick Date: Tue, 19 Nov 2024 11:14:45 -0600 Subject: [PATCH] start of data processing and analysis --- .Rhistory | 512 ++++++++++++++++++++++++++++++++++++++++++++ .gitignore | 1 + madison-metro.R | 134 ++++++++++++ madison-metro.Rproj | 13 ++ 4 files changed, 660 insertions(+) create mode 100644 .Rhistory create mode 100644 madison-metro.R create mode 100644 madison-metro.Rproj diff --git a/.Rhistory b/.Rhistory new file mode 100644 index 0000000..c616cf1 --- /dev/null +++ b/.Rhistory @@ -0,0 +1,512 @@ +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +length(unique(metro_data %>% filter(pid %in% c("422")) %>% pull(origtatripno))), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("422")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +View(metro_summary) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat), +lon = median(lon), +spd = median(spd), +lag_spd = median(lag_spd), +trip_count = n()) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid, origtatripno) %>% +summarise(lat = median(lat), +lon = median(lon), +spd = median(spd), +lag_spd = median(lag_spd), +trip_count = n()) +trip_count = length(unique(origtatripno)) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat), +lon = median(lon), +spd = median(spd), +lag_spd = median(lag_spd), +trip_count = length(unique(origtatripno))) +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count))), +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("422")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count) +metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) +segments_sf <- metro_summary_sf %>% +group_by(rt, pid, des) %>% +arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed +mutate( +lead_geom = lead(geometry), +lead_spd = lead(spd) +) %>% +filter(!is.na(lead_geom)) %>% +# Create a segment for each pair of points +rowwise() %>% +mutate( +segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") +) %>% +ungroup() %>% +as.data.frame() %>% +select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% +st_as_sf() +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("422")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("422")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("422")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +metro_summary %>% filter(pid %in% c("422")) +max(metro_summary %>% filter(pid %in% c("422")) %>% pull(trip_count)) +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("469")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +library(tidyverse) +library(influxdbclient) +library(glue) +library(ggmap) +library(sf) +# parameters needed to make connection to Database +token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) +org <- "e2581d54779b077f" +bucket <- "madison-metro" +days <- 1 +influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", +token = token, +org = org) +#--- +# Fields you want to query +fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") +# Creating an empty list to store results for each field +results <- vector("list", length(fields)) +# Loop through each field, get data, and coerce types if needed +for (i in seq_along(fields)) { +field <- fields[i] +query_string <- glue('from(bucket: "{bucket}") ', +'|> range(start: -{days}d) ', +'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', +'|> filter(fn: (r) => r["_field"] == "{field}")') +data <- influx_connection$query(query_string) +# Ensure the columns are coerced to consistent types +# (Optionally add coercion based on your expected types) +data <- bind_rows(data) %>% +mutate(value = as.character(`_value`), +field = `_field`) %>% +select(time, rt, des, value, field) +results[[i]] <- data +} +# Bind all results together +metro_raw <- bind_rows(results) +metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% +distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) +metro_data <- metro_raw %>% +mutate(time = with_tz(time, "America/Chicago"), +spd = as.double(spd), +pdist = as.double(pdist), +lon = as.double(lon), +lat = as.double(lat)) %>% +group_by(origtatripno) %>% +arrange(time) %>% +mutate(lag_pdist = lag(pdist), +lag_time = lag(time)) %>% +mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat), +lon = median(lon), +spd = median(spd), +lag_spd = median(lag_spd), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) +segments_sf <- metro_summary_sf %>% +group_by(rt, pid, des) %>% +arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed +mutate( +lead_geom = lead(geometry), +lead_spd = lead(spd) +) %>% +filter(!is.na(lead_geom)) %>% +# Create a segment for each pair of points +rowwise() %>% +mutate( +segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") +) %>% +ungroup() %>% +as.data.frame() %>% +select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% +st_as_sf() +ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), +aes(x = pdist_bucket, +y = lag_spd)) + +geom_point() + +geom_smooth() + +facet_grid(paste0(rt, "-", des) ~ .) +register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) +bbox <- c(left = min(metro_data$lon), +bottom = min(metro_data$lat), +right = max(metro_data$lon), +top = max(metro_data$lat)) +#get basemap +basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("469")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +library(tidyverse) +library(influxdbclient) +library(glue) +library(ggmap) +library(sf) +# parameters needed to make connection to Database +token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) +org <- "e2581d54779b077f" +bucket <- "madison-metro" +days <- 1 +influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", +token = token, +org = org) +#--- +# Fields you want to query +fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") +# Creating an empty list to store results for each field +results <- vector("list", length(fields)) +# Loop through each field, get data, and coerce types if needed +for (i in seq_along(fields)) { +field <- fields[i] +query_string <- glue('from(bucket: "{bucket}") ', +'|> range(start: -{days}d) ', +'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', +'|> filter(fn: (r) => r["_field"] == "{field}")') +data <- influx_connection$query(query_string) +# Ensure the columns are coerced to consistent types +# (Optionally add coercion based on your expected types) +data <- bind_rows(data) %>% +mutate(value = as.character(`_value`), +field = `_field`) %>% +select(time, rt, des, value, field) +results[[i]] <- data +} +# Bind all results together +metro_raw <- bind_rows(results) +metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% +distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) +metro_data <- metro_raw %>% +mutate(time = with_tz(time, "America/Chicago"), +spd = as.double(spd), +pdist = as.double(pdist), +lon = as.double(lon), +lat = as.double(lat)) %>% +group_by(origtatripno) %>% +arrange(time) %>% +mutate(lag_pdist = lag(pdist), +lag_time = lag(time)) %>% +mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat), +lon = median(lon), +spd = median(spd), +lag_spd = median(lag_spd), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) +metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) +library(tidyverse) +library(influxdbclient) +library(glue) +library(ggmap) +library(sf) +# parameters needed to make connection to Database +token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) +org <- "e2581d54779b077f" +bucket <- "madison-metro" +days <- 1 +influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", +token = token, +org = org) +#--- +# Fields you want to query +fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") +# Creating an empty list to store results for each field +results <- vector("list", length(fields)) +# Loop through each field, get data, and coerce types if needed +for (i in seq_along(fields)) { +field <- fields[i] +query_string <- glue('from(bucket: "{bucket}") ', +'|> range(start: -{days}d) ', +'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', +'|> filter(fn: (r) => r["_field"] == "{field}")') +data <- influx_connection$query(query_string) +# Ensure the columns are coerced to consistent types +# (Optionally add coercion based on your expected types) +data <- bind_rows(data) %>% +mutate(value = as.character(`_value`), +field = `_field`) %>% +select(time, rt, des, value, field) +results[[i]] <- data +} +# Bind all results together +metro_raw <- bind_rows(results) +metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% +distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) +metro_data <- metro_raw %>% +mutate(time = with_tz(time, "America/Chicago"), +spd = as.double(spd), +pdist = as.double(pdist), +lon = as.double(lon), +lat = as.double(lat)) %>% +group_by(origtatripno) %>% +arrange(time) %>% +mutate(lag_pdist = lag(pdist), +lag_time = lag(time)) %>% +mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat), +lon = median(lon), +spd = median(spd), +lag_spd = median(lag_spd), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat, na.rm = TRUE), +lon = median(lon, na.rm = TRUE), +spd = median(spd, na.rm = TRUE), +lag_spd = median(lag_spd, na.rm = TRUE), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data %>% filter(is.double(lat)), coords = c("lon", "lat"), remove = FALSE) +View(metro_data) +metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lat)), coords = c("lon", "lat"), remove = FALSE) +metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) +View(metro_summary) +nrow(metro_data %>% filter(is.na(lon))) +metro_summary <- metro_data %>% +mutate(pdist_bucket = round(pdist / 500) * 500) %>% +group_by(pdist_bucket, rt, des, pid) %>% +summarise(lat = median(lat, na.rm = TRUE), +lon = median(lon, na.rm = TRUE), +spd = median(spd, na.rm = TRUE), +lag_spd = median(lag_spd, na.rm = TRUE), +trip_count = length(unique(origtatripno))) +metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) +segments_sf <- metro_summary_sf %>% +group_by(rt, pid, des) %>% +arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed +mutate( +lead_geom = lead(geometry), +lead_spd = lead(spd) +) %>% +filter(!is.na(lead_geom)) %>% +# Create a segment for each pair of points +rowwise() %>% +mutate( +segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") +) %>% +ungroup() %>% +as.data.frame() %>% +select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% +st_as_sf() +ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), +aes(x = pdist_bucket, +y = lag_spd)) + +geom_point() + +geom_smooth() + +facet_grid(paste0(rt, "-", des) ~ .) +register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) +bbox <- c(left = min(metro_data$lon), +bottom = min(metro_data$lat), +right = max(metro_data$lon), +top = max(metro_data$lat)) +#get basemap +basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("469")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +quantile(segments_sf %>% filter(pid %in% c("469") %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) +) +quantile(segments_sf %>% filter(pid %in% c("469") %>% pull(lag_spd)), c(0,0.25, 0.5, 0.75, 1)) +quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) +quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("469")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) +# get counts of routes +route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) +View(route_counts) +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("469")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) +ggmap(basemap) + +labs(title = "Metro Route Speed", +subtitle = paste0("averaged between ", +metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count), +" bus trips - ", +min(date(metro_data$time)), +" to ", +max(date(metro_data$time))), +x = NULL, +y = NULL) + +theme(axis.text=element_blank(), +axis.ticks=element_blank(), +plot.caption = element_text(color = "grey")) + +geom_sf(data = segments_sf %>% filter(pid %in% c("1280")), +inherit.aes = FALSE, +aes(color = lag_spd), +linewidth = 1) + +scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + +facet_wrap(paste0(rt, "-", des) ~ .) diff --git a/.gitignore b/.gitignore index f59eb66..35a6d9b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ api_keys/* figures/* +.Rproj.user diff --git a/madison-metro.R b/madison-metro.R new file mode 100644 index 0000000..9566d86 --- /dev/null +++ b/madison-metro.R @@ -0,0 +1,134 @@ +library(tidyverse) +library(influxdbclient) +library(glue) +library(ggmap) +library(sf) + +# parameters needed to make connection to Database +token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) +org <- "e2581d54779b077f" +bucket <- "madison-metro" + +days <- 1 + +influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", + token = token, + org = org) +#--- +# Fields you want to query +fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") + +# Creating an empty list to store results for each field +results <- vector("list", length(fields)) + +# Loop through each field, get data, and coerce types if needed +for (i in seq_along(fields)) { + field <- fields[i] + + query_string <- glue('from(bucket: "{bucket}") ', + '|> range(start: -{days}d) ', + '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', + '|> filter(fn: (r) => r["_field"] == "{field}")') + + data <- influx_connection$query(query_string) + + # Ensure the columns are coerced to consistent types + # (Optionally add coercion based on your expected types) + data <- bind_rows(data) %>% + mutate(value = as.character(`_value`), + field = `_field`) %>% + select(time, rt, des, value, field) + + results[[i]] <- data +} + +# Bind all results together +metro_raw <- bind_rows(results) + +metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% + distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) + +metro_data <- metro_raw %>% + mutate(time = with_tz(time, "America/Chicago"), + spd = as.double(spd), + pdist = as.double(pdist), + lon = as.double(lon), + lat = as.double(lat)) %>% + group_by(origtatripno) %>% + arrange(time) %>% + mutate(lag_pdist = lag(pdist), + lag_time = lag(time)) %>% + mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) + +metro_summary <- metro_data %>% + mutate(pdist_bucket = round(pdist / 500) * 500) %>% + group_by(pdist_bucket, rt, des, pid) %>% + summarise(lat = median(lat, na.rm = TRUE), + lon = median(lon, na.rm = TRUE), + spd = median(spd, na.rm = TRUE), + lag_spd = median(lag_spd, na.rm = TRUE), + trip_count = length(unique(origtatripno))) + +metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) +metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) + +segments_sf <- metro_summary_sf %>% + group_by(rt, pid, des) %>% + arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed + mutate( + lead_geom = lead(geometry), + lead_spd = lead(spd) + ) %>% + filter(!is.na(lead_geom)) %>% + # Create a segment for each pair of points + rowwise() %>% + mutate( + segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") + ) %>% + ungroup() %>% + as.data.frame() %>% + select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% + st_as_sf() + +# get counts of routes +route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) + +# make charts +ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), + aes(x = pdist_bucket, + y = lag_spd)) + + geom_point() + + geom_smooth() + + facet_grid(paste0(rt, "-", des) ~ .) + +register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) + +bbox <- c(left = min(metro_data$lon), + bottom = min(metro_data$lat), + right = max(metro_data$lon), + top = max(metro_data$lat)) + +#get basemap +basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") + +quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) + +ggmap(basemap) + + labs(title = "Metro Route Speed", + subtitle = paste0("averaged between ", + metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count), + " bus trips - ", + min(date(metro_data$time)), + " to ", + max(date(metro_data$time))), + x = NULL, + y = NULL) + + theme(axis.text=element_blank(), + axis.ticks=element_blank(), + plot.caption = element_text(color = "grey")) + + geom_sf(data = segments_sf %>% filter(pid %in% c("1280")), + inherit.aes = FALSE, + aes(color = lag_spd), + linewidth = 1) + + scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + + facet_wrap(paste0(rt, "-", des) ~ .) diff --git a/madison-metro.Rproj b/madison-metro.Rproj new file mode 100644 index 0000000..8e3c2eb --- /dev/null +++ b/madison-metro.Rproj @@ -0,0 +1,13 @@ +Version: 1.0 + +RestoreWorkspace: Default +SaveWorkspace: Default +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX