diff --git a/.Rhistory b/.Rhistory
index c616cf1..8cd9e28 100644
--- a/.Rhistory
+++ b/.Rhistory
@@ -1,64 +1,3 @@
-labs(title = "Metro Route Speed",
-subtitle = paste0("averaged between ",
-length(unique(metro_data %>% filter(pid %in% c("422")) %>% pull(origtatripno))),
-" bus trips - ",
-min(date(metro_data$time)),
-" to ",
-max(date(metro_data$time))),
-x = NULL,
-y = NULL) +
-theme(axis.text=element_blank(),
-axis.ticks=element_blank(),
-plot.caption = element_text(color = "grey")) +
-geom_sf(data = segments_sf %>% filter(pid %in% c("422")),
-inherit.aes = FALSE,
-aes(color = lag_spd),
-linewidth = 1) +
-scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
-facet_wrap(paste0(rt, "-", des) ~ .)
-View(metro_summary)
-metro_summary <- metro_data %>%
-mutate(pdist_bucket = round(pdist / 500) * 500) %>%
-group_by(pdist_bucket, rt, des, pid) %>%
-summarise(lat = median(lat),
-lon = median(lon),
-spd = median(spd),
-lag_spd = median(lag_spd),
-trip_count = n())
-metro_summary <- metro_data %>%
-mutate(pdist_bucket = round(pdist / 500) * 500) %>%
-group_by(pdist_bucket, rt, des, pid, origtatripno) %>%
-summarise(lat = median(lat),
-lon = median(lon),
-spd = median(spd),
-lag_spd = median(lag_spd),
-trip_count = n())
-trip_count = length(unique(origtatripno))
-metro_summary <- metro_data %>%
-mutate(pdist_bucket = round(pdist / 500) * 500) %>%
-group_by(pdist_bucket, rt, des, pid) %>%
-summarise(lat = median(lat),
-lon = median(lon),
-spd = median(spd),
-lag_spd = median(lag_spd),
-trip_count = length(unique(origtatripno)))
-ggmap(basemap) +
-labs(title = "Metro Route Speed",
-subtitle = paste0("averaged between ",
-segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count))),
-ggmap(basemap) +
-labs(title = "Metro Route Speed",
-subtitle = paste0("averaged between ",
-segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count),
-" bus trips - ",
-min(date(metro_data$time)),
-" to ",
-max(date(metro_data$time))),
-x = NULL,
-y = NULL) +
-theme(axis.text=element_blank(),
-axis.ticks=element_blank(),
-plot.caption = element_text(color = "grey")) +
 geom_sf(data = segments_sf %>% filter(pid %in% c("422")),
 inherit.aes = FALSE,
 aes(color = lag_spd),
@@ -510,3 +449,64 @@ aes(color = lag_spd),
 linewidth = 1) +
 scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
 facet_wrap(paste0(rt, "-", des) ~ .)
+library(tidyverse)
+library(influxdbclient)
+library(glue)
+library(ggmap)
+library(sf)
+# parameters needed to make connection to Database
+token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88)
+org <- "e2581d54779b077f"
+bucket <- "madison-metro"
+days <- 1
+influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net",
+token = token,
+org = org)
+#---
+# Fields you want to query
+fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno")
+# Creating an empty list to store results for each field
+results <- vector("list", length(fields))
+# Loop through each field, get data, and coerce types if needed
+for (i in seq_along(fields)) {
+field <- fields[i]
+query_string <- glue('from(bucket: "{bucket}") ',
+'|> range(start: -{days}d) ',
+'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
+'|> filter(fn: (r) => r["_field"] == "{field}")')
+data <- influx_connection$query(query_string)
+# Ensure the columns are coerced to consistent types
+# (Optionally add coercion based on your expected types)
+data <- bind_rows(data) %>%
+mutate(value = as.character(`_value`),
+field = `_field`) %>%
+select(time, rt, des, value, field)
+results[[i]] <- data
+}
+# Bind all results together
+metro_raw <- bind_rows(results)
+metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
+distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
+metro_data <- metro_raw %>%
+mutate(time = with_tz(time, "America/Chicago"),
+spd = as.double(spd),
+pdist = as.double(pdist),
+lon = as.double(lon),
+lat = as.double(lat)) %>%
+group_by(origtatripno) %>%
+arrange(time) %>%
+mutate(lag_pdist = lag(pdist),
+lag_time = lag(time)) %>%
+mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
+routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc")
+bucket_lat <- 364481.35/200
+bucket_lon <- 267203.05/200
+metro_summary <- metro_data %>%
+left_join(routes_categorized, by = "pid") %>%
+mutate(lat_bucket = round(lat / 200) * 100) %>%
+group_by(pdist_bucket, rt, des, pid) %>%
+summarise(lat = median(lat, na.rm = TRUE),
+lon = median(lon, na.rm = TRUE),
+spd = median(spd, na.rm = TRUE),
+lag_spd = median(lag_spd, na.rm = TRUE),
+trip_count = length(unique(origtatripno)))
diff --git a/madison-metro.R b/madison-metro.R
index dda2a8e..10edc74 100644
--- a/madison-metro.R
+++ b/madison-metro.R
@@ -16,7 +16,7 @@ influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net"
                                 org = org)
 #---
 # Fields you want to query
-fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno")
+fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno")
 
 # Creating an empty list to store results for each field
 results <- vector("list", length(fields))
@@ -37,7 +37,7 @@ for (i in seq_along(fields)) {
   data <- bind_rows(data) %>%
     mutate(value = as.character(`_value`),
            field = `_field`) %>% 
-    select(time, rt, des, value, field)
+    select(time, rt, pid, vid, value, field)
   
   results[[i]] <- data
 }
@@ -54,33 +54,37 @@ metro_data <- metro_raw %>%
          pdist = as.double(pdist),
          lon = as.double(lon),
          lat = as.double(lat)) %>%
-  group_by(origtatripno) %>%
+  group_by(pid, vid) %>%
   arrange(time) %>%
   mutate(lag_pdist = lag(pdist),
          lag_time = lag(time)) %>%
-  mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
+  mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
 
 routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc")
 
-bucket_lat <- 364481.35/200
-bucket_lon <- 267203.05/200
+bucket_feet <- 200
+
+lat_round <- bucket_feet/364481.35
+lon_round <- bucket_feet/267203.05
 
 metro_summary <- metro_data %>%
   left_join(routes_categorized, by = "pid") %>%
-  mutate(lat_bucket = round(lat / 200) * 100) %>%
-  group_by(pdist_bucket, rt, des, pid) %>%
-  summarise(lat = median(lat, na.rm = TRUE),
-            lon = median(lon, na.rm = TRUE),
+  mutate(lat_bucket = round(lat / lat_round) * lat_round,
+         lon_bucket = round(lon / lon_round) * lon_round) %>%
+  group_by(lat_bucket, lon_bucket, rt, des, pid) %>%
+  summarise(lat_bucket = median(lat_bucket, na.rm = TRUE),
+            lon_bucket = median(lon_bucket, na.rm = TRUE),
             spd = median(spd, na.rm = TRUE),
-            lag_spd = median(lag_spd, na.rm = TRUE),
+            spd_calc = median(spd_calc, na.rm = TRUE),
+            pdist = median(pdist),
             trip_count = length(unique(origtatripno)))
   
 metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
-metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
+metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE)
 
 segments_sf <- metro_summary_sf %>%
-  group_by(rt, pid, des) %>%
-  arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed
+  group_by(rt, pid) %>%
+  arrange(pid, pdist) %>%  # Ensure points within each route are sorted if needed
   mutate(
     lead_geom = lead(geometry),
     lead_spd = lead(spd)
@@ -93,7 +97,7 @@ segments_sf <- metro_summary_sf %>%
   ) %>%
   ungroup() %>%
   as.data.frame() %>%
-  select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>%
+  select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>%
   st_as_sf()
 
 # get counts of routes
@@ -101,8 +105,8 @@ route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count
 
 # make charts
 ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")),
-       aes(x = pdist_bucket,
-           y = lag_spd)) +
+       aes(x = pdist,
+           y = spd_calc)) +
   geom_point() +
   geom_smooth() +
   facet_grid(paste0(rt, "-", des) ~ .)
@@ -118,7 +122,7 @@ bbox <- c(left = min(metro_data$lon),
 basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite")
 
 # A West
-quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1))
+quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE)
 
 for (route in unique(routes_categorized$name)){
   route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid)
@@ -137,7 +141,7 @@ for (route in unique(routes_categorized$name)){
           plot.caption = element_text(color = "grey")) +
     geom_sf(data = segments_sf %>% filter(pid %in% route_focus),
             inherit.aes = FALSE,
-            aes(color = lag_spd),
+            aes(color = spd_calc),
             linewidth = 1) +
     scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)")
   ggsave(file = paste0("figures/",