changed query to be more efficient: one pivot() query instead of one query per field

2024-11-23 23:14:49 -06:00 · 2024-11-23 23:14:49 -06:00 · f7c2a39889
commit f7c2a39889
parent 93b97f3cec
1 changed files with 19 additions and 57 deletions
--- a/madison-metro.R
+++ b/madison-metro.R
@ -17,42 +17,24 @@ influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net"
                                org = org)
 #--- Query influxdb for data

-# Fields to query
-fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno")
-
-# An empty list to store results for each field
-results <- vector("list", length(fields))
 time_start <- Sys.time()
-# Loop through each field, get data, and coerce types if needed
-for (i in seq_along(fields)) {
-  field <- fields[i]
-  
-  query_string <- glue('from(bucket: "{bucket}") ',
-                       '|> range(start: -{days}d) ',
-                       '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
-                       '|> filter(fn: (r) => r["_field"] == "{field}")')
-  
-  data <- influx_connection$query(query_string)
-  
-  # Ensure the columns are coerced to consistent types 
-  # (Optionally add coercion based on your expected types)
-  data <- bind_rows(data) %>%
-    mutate(value = as.character(`_value`),
-           field = `_field`) %>% 
-    select(time, rt, pid, vid, value, field)
-  
-  results[[i]] <- data
-}

-# Bind all results together
-metro_raw <- bind_rows(results)
+query_string <- glue('from(bucket: "{bucket}") ',
+                     '|> range(start: -{days}d) ',
+                     '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
+                     '|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")')

-metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
-  distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
+results <- influx_connection$query(query_string)
+rm(influx_connection, query_string, bucket, org, token)
+metro_raw <- results %>% 
+  bind_rows() %>%
+  distinct(`_time`, pid, vid, lat, lon, spd, .keep_all = TRUE)
+nrow(metro_raw)
 time_end <- Sys.time()
-difftime(time_start, time_end)
-
+difftime(time_end, time_start)
+rm(time_end, time_start, results)

+#-----
 routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc")

 metro_data <- metro_raw %>%
@ -107,17 +89,17 @@ metro_segments <- metro_summary %>%
 route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno)))

 # make charts
-ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))),
+ggplot(data = metro_data %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))),
       aes(x = pdist,
-           y = spd_calc)) +
-  geom_point() +
+           y = spd)) +
+  geom_point(size = 0.1) +
  geom_smooth() +
  facet_grid(name ~ .)

-ggplot(data = metro_summary %>% filter(!is.na(name)),
+ggplot(data = metro_data %>% filter(!is.na(name)),
       aes(x = name,
           y = spd_calc)) +
-  geom_boxplot()
+  geom_boxplot(outlier.shape = NA)

 register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))

@ -180,24 +162,4 @@ for (route in unique(routes_categorized$name)){
         width = 11,
         units = "in",
         create.dir = TRUE)
-}
-
-
-# ggmap(basemap) +
-#   labs(title = paste0("Metro Route Speed - ", route),
-#        subtitle = paste0("averaged between ",
-#                          sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)),
-#                          " bus trips - ",
-#                          min(date(metro_data$time)),
-#                          " to ",
-#                          max(date(metro_data$time))),
-#        x = NULL,
-#        y = NULL) +
-#   theme(axis.text=element_blank(),
-#         axis.ticks=element_blank(),
-#         plot.caption = element_text(color = "grey")) +
-#   geom_sf(data = metro_segments %>% filter(name %in% route),
-#           inherit.aes = FALSE,
-#           aes(color = spd_calc),
-#           linewidth = 1) +
-#   scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)")
+}