Changed the InfluxDB query to be more efficient: a single pivoted Flux query replaces the per-field query loop
This commit is contained in:
parent
93b97f3cec
commit
f7c2a39889
@ -17,42 +17,24 @@ influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net"
|
|||||||
org = org)
|
org = org)
|
||||||
#--- Query influxdb for data
|
#--- Query influxdb for data
|
||||||
|
|
||||||
# Fields to query
|
|
||||||
fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno")
|
|
||||||
|
|
||||||
# An empty list to store results for each field
|
|
||||||
results <- vector("list", length(fields))
|
|
||||||
time_start <- Sys.time()
|
time_start <- Sys.time()
|
||||||
# Loop through each field, get data, and coerce types if needed
|
|
||||||
for (i in seq_along(fields)) {
|
|
||||||
field <- fields[i]
|
|
||||||
|
|
||||||
query_string <- glue('from(bucket: "{bucket}") ',
|
|
||||||
'|> range(start: -{days}d) ',
|
|
||||||
'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
|
|
||||||
'|> filter(fn: (r) => r["_field"] == "{field}")')
|
|
||||||
|
|
||||||
data <- influx_connection$query(query_string)
|
|
||||||
|
|
||||||
# Ensure the columns are coerced to consistent types
|
|
||||||
# (Optionally add coercion based on your expected types)
|
|
||||||
data <- bind_rows(data) %>%
|
|
||||||
mutate(value = as.character(`_value`),
|
|
||||||
field = `_field`) %>%
|
|
||||||
select(time, rt, pid, vid, value, field)
|
|
||||||
|
|
||||||
results[[i]] <- data
|
|
||||||
}
|
|
||||||
|
|
||||||
# Bind all results together
|
query_string <- glue('from(bucket: "{bucket}") ',
|
||||||
metro_raw <- bind_rows(results)
|
'|> range(start: -{days}d) ',
|
||||||
|
'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
|
||||||
|
'|> pivot(rowKey:["_time"], columnKey: ["_field"], valueColumn: "_value")')
|
||||||
|
|
||||||
metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
|
results <- influx_connection$query(query_string)
|
||||||
distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
|
rm(influx_connection, query_string, bucket, org, token)
|
||||||
|
metro_raw <- results %>%
|
||||||
|
bind_rows() %>%
|
||||||
|
distinct(`_time`, pid, vid, lat, lon, spd, .keep_all = TRUE)
|
||||||
|
nrow(metro_raw)
|
||||||
time_end <- Sys.time()
|
time_end <- Sys.time()
|
||||||
difftime(time_start, time_end)
|
difftime(time_end, time_start)
|
||||||
|
rm(time_end, time_start, results)
|
||||||
|
|
||||||
|
#-----
|
||||||
routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc")
|
routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc")
|
||||||
|
|
||||||
metro_data <- metro_raw %>%
|
metro_data <- metro_raw %>%
|
||||||
@ -107,17 +89,17 @@ metro_segments <- metro_summary %>%
|
|||||||
route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno)))
|
route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno)))
|
||||||
|
|
||||||
# make charts
|
# make charts
|
||||||
ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))),
|
ggplot(data = metro_data %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))),
|
||||||
aes(x = pdist,
|
aes(x = pdist,
|
||||||
y = spd_calc)) +
|
y = spd)) +
|
||||||
geom_point() +
|
geom_point(size = 0.1) +
|
||||||
geom_smooth() +
|
geom_smooth() +
|
||||||
facet_grid(name ~ .)
|
facet_grid(name ~ .)
|
||||||
|
|
||||||
ggplot(data = metro_summary %>% filter(!is.na(name)),
|
ggplot(data = metro_data %>% filter(!is.na(name)),
|
||||||
aes(x = name,
|
aes(x = name,
|
||||||
y = spd_calc)) +
|
y = spd_calc)) +
|
||||||
geom_boxplot()
|
geom_boxplot(outlier.shape = NA)
|
||||||
|
|
||||||
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))
|
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))
|
||||||
|
|
||||||
@ -180,24 +162,4 @@ for (route in unique(routes_categorized$name)){
|
|||||||
width = 11,
|
width = 11,
|
||||||
units = "in",
|
units = "in",
|
||||||
create.dir = TRUE)
|
create.dir = TRUE)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# ggmap(basemap) +
|
|
||||||
# labs(title = paste0("Metro Route Speed - ", route),
|
|
||||||
# subtitle = paste0("averaged between ",
|
|
||||||
# sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)),
|
|
||||||
# " bus trips - ",
|
|
||||||
# min(date(metro_data$time)),
|
|
||||||
# " to ",
|
|
||||||
# max(date(metro_data$time))),
|
|
||||||
# x = NULL,
|
|
||||||
# y = NULL) +
|
|
||||||
# theme(axis.text=element_blank(),
|
|
||||||
# axis.ticks=element_blank(),
|
|
||||||
# plot.caption = element_text(color = "grey")) +
|
|
||||||
# geom_sf(data = metro_segments %>% filter(name %in% route),
|
|
||||||
# inherit.aes = FALSE,
|
|
||||||
# aes(color = spd_calc),
|
|
||||||
# linewidth = 1) +
|
|
||||||
# scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)")
|
|
Loading…
x
Reference in New Issue
Block a user