start of data processing and analysis

This commit is contained in:
Ben Varick 2024-11-19 11:14:45 -06:00
parent 863e1e52ce
commit eab18f7e05
Signed by: ben
SSH Key Fingerprint: SHA256:jWnpFDAcacYM5aPFpYRqlsamlDyKNpSj3jj+k4ojtUo
4 changed files with 660 additions and 0 deletions

512
.Rhistory Normal file
View File

@ -0,0 +1,512 @@
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
length(unique(metro_data %>% filter(pid %in% c("422")) %>% pull(origtatripno))),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("422")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
View(metro_summary)
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat),
lon = median(lon),
spd = median(spd),
lag_spd = median(lag_spd),
trip_count = n())
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid, origtatripno) %>%
summarise(lat = median(lat),
lon = median(lon),
spd = median(spd),
lag_spd = median(lag_spd),
trip_count = n())
trip_count = length(unique(origtatripno))
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat),
lon = median(lon),
spd = median(spd),
lag_spd = median(lag_spd),
trip_count = length(unique(origtatripno)))
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count))),
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("422")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count)
metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE)
metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE)
segments_sf <- metro_summary_sf %>%
group_by(rt, pid, des) %>%
arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed
mutate(
lead_geom = lead(geometry),
lead_spd = lead(spd)
) %>%
filter(!is.na(lead_geom)) %>%
# Create a segment for each pair of points
rowwise() %>%
mutate(
segment = st_cast(st_union(geometry, lead_geom), "LINESTRING")
) %>%
ungroup() %>%
as.data.frame() %>%
select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>%
st_as_sf()
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("422")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("422")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("422")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
metro_summary %>% filter(pid %in% c("422"))
max(metro_summary %>% filter(pid %in% c("422")) %>% pull(trip_count))
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("469")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
library(tidyverse)
library(influxdbclient)
library(glue)
library(ggmap)
library(sf)
# parameters needed to make connection to Database
token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88)
org <- "e2581d54779b077f"
bucket <- "madison-metro"
days <- 1
influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net",
token = token,
org = org)
#---
# Fields you want to query
fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno")
# Creating an empty list to store results for each field
results <- vector("list", length(fields))
# Loop through each field, get data, and coerce types if needed
for (i in seq_along(fields)) {
field <- fields[i]
query_string <- glue('from(bucket: "{bucket}") ',
'|> range(start: -{days}d) ',
'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
'|> filter(fn: (r) => r["_field"] == "{field}")')
data <- influx_connection$query(query_string)
# Ensure the columns are coerced to consistent types
# (Optionally add coercion based on your expected types)
data <- bind_rows(data) %>%
mutate(value = as.character(`_value`),
field = `_field`) %>%
select(time, rt, des, value, field)
results[[i]] <- data
}
# Bind all results together
metro_raw <- bind_rows(results)
metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
metro_data <- metro_raw %>%
mutate(time = with_tz(time, "America/Chicago"),
spd = as.double(spd),
pdist = as.double(pdist),
lon = as.double(lon),
lat = as.double(lat)) %>%
group_by(origtatripno) %>%
arrange(time) %>%
mutate(lag_pdist = lag(pdist),
lag_time = lag(time)) %>%
mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat),
lon = median(lon),
spd = median(spd),
lag_spd = median(lag_spd),
trip_count = length(unique(origtatripno)))
metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE)
metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE)
segments_sf <- metro_summary_sf %>%
group_by(rt, pid, des) %>%
arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed
mutate(
lead_geom = lead(geometry),
lead_spd = lead(spd)
) %>%
filter(!is.na(lead_geom)) %>%
# Create a segment for each pair of points
rowwise() %>%
mutate(
segment = st_cast(st_union(geometry, lead_geom), "LINESTRING")
) %>%
ungroup() %>%
as.data.frame() %>%
select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>%
st_as_sf()
ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")),
aes(x = pdist_bucket,
y = lag_spd)) +
geom_point() +
geom_smooth() +
facet_grid(paste0(rt, "-", des) ~ .)
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))
bbox <- c(left = min(metro_data$lon),
bottom = min(metro_data$lat),
right = max(metro_data$lon),
top = max(metro_data$lat))
#get basemap
basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite")
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("469")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
library(tidyverse)
library(influxdbclient)
library(glue)
library(ggmap)
library(sf)
# parameters needed to make connection to Database
token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88)
org <- "e2581d54779b077f"
bucket <- "madison-metro"
days <- 1
influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net",
token = token,
org = org)
#---
# Fields you want to query
fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno")
# Creating an empty list to store results for each field
results <- vector("list", length(fields))
# Loop through each field, get data, and coerce types if needed
for (i in seq_along(fields)) {
field <- fields[i]
query_string <- glue('from(bucket: "{bucket}") ',
'|> range(start: -{days}d) ',
'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
'|> filter(fn: (r) => r["_field"] == "{field}")')
data <- influx_connection$query(query_string)
# Ensure the columns are coerced to consistent types
# (Optionally add coercion based on your expected types)
data <- bind_rows(data) %>%
mutate(value = as.character(`_value`),
field = `_field`) %>%
select(time, rt, des, value, field)
results[[i]] <- data
}
# Bind all results together
metro_raw <- bind_rows(results)
metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
metro_data <- metro_raw %>%
mutate(time = with_tz(time, "America/Chicago"),
spd = as.double(spd),
pdist = as.double(pdist),
lon = as.double(lon),
lat = as.double(lat)) %>%
group_by(origtatripno) %>%
arrange(time) %>%
mutate(lag_pdist = lag(pdist),
lag_time = lag(time)) %>%
mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat),
lon = median(lon),
spd = median(spd),
lag_spd = median(lag_spd),
trip_count = length(unique(origtatripno)))
metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE)
metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE)
library(tidyverse)
library(influxdbclient)
library(glue)
library(ggmap)
library(sf)
# parameters needed to make connection to Database
token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88)
org <- "e2581d54779b077f"
bucket <- "madison-metro"
days <- 1
influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net",
token = token,
org = org)
#---
# Fields you want to query
fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno")
# Creating an empty list to store results for each field
results <- vector("list", length(fields))
# Loop through each field, get data, and coerce types if needed
for (i in seq_along(fields)) {
field <- fields[i]
query_string <- glue('from(bucket: "{bucket}") ',
'|> range(start: -{days}d) ',
'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
'|> filter(fn: (r) => r["_field"] == "{field}")')
data <- influx_connection$query(query_string)
# Ensure the columns are coerced to consistent types
# (Optionally add coercion based on your expected types)
data <- bind_rows(data) %>%
mutate(value = as.character(`_value`),
field = `_field`) %>%
select(time, rt, des, value, field)
results[[i]] <- data
}
# Bind all results together
metro_raw <- bind_rows(results)
metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
metro_data <- metro_raw %>%
mutate(time = with_tz(time, "America/Chicago"),
spd = as.double(spd),
pdist = as.double(pdist),
lon = as.double(lon),
lat = as.double(lat)) %>%
group_by(origtatripno) %>%
arrange(time) %>%
mutate(lag_pdist = lag(pdist),
lag_time = lag(time)) %>%
mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat),
lon = median(lon),
spd = median(spd),
lag_spd = median(lag_spd),
trip_count = length(unique(origtatripno)))
metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE)
metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE)
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat, na.rm = TRUE),
lon = median(lon, na.rm = TRUE),
spd = median(spd, na.rm = TRUE),
lag_spd = median(lag_spd, na.rm = TRUE),
trip_count = length(unique(origtatripno)))
metro_data_sf <- st_as_sf(metro_data %>% filter(is.double(lat)), coords = c("lon", "lat"), remove = FALSE)
View(metro_data)
metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lat)), coords = c("lon", "lat"), remove = FALSE)
metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE)
View(metro_summary)
nrow(metro_data %>% filter(is.na(lon)))
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat, na.rm = TRUE),
lon = median(lon, na.rm = TRUE),
spd = median(spd, na.rm = TRUE),
lag_spd = median(lag_spd, na.rm = TRUE),
trip_count = length(unique(origtatripno)))
metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
segments_sf <- metro_summary_sf %>%
group_by(rt, pid, des) %>%
arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed
mutate(
lead_geom = lead(geometry),
lead_spd = lead(spd)
) %>%
filter(!is.na(lead_geom)) %>%
# Create a segment for each pair of points
rowwise() %>%
mutate(
segment = st_cast(st_union(geometry, lead_geom), "LINESTRING")
) %>%
ungroup() %>%
as.data.frame() %>%
select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>%
st_as_sf()
ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")),
aes(x = pdist_bucket,
y = lag_spd)) +
geom_point() +
geom_smooth() +
facet_grid(paste0(rt, "-", des) ~ .)
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))
bbox <- c(left = min(metro_data$lon),
bottom = min(metro_data$lat),
right = max(metro_data$lon),
top = max(metro_data$lat))
#get basemap
basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite")
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("469")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
quantile(segments_sf %>% filter(pid %in% c("469") %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1))
)
quantile(segments_sf %>% filter(pid %in% c("469") %>% pull(lag_spd)), c(0,0.25, 0.5, 0.75, 1))
quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1))
quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1))
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("469")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno)))
# get counts of routes
route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno)))
View(route_counts)
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("469")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("1280")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)

1
.gitignore vendored
View File

@ -1,3 +1,4 @@
api_keys/* api_keys/*
figures/* figures/*
.Rproj.user

134
madison-metro.R Normal file
View File

@ -0,0 +1,134 @@
library(tidyverse)
library(influxdbclient)
library(glue)
library(ggmap)
library(sf)
# parameters needed to make connection to Database
token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88)
org <- "e2581d54779b077f"
bucket <- "madison-metro"
days <- 1
influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net",
token = token,
org = org)
#---
# Fields you want to query
fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno")
# Creating an empty list to store results for each field
results <- vector("list", length(fields))
# Loop through each field, get data, and coerce types if needed
for (i in seq_along(fields)) {
field <- fields[i]
query_string <- glue('from(bucket: "{bucket}") ',
'|> range(start: -{days}d) ',
'|> filter(fn: (r) => r["_measurement"] == "vehicle_data")',
'|> filter(fn: (r) => r["_field"] == "{field}")')
data <- influx_connection$query(query_string)
# Ensure the columns are coerced to consistent types
# (Optionally add coercion based on your expected types)
data <- bind_rows(data) %>%
mutate(value = as.character(`_value`),
field = `_field`) %>%
select(time, rt, des, value, field)
results[[i]] <- data
}
# Bind all results together
metro_raw <- bind_rows(results)
metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>%
distinct(pid, vid, lat, lon, spd, .keep_all = TRUE)
metro_data <- metro_raw %>%
mutate(time = with_tz(time, "America/Chicago"),
spd = as.double(spd),
pdist = as.double(pdist),
lon = as.double(lon),
lat = as.double(lat)) %>%
group_by(origtatripno) %>%
arrange(time) %>%
mutate(lag_pdist = lag(pdist),
lag_time = lag(time)) %>%
mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280)
metro_summary <- metro_data %>%
mutate(pdist_bucket = round(pdist / 500) * 500) %>%
group_by(pdist_bucket, rt, des, pid) %>%
summarise(lat = median(lat, na.rm = TRUE),
lon = median(lon, na.rm = TRUE),
spd = median(spd, na.rm = TRUE),
lag_spd = median(lag_spd, na.rm = TRUE),
trip_count = length(unique(origtatripno)))
metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE)
segments_sf <- metro_summary_sf %>%
group_by(rt, pid, des) %>%
arrange(pid, pdist_bucket) %>% # Ensure points within each route are sorted if needed
mutate(
lead_geom = lead(geometry),
lead_spd = lead(spd)
) %>%
filter(!is.na(lead_geom)) %>%
# Create a segment for each pair of points
rowwise() %>%
mutate(
segment = st_cast(st_union(geometry, lead_geom), "LINESTRING")
) %>%
ungroup() %>%
as.data.frame() %>%
select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>%
st_as_sf()
# get counts of routes
route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno)))
# make charts
ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")),
aes(x = pdist_bucket,
y = lag_spd)) +
geom_point() +
geom_smooth() +
facet_grid(paste0(rt, "-", des) ~ .)
register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36))
bbox <- c(left = min(metro_data$lon),
bottom = min(metro_data$lat),
right = max(metro_data$lon),
top = max(metro_data$lat))
#get basemap
basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite")
quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1))
ggmap(basemap) +
labs(title = "Metro Route Speed",
subtitle = paste0("averaged between ",
metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count),
" bus trips - ",
min(date(metro_data$time)),
" to ",
max(date(metro_data$time))),
x = NULL,
y = NULL) +
theme(axis.text=element_blank(),
axis.ticks=element_blank(),
plot.caption = element_text(color = "grey")) +
geom_sf(data = segments_sf %>% filter(pid %in% c("1280")),
inherit.aes = FALSE,
aes(color = lag_spd),
linewidth = 1) +
scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") +
facet_wrap(paste0(rt, "-", des) ~ .)

13
madison-metro.Rproj Normal file
View File

@ -0,0 +1,13 @@
Version: 1.0
RestoreWorkspace: Default
SaveWorkspace: Default
AlwaysSaveHistory: Default
EnableCodeIndexing: Yes
UseSpacesForTab: Yes
NumSpacesForTab: 2
Encoding: UTF-8
RnwWeave: Sweave
LaTeX: pdfLaTeX