start of data processing and analysis
This commit is contained in:
		
							parent
							
								
									863e1e52ce
								
							
						
					
					
						commit
						eab18f7e05
					
				
					 4 changed files with 660 additions and 0 deletions
				
			
		
							
								
								
									
										512
									
								
								.Rhistory
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										512
									
								
								.Rhistory
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,512 @@ | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | length(unique(metro_data %>% filter(pid %in% c("422")) %>% pull(origtatripno))), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("422")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | View(metro_summary) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat), | ||||||
|  | lon = median(lon), | ||||||
|  | spd = median(spd), | ||||||
|  | lag_spd = median(lag_spd), | ||||||
|  | trip_count = n()) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid, origtatripno) %>% | ||||||
|  | summarise(lat = median(lat), | ||||||
|  | lon = median(lon), | ||||||
|  | spd = median(spd), | ||||||
|  | lag_spd = median(lag_spd), | ||||||
|  | trip_count = n()) | ||||||
|  | trip_count = length(unique(origtatripno)) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat), | ||||||
|  | lon = median(lon), | ||||||
|  | spd = median(spd), | ||||||
|  | lag_spd = median(lag_spd), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count))), | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("422")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | segments_sf <- metro_summary_sf %>% | ||||||
|  | group_by(rt, pid, des) %>% | ||||||
|  | arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed | ||||||
|  | mutate( | ||||||
|  | lead_geom = lead(geometry), | ||||||
|  | lead_spd = lead(spd) | ||||||
|  | ) %>% | ||||||
|  | filter(!is.na(lead_geom)) %>% | ||||||
|  | # Create a segment for each pair of points | ||||||
|  | rowwise() %>% | ||||||
|  | mutate( | ||||||
|  | segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") | ||||||
|  | ) %>% | ||||||
|  | ungroup() %>% | ||||||
|  | as.data.frame() %>% | ||||||
|  | select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% | ||||||
|  | st_as_sf() | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | segments_sf %>% filter(pid %in% c("422")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("422")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("422")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("422")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | metro_summary %>% filter(pid %in% c("422")) | ||||||
|  | max(metro_summary %>% filter(pid %in% c("422")) %>% pull(trip_count)) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("469")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | library(tidyverse) | ||||||
|  | library(influxdbclient) | ||||||
|  | library(glue) | ||||||
|  | library(ggmap) | ||||||
|  | library(sf) | ||||||
|  | # parameters needed to make connection to Database | ||||||
|  | token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) | ||||||
|  | org <- "e2581d54779b077f" | ||||||
|  | bucket <- "madison-metro" | ||||||
|  | days <- 1 | ||||||
|  | influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", | ||||||
|  | token = token, | ||||||
|  | org = org) | ||||||
|  | #--- | ||||||
|  | # Fields you want to query | ||||||
|  | fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") | ||||||
|  | # Creating an empty list to store results for each field | ||||||
|  | results <- vector("list", length(fields)) | ||||||
|  | # Loop through each field, get data, and coerce types if needed | ||||||
|  | for (i in seq_along(fields)) { | ||||||
|  | field <- fields[i] | ||||||
|  | query_string <- glue('from(bucket: "{bucket}") ', | ||||||
|  | '|> range(start: -{days}d) ', | ||||||
|  | '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', | ||||||
|  | '|> filter(fn: (r) => r["_field"] == "{field}")') | ||||||
|  | data <- influx_connection$query(query_string) | ||||||
|  | # Ensure the columns are coerced to consistent types | ||||||
|  | # (Optionally add coercion based on your expected types) | ||||||
|  | data <- bind_rows(data) %>% | ||||||
|  | mutate(value = as.character(`_value`), | ||||||
|  | field = `_field`) %>% | ||||||
|  | select(time, rt, des, value, field) | ||||||
|  | results[[i]] <- data | ||||||
|  | } | ||||||
|  | # Bind all results together | ||||||
|  | metro_raw <- bind_rows(results) | ||||||
|  | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
|  | distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  | mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  | spd = as.double(spd), | ||||||
|  | pdist = as.double(pdist), | ||||||
|  | lon = as.double(lon), | ||||||
|  | lat = as.double(lat)) %>% | ||||||
|  | group_by(origtatripno) %>% | ||||||
|  | arrange(time) %>% | ||||||
|  | mutate(lag_pdist = lag(pdist), | ||||||
|  | lag_time = lag(time)) %>% | ||||||
|  | mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat), | ||||||
|  | lon = median(lon), | ||||||
|  | spd = median(spd), | ||||||
|  | lag_spd = median(lag_spd), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | segments_sf <- metro_summary_sf %>% | ||||||
|  | group_by(rt, pid, des) %>% | ||||||
|  | arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed | ||||||
|  | mutate( | ||||||
|  | lead_geom = lead(geometry), | ||||||
|  | lead_spd = lead(spd) | ||||||
|  | ) %>% | ||||||
|  | filter(!is.na(lead_geom)) %>% | ||||||
|  | # Create a segment for each pair of points | ||||||
|  | rowwise() %>% | ||||||
|  | mutate( | ||||||
|  | segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") | ||||||
|  | ) %>% | ||||||
|  | ungroup() %>% | ||||||
|  | as.data.frame() %>% | ||||||
|  | select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% | ||||||
|  | st_as_sf() | ||||||
|  | ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), | ||||||
|  | aes(x = pdist_bucket, | ||||||
|  | y = lag_spd)) + | ||||||
|  | geom_point() + | ||||||
|  | geom_smooth() + | ||||||
|  | facet_grid(paste0(rt, "-", des) ~ .) | ||||||
|  | register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | ||||||
|  | bbox <- c(left = min(metro_data$lon), | ||||||
|  | bottom = min(metro_data$lat), | ||||||
|  | right = max(metro_data$lon), | ||||||
|  | top = max(metro_data$lat)) | ||||||
|  | #get basemap | ||||||
|  | basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("469")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | library(tidyverse) | ||||||
|  | library(influxdbclient) | ||||||
|  | library(glue) | ||||||
|  | library(ggmap) | ||||||
|  | library(sf) | ||||||
|  | # parameters needed to make connection to Database | ||||||
|  | token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) | ||||||
|  | org <- "e2581d54779b077f" | ||||||
|  | bucket <- "madison-metro" | ||||||
|  | days <- 1 | ||||||
|  | influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", | ||||||
|  | token = token, | ||||||
|  | org = org) | ||||||
|  | #--- | ||||||
|  | # Fields you want to query | ||||||
|  | fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") | ||||||
|  | # Creating an empty list to store results for each field | ||||||
|  | results <- vector("list", length(fields)) | ||||||
|  | # Loop through each field, get data, and coerce types if needed | ||||||
|  | for (i in seq_along(fields)) { | ||||||
|  | field <- fields[i] | ||||||
|  | query_string <- glue('from(bucket: "{bucket}") ', | ||||||
|  | '|> range(start: -{days}d) ', | ||||||
|  | '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', | ||||||
|  | '|> filter(fn: (r) => r["_field"] == "{field}")') | ||||||
|  | data <- influx_connection$query(query_string) | ||||||
|  | # Ensure the columns are coerced to consistent types | ||||||
|  | # (Optionally add coercion based on your expected types) | ||||||
|  | data <- bind_rows(data) %>% | ||||||
|  | mutate(value = as.character(`_value`), | ||||||
|  | field = `_field`) %>% | ||||||
|  | select(time, rt, des, value, field) | ||||||
|  | results[[i]] <- data | ||||||
|  | } | ||||||
|  | # Bind all results together | ||||||
|  | metro_raw <- bind_rows(results) | ||||||
|  | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
|  | distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  | mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  | spd = as.double(spd), | ||||||
|  | pdist = as.double(pdist), | ||||||
|  | lon = as.double(lon), | ||||||
|  | lat = as.double(lat)) %>% | ||||||
|  | group_by(origtatripno) %>% | ||||||
|  | arrange(time) %>% | ||||||
|  | mutate(lag_pdist = lag(pdist), | ||||||
|  | lag_time = lag(time)) %>% | ||||||
|  | mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat), | ||||||
|  | lon = median(lon), | ||||||
|  | spd = median(spd), | ||||||
|  | lag_spd = median(lag_spd), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | library(tidyverse) | ||||||
|  | library(influxdbclient) | ||||||
|  | library(glue) | ||||||
|  | library(ggmap) | ||||||
|  | library(sf) | ||||||
|  | # parameters needed to make connection to Database | ||||||
|  | token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) | ||||||
|  | org <- "e2581d54779b077f" | ||||||
|  | bucket <- "madison-metro" | ||||||
|  | days <- 1 | ||||||
|  | influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", | ||||||
|  | token = token, | ||||||
|  | org = org) | ||||||
|  | #--- | ||||||
|  | # Fields you want to query | ||||||
|  | fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") | ||||||
|  | # Creating an empty list to store results for each field | ||||||
|  | results <- vector("list", length(fields)) | ||||||
|  | # Loop through each field, get data, and coerce types if needed | ||||||
|  | for (i in seq_along(fields)) { | ||||||
|  | field <- fields[i] | ||||||
|  | query_string <- glue('from(bucket: "{bucket}") ', | ||||||
|  | '|> range(start: -{days}d) ', | ||||||
|  | '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', | ||||||
|  | '|> filter(fn: (r) => r["_field"] == "{field}")') | ||||||
|  | data <- influx_connection$query(query_string) | ||||||
|  | # Ensure the columns are coerced to consistent types | ||||||
|  | # (Optionally add coercion based on your expected types) | ||||||
|  | data <- bind_rows(data) %>% | ||||||
|  | mutate(value = as.character(`_value`), | ||||||
|  | field = `_field`) %>% | ||||||
|  | select(time, rt, des, value, field) | ||||||
|  | results[[i]] <- data | ||||||
|  | } | ||||||
|  | # Bind all results together | ||||||
|  | metro_raw <- bind_rows(results) | ||||||
|  | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
|  | distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  | mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  | spd = as.double(spd), | ||||||
|  | pdist = as.double(pdist), | ||||||
|  | lon = as.double(lon), | ||||||
|  | lat = as.double(lat)) %>% | ||||||
|  | group_by(origtatripno) %>% | ||||||
|  | arrange(time) %>% | ||||||
|  | mutate(lag_pdist = lag(pdist), | ||||||
|  | lag_time = lag(time)) %>% | ||||||
|  | mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat), | ||||||
|  | lon = median(lon), | ||||||
|  | spd = median(spd), | ||||||
|  | lag_spd = median(lag_spd), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat, na.rm = TRUE), | ||||||
|  | lon = median(lon, na.rm = TRUE), | ||||||
|  | spd = median(spd, na.rm = TRUE), | ||||||
|  | lag_spd = median(lag_spd, na.rm = TRUE), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(is.double(lat)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | View(metro_data) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lat)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary, coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | View(metro_summary) | ||||||
|  | nrow(metro_data %>% filter(is.na(lon))) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  | group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  | summarise(lat = median(lat, na.rm = TRUE), | ||||||
|  | lon = median(lon, na.rm = TRUE), | ||||||
|  | spd = median(spd, na.rm = TRUE), | ||||||
|  | lag_spd = median(lag_spd, na.rm = TRUE), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | segments_sf <- metro_summary_sf %>% | ||||||
|  | group_by(rt, pid, des) %>% | ||||||
|  | arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed | ||||||
|  | mutate( | ||||||
|  | lead_geom = lead(geometry), | ||||||
|  | lead_spd = lead(spd) | ||||||
|  | ) %>% | ||||||
|  | filter(!is.na(lead_geom)) %>% | ||||||
|  | # Create a segment for each pair of points | ||||||
|  | rowwise() %>% | ||||||
|  | mutate( | ||||||
|  | segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") | ||||||
|  | ) %>% | ||||||
|  | ungroup() %>% | ||||||
|  | as.data.frame() %>% | ||||||
|  | select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% | ||||||
|  | st_as_sf() | ||||||
|  | ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), | ||||||
|  | aes(x = pdist_bucket, | ||||||
|  | y = lag_spd)) + | ||||||
|  | geom_point() + | ||||||
|  | geom_smooth() + | ||||||
|  | facet_grid(paste0(rt, "-", des) ~ .) | ||||||
|  | register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | ||||||
|  | bbox <- c(left = min(metro_data$lon), | ||||||
|  | bottom = min(metro_data$lat), | ||||||
|  | right = max(metro_data$lon), | ||||||
|  | top = max(metro_data$lat)) | ||||||
|  | #get basemap | ||||||
|  | basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("469")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | quantile(segments_sf %>% filter(pid %in% c("469") %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) | ||||||
|  | ) | ||||||
|  | quantile(segments_sf %>% filter(pid %in% c("469") %>% pull(lag_spd)), c(0,0.25, 0.5, 0.75, 1)) | ||||||
|  | quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) | ||||||
|  | quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("469")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("469")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
|  | # get counts of routes | ||||||
|  | route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
|  | View(route_counts) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("469")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = "Metro Route Speed", | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = segments_sf %>% filter(pid %in% c("1280")), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = lag_spd), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  | facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
							
								
								
									
										1
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
										
									
									
										vendored
									
									
								
							|  | @ -1,3 +1,4 @@ | ||||||
| api_keys/* | api_keys/* | ||||||
| figures/* | figures/* | ||||||
| 
 | 
 | ||||||
|  | .Rproj.user | ||||||
|  |  | ||||||
							
								
								
									
										134
									
								
								madison-metro.R
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								madison-metro.R
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,134 @@ | ||||||
|  | library(tidyverse) | ||||||
|  | library(influxdbclient) | ||||||
|  | library(glue) | ||||||
|  | library(ggmap) | ||||||
|  | library(sf) | ||||||
|  | 
 | ||||||
|  | # parameters needed to make connection to Database | ||||||
|  | token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) | ||||||
|  | org <- "e2581d54779b077f" | ||||||
|  | bucket <- "madison-metro" | ||||||
|  | 
 | ||||||
|  | days <- 1 | ||||||
|  | 
 | ||||||
|  | influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", | ||||||
|  |                                 token = token, | ||||||
|  |                                 org = org) | ||||||
|  | #--- | ||||||
|  | # Fields you want to query | ||||||
|  | fields <- c("spd", "pdist", "pid", "lon", "lat", "vid", "dly", "origtatripno") | ||||||
|  | 
 | ||||||
|  | # Creating an empty list to store results for each field | ||||||
|  | results <- vector("list", length(fields)) | ||||||
|  | 
 | ||||||
|  | # Loop through each field, get data, and coerce types if needed | ||||||
|  | for (i in seq_along(fields)) { | ||||||
|  |   field <- fields[i] | ||||||
|  |    | ||||||
|  |   query_string <- glue('from(bucket: "{bucket}") ', | ||||||
|  |                        '|> range(start: -{days}d) ', | ||||||
|  |                        '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', | ||||||
|  |                        '|> filter(fn: (r) => r["_field"] == "{field}")') | ||||||
|  |    | ||||||
|  |   data <- influx_connection$query(query_string) | ||||||
|  |    | ||||||
|  |   # Ensure the columns are coerced to consistent types  | ||||||
|  |   # (Optionally add coercion based on your expected types) | ||||||
|  |   data <- bind_rows(data) %>% | ||||||
|  |     mutate(value = as.character(`_value`), | ||||||
|  |            field = `_field`) %>%  | ||||||
|  |     select(time, rt, des, value, field) | ||||||
|  |    | ||||||
|  |   results[[i]] <- data | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | # Bind all results together | ||||||
|  | metro_raw <- bind_rows(results) | ||||||
|  | 
 | ||||||
|  | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
|  |   distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
|  | 
 | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  |   mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  |          spd = as.double(spd), | ||||||
|  |          pdist = as.double(pdist), | ||||||
|  |          lon = as.double(lon), | ||||||
|  |          lat = as.double(lat)) %>% | ||||||
|  |   group_by(origtatripno) %>% | ||||||
|  |   arrange(time) %>% | ||||||
|  |   mutate(lag_pdist = lag(pdist), | ||||||
|  |          lag_time = lag(time)) %>% | ||||||
|  |   mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) | ||||||
|  | 
 | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  |   mutate(pdist_bucket = round(pdist / 500) * 500) %>% | ||||||
|  |   group_by(pdist_bucket, rt, des, pid) %>% | ||||||
|  |   summarise(lat = median(lat, na.rm = TRUE), | ||||||
|  |             lon = median(lon, na.rm = TRUE), | ||||||
|  |             spd = median(spd, na.rm = TRUE), | ||||||
|  |             lag_spd = median(lag_spd, na.rm = TRUE), | ||||||
|  |             trip_count = length(unique(origtatripno))) | ||||||
|  |    | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | 
 | ||||||
|  | segments_sf <- metro_summary_sf %>% | ||||||
|  |   group_by(rt, pid, des) %>% | ||||||
|  |   arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed | ||||||
|  |   mutate( | ||||||
|  |     lead_geom = lead(geometry), | ||||||
|  |     lead_spd = lead(spd) | ||||||
|  |   ) %>% | ||||||
|  |   filter(!is.na(lead_geom)) %>% | ||||||
|  |   # Create a segment for each pair of points | ||||||
|  |   rowwise() %>% | ||||||
|  |   mutate( | ||||||
|  |     segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") | ||||||
|  |   ) %>% | ||||||
|  |   ungroup() %>% | ||||||
|  |   as.data.frame() %>% | ||||||
|  |   select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% | ||||||
|  |   st_as_sf() | ||||||
|  | 
 | ||||||
|  | # get counts of routes | ||||||
|  | route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
|  | 
 | ||||||
|  | # make charts | ||||||
|  | ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), | ||||||
|  |        aes(x = pdist_bucket, | ||||||
|  |            y = lag_spd)) + | ||||||
|  |   geom_point() + | ||||||
|  |   geom_smooth() + | ||||||
|  |   facet_grid(paste0(rt, "-", des) ~ .) | ||||||
|  | 
 | ||||||
|  | register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | ||||||
|  | 
 | ||||||
|  | bbox <- c(left = min(metro_data$lon), | ||||||
|  |           bottom = min(metro_data$lat), | ||||||
|  |           right = max(metro_data$lon), | ||||||
|  |           top = max(metro_data$lat)) | ||||||
|  | 
 | ||||||
|  | #get basemap | ||||||
|  | basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | ||||||
|  | 
 | ||||||
|  | quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(lag_spd), c(0,0.25, 0.5, 0.75, 1)) | ||||||
|  | 
 | ||||||
|  | ggmap(basemap) + | ||||||
|  |   labs(title = "Metro Route Speed", | ||||||
|  |        subtitle = paste0("averaged between ", | ||||||
|  |                          metro_summary %>% filter(pid %in% c("1280")) %>% pull(trip_count), | ||||||
|  |                          " bus trips - ", | ||||||
|  |                          min(date(metro_data$time)), | ||||||
|  |                          " to ", | ||||||
|  |                          max(date(metro_data$time))), | ||||||
|  |        x = NULL, | ||||||
|  |        y = NULL) + | ||||||
|  |   theme(axis.text=element_blank(), | ||||||
|  |         axis.ticks=element_blank(), | ||||||
|  |         plot.caption = element_text(color = "grey")) + | ||||||
|  |   geom_sf(data = segments_sf %>% filter(pid %in% c("1280")), | ||||||
|  |           inherit.aes = FALSE, | ||||||
|  |           aes(color = lag_spd), | ||||||
|  |           linewidth = 1) + | ||||||
|  |   scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed\n(calculated with consecutive points)") + | ||||||
|  |   facet_wrap(paste0(rt, "-", des) ~ .) | ||||||
							
								
								
									
										13
									
								
								madison-metro.Rproj
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								madison-metro.Rproj
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,13 @@ | ||||||
|  | Version: 1.0 | ||||||
|  | 
 | ||||||
|  | RestoreWorkspace: Default | ||||||
|  | SaveWorkspace: Default | ||||||
|  | AlwaysSaveHistory: Default | ||||||
|  | 
 | ||||||
|  | EnableCodeIndexing: Yes | ||||||
|  | UseSpacesForTab: Yes | ||||||
|  | NumSpacesForTab: 2 | ||||||
|  | Encoding: UTF-8 | ||||||
|  | 
 | ||||||
|  | RnwWeave: Sweave | ||||||
|  | LaTeX: pdfLaTeX | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue