changed buckets from pdist to lat-lon
This commit is contained in:
		
							parent
							
								
									e59afa8c59
								
							
						
					
					
						commit
						f6fbdde273
					
				
					 2 changed files with 397 additions and 383 deletions
				
			
		
							
								
								
									
										702
									
								
								.Rhistory
									
										
									
									
									
								
							
							
						
						
									
										702
									
								
								.Rhistory
									
										
									
									
									
								
							|  | @ -1,338 +1,35 @@ | ||||||
| mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) | ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
| routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") | ) %>% | ||||||
| #--- | st_as_sf(sf_column_name = "geometry") | ||||||
| # Fields you want to query | bucket_feet <- 500 | ||||||
| fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno", "tmstmp") |  | ||||||
| # Creating an empty list to store results for each field |  | ||||||
| results <- vector("list", length(fields)) |  | ||||||
| # Loop through each field, get data, and coerce types if needed |  | ||||||
| for (i in seq_along(fields)) { |  | ||||||
| field <- fields[i] |  | ||||||
| query_string <- glue('from(bucket: "{bucket}") ', |  | ||||||
| '|> range(start: -{days}d) ', |  | ||||||
| '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', |  | ||||||
| '|> filter(fn: (r) => r["_field"] == "{field}")') |  | ||||||
| data <- influx_connection$query(query_string) |  | ||||||
| # Ensure the columns are coerced to consistent types |  | ||||||
| # (Optionally add coercion based on your expected types) |  | ||||||
| data <- bind_rows(data) %>% |  | ||||||
| mutate(value = as.character(`_value`), |  | ||||||
| field = `_field`) %>% |  | ||||||
| select(time, rt, pid, vid, value, field) |  | ||||||
| results[[i]] <- data |  | ||||||
| } |  | ||||||
| query_string <- glue('from(bucket: "{bucket}") ', |  | ||||||
| '|> range(start: -{days}d) ', |  | ||||||
| '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', |  | ||||||
| '|> filter(fn: (r) => r["_field"] == "{field}")') |  | ||||||
| data <- influx_connection$query(query_string) |  | ||||||
| field <- spd |  | ||||||
| field <- "spd" |  | ||||||
| query_string <- glue('from(bucket: "{bucket}") ', |  | ||||||
| '|> range(start: -{days}d) ', |  | ||||||
| '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', |  | ||||||
| '|> filter(fn: (r) => r["_field"] == "{field}")') |  | ||||||
| data <- influx_connection$query(query_string) |  | ||||||
| View(data) |  | ||||||
| #--- |  | ||||||
| # Fields you want to query |  | ||||||
| fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") |  | ||||||
| # Creating an empty list to store results for each field |  | ||||||
| results <- vector("list", length(fields)) |  | ||||||
| # Loop through each field, get data, and coerce types if needed |  | ||||||
| for (i in seq_along(fields)) { |  | ||||||
| field <- fields[i] |  | ||||||
| query_string <- glue('from(bucket: "{bucket}") ', |  | ||||||
| '|> range(start: -{days}d) ', |  | ||||||
| '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', |  | ||||||
| '|> filter(fn: (r) => r["_field"] == "{field}")') |  | ||||||
| data <- influx_connection$query(query_string) |  | ||||||
| # Ensure the columns are coerced to consistent types |  | ||||||
| # (Optionally add coercion based on your expected types) |  | ||||||
| data <- bind_rows(data) %>% |  | ||||||
| mutate(value = as.character(`_value`), |  | ||||||
| field = `_field`) %>% |  | ||||||
| select(time, rt, pid, vid, value, field) |  | ||||||
| results[[i]] <- data |  | ||||||
| } |  | ||||||
| # Bind all results together |  | ||||||
| metro_raw <- bind_rows(results) |  | ||||||
| metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% |  | ||||||
| distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) |  | ||||||
| metro_data <- metro_raw %>% |  | ||||||
| mutate(time = with_tz(time, "America/Chicago"), |  | ||||||
| spd = as.double(spd), |  | ||||||
| pdist = as.double(pdist), |  | ||||||
| lon = as.double(lon), |  | ||||||
| lat = as.double(lat)) %>% |  | ||||||
| group_by(origtatripno) %>% |  | ||||||
| arrange(time) %>% |  | ||||||
| mutate(lag_pdist = lag(pdist), |  | ||||||
| lag_time = lag(time)) %>% |  | ||||||
| mutate(lag_spd = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) |  | ||||||
| 1/364481.35 |  | ||||||
| bucket_feet <- 200 |  | ||||||
| bucket_lat <- bucket_feet/364481.35 |  | ||||||
| bucket_lon <- bucket_feet/267203.05 |  | ||||||
| bucket_feet <- 200 |  | ||||||
| lat_round <- bucket_feet/364481.35 | lat_round <- bucket_feet/364481.35 | ||||||
| lon_round <- bucket_feet/267203.05 | lon_round <- bucket_feet/267203.05 | ||||||
| metro_summary <- metro_data %>% | metro_summary <- metro_data %>% | ||||||
| left_join(routes_categorized, by = "pid") %>% | left_join(routes_categorized, by = "pid") %>% | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, | mutate(lat_bucket = round(lat / lat_round) * lat_round, | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% | lon_bucket = round(lon / lon_round) * lon_round) %>% | ||||||
| group_by(pdist_bucket, rt, des, pid) %>% | group_by(rt, des, pid, lat_bucket, lon_bucket) %>% | ||||||
| summarise(lat = median(lat, na.rm = TRUE), | summarise(spd = median(spd, na.rm = TRUE), | ||||||
| lon = median(lon, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| lag_spd = median(lag_spd, na.rm = TRUE), |  | ||||||
| trip_count = length(unique(origtatripno))) |  | ||||||
| metro_summary <- metro_data %>% |  | ||||||
| left_join(routes_categorized, by = "pid") %>% |  | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, |  | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% |  | ||||||
| group_by(lat_bucket, lon_bucket, rt, des, pid) %>% |  | ||||||
| summarise(lat = median(lat, na.rm = TRUE), |  | ||||||
| lon = median(lon, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| lag_spd = median(lag_spd, na.rm = TRUE), |  | ||||||
| trip_count = length(unique(origtatripno))) |  | ||||||
| View(metro_summary) |  | ||||||
| metro_summary <- metro_data %>% |  | ||||||
| left_join(routes_categorized, by = "pid") %>% |  | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, |  | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% |  | ||||||
| group_by(lat_bucket, lon_bucket, rt, des, pid) %>% |  | ||||||
| summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), |  | ||||||
| lon_bucket = median(lon_bucket, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| lag_spd = median(lag_spd, na.rm = TRUE), |  | ||||||
| trip_count = length(unique(origtatripno))) |  | ||||||
| metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) |  | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) |  | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des) %>% |  | ||||||
| arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, pdist_bucket, spd, segment, lag_spd) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des) %>% |  | ||||||
| arrange(pid, pdist_bucket) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des) %>% |  | ||||||
| arrange(vid, time) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des, vid) %>% |  | ||||||
| arrange(vid, time) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des, vid) %>% |  | ||||||
| arrange(pid, time) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des) %>% |  | ||||||
| arrange(pid, time) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, lag_spd) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des) %>% |  | ||||||
| arrange(pid, time) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) |  | ||||||
| metro_summary <- metro_data %>% |  | ||||||
| left_join(routes_categorized, by = "pid") %>% |  | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, |  | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% |  | ||||||
| group_by(lat_bucket, lon_bucket, rt, des, pid) %>% |  | ||||||
| summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), |  | ||||||
| lon_bucket = median(lon_bucket, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| lag_spd = median(lag_spd, na.rm = TRUE), |  | ||||||
| trip_count = length(unique(origtatripno))) |  | ||||||
| metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) |  | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid, des) %>% |  | ||||||
| arrange(pid, time) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) |  | ||||||
| View(metro_data_sf) |  | ||||||
| # get counts of routes |  | ||||||
| route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) |  | ||||||
| metro_summary <- metro_data %>% |  | ||||||
| left_join(routes_categorized, by = "pid") %>% |  | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, |  | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% |  | ||||||
| group_by(lat_bucket, lon_bucket, rt, des, pid) %>% |  | ||||||
| summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), |  | ||||||
| lon_bucket = median(lon_bucket, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| lag_spd = median(lag_spd, na.rm = TRUE), |  | ||||||
| pdist = median(pdist), |  | ||||||
| trip_count = length(unique(origtatripno))) |  | ||||||
| metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) |  | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) |  | ||||||
| # make charts |  | ||||||
| ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), |  | ||||||
| aes(x = pdist, |  | ||||||
| y = lag_spd)) + |  | ||||||
| geom_point() + |  | ||||||
| geom_smooth() + |  | ||||||
| facet_grid(paste0(rt, "-", des) ~ .) |  | ||||||
| metro_data <- metro_raw %>% |  | ||||||
| mutate(time = with_tz(time, "America/Chicago"), |  | ||||||
| spd = as.double(spd), |  | ||||||
| pdist = as.double(pdist), |  | ||||||
| lon = as.double(lon), |  | ||||||
| lat = as.double(lat)) %>% |  | ||||||
| group_by(origtatripno) %>% |  | ||||||
| arrange(time) %>% |  | ||||||
| mutate(lag_pdist = lag(pdist), |  | ||||||
| lag_time = lag(time)) %>% |  | ||||||
| mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) |  | ||||||
| routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") |  | ||||||
| bucket_feet <- 200 |  | ||||||
| lat_round <- bucket_feet/364481.35 |  | ||||||
| lon_round <- bucket_feet/267203.05 |  | ||||||
| metro_summary <- metro_data %>% |  | ||||||
| left_join(routes_categorized, by = "pid") %>% |  | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, |  | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% |  | ||||||
| group_by(lat_bucket, lon_bucket, rt, des, pid) %>% |  | ||||||
| summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), |  | ||||||
| lon_bucket = median(lon_bucket, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| spd_calc = median(spd_calc, na.rm = TRUE), | spd_calc = median(spd_calc, na.rm = TRUE), | ||||||
| pdist = median(pdist), | pdist = median(pdist), | ||||||
| trip_count = length(unique(origtatripno))) | trip_count = length(unique(origtatripno))) | ||||||
| metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | ||||||
| segments_sf <- metro_summary_sf %>% | metro_segments <- metro_summary %>% | ||||||
| group_by(rt, pid) %>% | group_by(rt, pid) %>% | ||||||
| arrange(pid, time) %>%  # Ensure points within each route are sorted if needed | arrange(pdist) %>% | ||||||
|  | mutate(lat_bucket_lag = lag(lat_bucket), | ||||||
|  | lon_bucket_lag = lag(lon_bucket)) %>% | ||||||
|  | filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% | ||||||
| mutate( | mutate( | ||||||
| lead_geom = lead(geometry), | geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), | ||||||
| lead_spd = lead(spd) | ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
| ) %>% | ) %>% | ||||||
| filter(!is.na(lead_geom)) %>% | st_as_sf(sf_column_name = "geometry") | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% |  | ||||||
| st_as_sf() |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid) |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid) %>% |  | ||||||
| arrange(pid, time) |  | ||||||
| segments_sf <- metro_summary_sf %>% |  | ||||||
| group_by(rt, pid) %>% |  | ||||||
| arrange(pid, pdist) %>%  # Ensure points within each route are sorted if needed |  | ||||||
| mutate( |  | ||||||
| lead_geom = lead(geometry), |  | ||||||
| lead_spd = lead(spd) |  | ||||||
| ) %>% |  | ||||||
| filter(!is.na(lead_geom)) %>% |  | ||||||
| # Create a segment for each pair of points |  | ||||||
| rowwise() %>% |  | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% |  | ||||||
| st_as_sf() |  | ||||||
| # get counts of routes | # get counts of routes | ||||||
| route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
| # make charts | # make charts | ||||||
| ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), | ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), | ||||||
| aes(x = pdist, | aes(x = pdist, | ||||||
| y = spd_calc)) + | y = spd_calc)) + | ||||||
| geom_point() + | geom_point() + | ||||||
|  | @ -346,9 +43,7 @@ top = max(metro_data$lat)) | ||||||
| #get basemap | #get basemap | ||||||
| basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | ||||||
| # A West | # A West | ||||||
| quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1)) | quantile(metro_segments %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) | ||||||
| # A West |  | ||||||
| quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) |  | ||||||
| for (route in unique(routes_categorized$name)){ | for (route in unique(routes_categorized$name)){ | ||||||
| route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | ||||||
| ggmap(basemap) + | ggmap(basemap) + | ||||||
|  | @ -364,7 +59,7 @@ y = NULL) + | ||||||
| theme(axis.text=element_blank(), | theme(axis.text=element_blank(), | ||||||
| axis.ticks=element_blank(), | axis.ticks=element_blank(), | ||||||
| plot.caption = element_text(color = "grey")) + | plot.caption = element_text(color = "grey")) + | ||||||
| geom_sf(data = segments_sf %>% filter(pid %in% route_focus), | geom_sf(data = metro_segments %>% filter(pid %in% route_focus), | ||||||
| inherit.aes = FALSE, | inherit.aes = FALSE, | ||||||
| aes(color = spd_calc), | aes(color = spd_calc), | ||||||
| linewidth = 1) + | linewidth = 1) + | ||||||
|  | @ -379,6 +74,189 @@ width = 11, | ||||||
| units = "in", | units = "in", | ||||||
| create.dir = TRUE) | create.dir = TRUE) | ||||||
| } | } | ||||||
|  | View(metro_data) | ||||||
|  | View(metro_summary) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | left_join(routes_categorized, by = "pid") %>% | ||||||
|  | mutate(lat_bucket = round(lat / lat_round) * lat_round, | ||||||
|  | lon_bucket = round(lon / lon_round) * lon_round) | ||||||
|  | View(metro_summary) | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | left_join(routes_categorized, by = "pid") %>% | ||||||
|  | mutate(lat_bucket = round(lat / lat_round) * lat_round, | ||||||
|  | lon_bucket = round(lon / lon_round) * lon_round) %>% | ||||||
|  | group_by(rt, name, pid, lat_bucket, lon_bucket) %>% | ||||||
|  | summarise(spd = median(spd, na.rm = TRUE), | ||||||
|  | spd_calc = median(spd_calc, na.rm = TRUE), | ||||||
|  | pdist = median(pdist), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | ||||||
|  | metro_segments <- metro_summary %>% | ||||||
|  | group_by(rt, pid) %>% | ||||||
|  | arrange(pdist) %>% | ||||||
|  | mutate(lat_bucket_lag = lag(lat_bucket), | ||||||
|  | lon_bucket_lag = lag(lon_bucket)) %>% | ||||||
|  | filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% | ||||||
|  | mutate( | ||||||
|  | geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), | ||||||
|  | ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
|  | ) %>% | ||||||
|  | st_as_sf(sf_column_name = "geometry") %>% | ||||||
|  | group_by(rt, name, lat_bucket, lon_bucket) %>% | ||||||
|  | summarise(weighted.mean(spd_calc, trip_count)) | ||||||
|  | View(metro_segments) | ||||||
|  | metro_segments <- metro_summary %>% | ||||||
|  | group_by(rt, pid) %>% | ||||||
|  | arrange(pdist) %>% | ||||||
|  | mutate(lat_bucket_lag = lag(lat_bucket), | ||||||
|  | lon_bucket_lag = lag(lon_bucket)) %>% | ||||||
|  | filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% | ||||||
|  | mutate( | ||||||
|  | geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), | ||||||
|  | ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
|  | ) %>% | ||||||
|  | st_as_sf(sf_column_name = "geometry") %>% | ||||||
|  | group_by(rt, name, lat_bucket, lon_bucket) %>% | ||||||
|  | summarise(spd_calc = weighted.mean(spd_calc, trip_count)) | ||||||
|  | View(metro_segments) | ||||||
|  | for (route in unique(routes_categorized$name)){ | ||||||
|  | route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = paste0("Metro Route Speed - ", route), | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = metro_segments %>% filter(pid %in% route_focus), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = spd_calc), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") | ||||||
|  | ggsave(file = paste0("figures/", | ||||||
|  | route, | ||||||
|  | ".pdf"), | ||||||
|  | title = paste0("Metro Route Speed - ", route), | ||||||
|  | device = pdf, | ||||||
|  | height = 8.5, | ||||||
|  | width = 11, | ||||||
|  | units = "in", | ||||||
|  | create.dir = TRUE) | ||||||
|  | } | ||||||
|  | for (route in unique(routes_categorized$name)){ | ||||||
|  | route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = paste0("Metro Route Speed - ", route), | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = metro_segments %>% filter(name %in route), | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = paste0("Metro Route Speed - ", route), | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = metro_segments %>% filter(name %in% route), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = spd_calc), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") | ||||||
|  | for (route in unique(routes_categorized$name)){ | ||||||
|  | route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | ||||||
|  | ggmap(basemap) + | ||||||
|  | labs(title = paste0("Metro Route Speed - ", route), | ||||||
|  | subtitle = paste0("averaged between ", | ||||||
|  | sum(route_counts %>% filter(pid %in% route_focus) %>% pull(route_count)), | ||||||
|  | " bus trips - ", | ||||||
|  | min(date(metro_data$time)), | ||||||
|  | " to ", | ||||||
|  | max(date(metro_data$time))), | ||||||
|  | x = NULL, | ||||||
|  | y = NULL) + | ||||||
|  | theme(axis.text=element_blank(), | ||||||
|  | axis.ticks=element_blank(), | ||||||
|  | plot.caption = element_text(color = "grey")) + | ||||||
|  | geom_sf(data = metro_segments %>% filter(name %in% route), | ||||||
|  | inherit.aes = FALSE, | ||||||
|  | aes(color = spd_calc), | ||||||
|  | linewidth = 1) + | ||||||
|  | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") | ||||||
|  | ggsave(file = paste0("figures/", | ||||||
|  | route, | ||||||
|  | ".pdf"), | ||||||
|  | title = paste0("Metro Route Speed - ", route), | ||||||
|  | device = pdf, | ||||||
|  | height = 8.5, | ||||||
|  | width = 11, | ||||||
|  | units = "in", | ||||||
|  | create.dir = TRUE) | ||||||
|  | } | ||||||
|  | # A West | ||||||
|  | quantile(metro_segments %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) | ||||||
|  | quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  | mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  | spd = as.double(spd), | ||||||
|  | pdist = as.double(pdist), | ||||||
|  | lon = as.double(lon), | ||||||
|  | lat = as.double(lat)) %>% | ||||||
|  | group_by(pid, vid) %>% | ||||||
|  | arrange(time) %>% | ||||||
|  | mutate(pdist_lag = lag(pdist), | ||||||
|  | time_lag = lag(time)) %>% | ||||||
|  | mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, | ||||||
|  | pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% | ||||||
|  | left_join(routes_categorized, by = "pid") | ||||||
|  | ggplot(data = metro_data %>% filter(name %in% route)) + | ||||||
|  | geom_violin(aes(x = time, | ||||||
|  | y = spd_calc)) | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  | mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  | spd = as.double(spd), | ||||||
|  | pdist = as.double(pdist), | ||||||
|  | lon = as.double(lon), | ||||||
|  | lat = as.double(lat)) %>% | ||||||
|  | mutate(date = date(time)) %>% | ||||||
|  | group_by(pid, vid) %>% | ||||||
|  | arrange(time) %>% | ||||||
|  | mutate(pdist_lag = lag(pdist), | ||||||
|  | time_lag = lag(time)) %>% | ||||||
|  | mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, | ||||||
|  | pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% | ||||||
|  | left_join(routes_categorized, by = "pid") | ||||||
|  | ggplot(data = metro_data %>% filter(name %in% route)) + | ||||||
|  | geom_violin(aes(x = time, | ||||||
|  | y = spd_calc, | ||||||
|  | group = date)) | ||||||
|  | ggplot(data = metro_data %>% filter(name %in% route)) + | ||||||
|  | geom_violin(aes(x = date, | ||||||
|  | y = spd_calc)) | ||||||
|  | ggplot(data = metro_data %>% filter(name %in% route)) + | ||||||
|  | geom_boxplot(aes(x = date, | ||||||
|  | y = spd_calc)) | ||||||
| library(tidyverse) | library(tidyverse) | ||||||
| library(influxdbclient) | library(influxdbclient) | ||||||
| library(glue) | library(glue) | ||||||
|  | @ -417,60 +295,151 @@ results[[i]] <- data | ||||||
| metro_raw <- bind_rows(results) | metro_raw <- bind_rows(results) | ||||||
| metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
| distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
|  | routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") | ||||||
| metro_data <- metro_raw %>% | metro_data <- metro_raw %>% | ||||||
| mutate(time = with_tz(time, "America/Chicago"), | mutate(time = with_tz(time, "America/Chicago"), | ||||||
| spd = as.double(spd), | spd = as.double(spd), | ||||||
| pdist = as.double(pdist), | pdist = as.double(pdist), | ||||||
| lon = as.double(lon), | lon = as.double(lon), | ||||||
| lat = as.double(lat)) %>% | lat = as.double(lat)) %>% | ||||||
|  | mutate(date = date(time)) %>% | ||||||
| group_by(pid, vid) %>% | group_by(pid, vid) %>% | ||||||
| arrange(time) %>% | arrange(time) %>% | ||||||
| mutate(lag_pdist = lag(pdist), | mutate(pdist_lag = lag(pdist), | ||||||
| lag_time = lag(time)) %>% | time_lag = lag(time)) %>% | ||||||
| mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) | mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, | ||||||
| routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") | pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% | ||||||
| bucket_feet <- 200 | left_join(routes_categorized, by = "pid") | ||||||
|  | bucket_feet <- 500 | ||||||
| lat_round <- bucket_feet/364481.35 | lat_round <- bucket_feet/364481.35 | ||||||
| lon_round <- bucket_feet/267203.05 | lon_round <- bucket_feet/267203.05 | ||||||
| metro_summary <- metro_data %>% | metro_summary <- metro_data %>% | ||||||
| left_join(routes_categorized, by = "pid") %>% |  | ||||||
| mutate(lat_bucket = round(lat / lat_round) * lat_round, | mutate(lat_bucket = round(lat / lat_round) * lat_round, | ||||||
| lon_bucket = round(lon / lon_round) * lon_round) %>% | lon_bucket = round(lon / lon_round) * lon_round) %>% | ||||||
| group_by(lat_bucket, lon_bucket, rt, des, pid) %>% | group_by(rt, name, pid, lat_bucket, lon_bucket) %>% | ||||||
| summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), | summarise(spd = median(spd, na.rm = TRUE), | ||||||
| lon_bucket = median(lon_bucket, na.rm = TRUE), |  | ||||||
| spd = median(spd, na.rm = TRUE), |  | ||||||
| spd_calc = median(spd_calc, na.rm = TRUE), | spd_calc = median(spd_calc, na.rm = TRUE), | ||||||
| pdist = median(pdist), | pdist = median(pdist), | ||||||
| trip_count = length(unique(origtatripno))) | trip_count = length(unique(origtatripno))) | ||||||
| metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | ||||||
| segments_sf <- metro_summary_sf %>% | metro_segments <- metro_summary %>% | ||||||
| group_by(rt, pid) %>% | group_by(rt, pid) %>% | ||||||
| arrange(pid, pdist) %>%  # Ensure points within each route are sorted if needed | arrange(pdist) %>% | ||||||
|  | mutate(lat_bucket_lag = lag(lat_bucket), | ||||||
|  | lon_bucket_lag = lag(lon_bucket)) %>% | ||||||
|  | filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% | ||||||
| mutate( | mutate( | ||||||
| lead_geom = lead(geometry), | geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), | ||||||
| lead_spd = lead(spd) | ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
| ) %>% | ) %>% | ||||||
| filter(!is.na(lead_geom)) %>% | st_as_sf(sf_column_name = "geometry") %>% | ||||||
| # Create a segment for each pair of points | group_by(rt, name, lat_bucket, lon_bucket) %>% | ||||||
| rowwise() %>% | summarise(spd_calc = weighted.mean(spd_calc, trip_count)) | ||||||
| mutate( |  | ||||||
| segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
| ) %>% |  | ||||||
| ungroup() %>% |  | ||||||
| as.data.frame() %>% |  | ||||||
| select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% |  | ||||||
| st_as_sf() |  | ||||||
| # get counts of routes | # get counts of routes | ||||||
| route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
| # make charts | # make charts | ||||||
| ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), | ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), | ||||||
| aes(x = pdist, | aes(x = pdist, | ||||||
| y = spd_calc)) + | y = spd_calc)) + | ||||||
| geom_point() + | geom_point() + | ||||||
| geom_smooth() + | geom_smooth() + | ||||||
| facet_grid(paste0(rt, "-", des) ~ .) | facet_grid(paste0(rt, "-", des) ~ .) | ||||||
|  | # make charts | ||||||
|  | ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), | ||||||
|  | aes(x = pdist, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_point() + | ||||||
|  | geom_smooth() + | ||||||
|  | facet_grid(name ~ .) | ||||||
|  | library(tidyverse) | ||||||
|  | library(influxdbclient) | ||||||
|  | library(glue) | ||||||
|  | library(ggmap) | ||||||
|  | library(sf) | ||||||
|  | # parameters needed to make connection to Database | ||||||
|  | token <- substr(read_file(file = 'api_keys/influxdb_madison-metro'), 1, 88) | ||||||
|  | org <- "e2581d54779b077f" | ||||||
|  | bucket <- "madison-metro" | ||||||
|  | days <- 1 | ||||||
|  | influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net", | ||||||
|  | token = token, | ||||||
|  | org = org) | ||||||
|  | #--- | ||||||
|  | # Fields you want to query | ||||||
|  | fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") | ||||||
|  | # Creating an empty list to store results for each field | ||||||
|  | results <- vector("list", length(fields)) | ||||||
|  | # Loop through each field, get data, and coerce types if needed | ||||||
|  | for (i in seq_along(fields)) { | ||||||
|  | field <- fields[i] | ||||||
|  | query_string <- glue('from(bucket: "{bucket}") ', | ||||||
|  | '|> range(start: -{days}d) ', | ||||||
|  | '|> filter(fn: (r) => r["_measurement"] == "vehicle_data")', | ||||||
|  | '|> filter(fn: (r) => r["_field"] == "{field}")') | ||||||
|  | data <- influx_connection$query(query_string) | ||||||
|  | # Ensure the columns are coerced to consistent types | ||||||
|  | # (Optionally add coercion based on your expected types) | ||||||
|  | data <- bind_rows(data) %>% | ||||||
|  | mutate(value = as.character(`_value`), | ||||||
|  | field = `_field`) %>% | ||||||
|  | select(time, rt, pid, vid, value, field) | ||||||
|  | results[[i]] <- data | ||||||
|  | } | ||||||
|  | # Bind all results together | ||||||
|  | metro_raw <- bind_rows(results) | ||||||
|  | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
|  | distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
|  | routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") | ||||||
|  | metro_data <- metro_raw %>% | ||||||
|  | mutate(time = with_tz(time, "America/Chicago"), | ||||||
|  | spd = as.double(spd), | ||||||
|  | pdist = as.double(pdist), | ||||||
|  | lon = as.double(lon), | ||||||
|  | lat = as.double(lat)) %>% | ||||||
|  | mutate(date = date(time)) %>% | ||||||
|  | group_by(pid, vid) %>% | ||||||
|  | arrange(time) %>% | ||||||
|  | mutate(pdist_lag = lag(pdist), | ||||||
|  | time_lag = lag(time)) %>% | ||||||
|  | mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, | ||||||
|  | pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% | ||||||
|  | left_join(routes_categorized, by = "pid") | ||||||
|  | bucket_feet <- 500 | ||||||
|  | lat_round <- bucket_feet/364481.35 | ||||||
|  | lon_round <- bucket_feet/267203.05 | ||||||
|  | metro_summary <- metro_data %>% | ||||||
|  | mutate(lat_bucket = round(lat / lat_round) * lat_round, | ||||||
|  | lon_bucket = round(lon / lon_round) * lon_round) %>% | ||||||
|  | group_by(rt, name, pid, lat_bucket, lon_bucket) %>% | ||||||
|  | summarise(spd = median(spd, na.rm = TRUE), | ||||||
|  | spd_calc = median(spd_calc, na.rm = TRUE), | ||||||
|  | pdist = median(pdist), | ||||||
|  | trip_count = length(unique(origtatripno))) | ||||||
|  | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
|  | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | ||||||
|  | metro_segments <- metro_summary %>% | ||||||
|  | group_by(rt, pid) %>% | ||||||
|  | arrange(pdist) %>% | ||||||
|  | mutate(lat_bucket_lag = lag(lat_bucket), | ||||||
|  | lon_bucket_lag = lag(lon_bucket)) %>% | ||||||
|  | filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% | ||||||
|  | mutate( | ||||||
|  | geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag), | ||||||
|  | ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
|  | ) %>% | ||||||
|  | st_as_sf(sf_column_name = "geometry") %>% | ||||||
|  | group_by(rt, name, lat_bucket, lon_bucket) %>% | ||||||
|  | summarise(spd_calc = weighted.mean(spd_calc, trip_count)) | ||||||
|  | # get counts of routes | ||||||
|  | route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
|  | # make charts | ||||||
|  | ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), | ||||||
|  | aes(x = pdist, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_point() + | ||||||
|  | geom_smooth() + | ||||||
|  | facet_grid(name ~ .) | ||||||
| register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | ||||||
| bbox <- c(left = min(metro_data$lon), | bbox <- c(left = min(metro_data$lon), | ||||||
| bottom = min(metro_data$lat), | bottom = min(metro_data$lat), | ||||||
|  | @ -478,8 +447,7 @@ right = max(metro_data$lon), | ||||||
| top = max(metro_data$lat)) | top = max(metro_data$lat)) | ||||||
| #get basemap | #get basemap | ||||||
| basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | ||||||
| # A West | quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) | ||||||
| quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) |  | ||||||
| for (route in unique(routes_categorized$name)){ | for (route in unique(routes_categorized$name)){ | ||||||
| route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | ||||||
| ggmap(basemap) + | ggmap(basemap) + | ||||||
|  | @ -495,14 +463,26 @@ y = NULL) + | ||||||
| theme(axis.text=element_blank(), | theme(axis.text=element_blank(), | ||||||
| axis.ticks=element_blank(), | axis.ticks=element_blank(), | ||||||
| plot.caption = element_text(color = "grey")) + | plot.caption = element_text(color = "grey")) + | ||||||
| geom_sf(data = segments_sf %>% filter(pid %in% route_focus), | geom_sf(data = metro_segments %>% filter(name %in% route), | ||||||
| inherit.aes = FALSE, | inherit.aes = FALSE, | ||||||
| aes(color = spd_calc), | aes(color = spd_calc), | ||||||
| linewidth = 1) + | linewidth = 1) + | ||||||
| scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") | scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") | ||||||
| ggsave(file = paste0("figures/", | ggsave(file = paste0("figures/", | ||||||
| route, | route, | ||||||
| ".pdf"), | "_map.pdf"), | ||||||
|  | title = paste0("Metro Route Speed - ", route), | ||||||
|  | device = pdf, | ||||||
|  | height = 8.5, | ||||||
|  | width = 11, | ||||||
|  | units = "in", | ||||||
|  | create.dir = TRUE) | ||||||
|  | ggplot(data = metro_data %>% filter(name %in% route)) + | ||||||
|  | geom_boxplot(aes(x = date, | ||||||
|  | y = spd_calc)) | ||||||
|  | ggsave(file = paste0("figures/", | ||||||
|  | route, | ||||||
|  | "_date.pdf"), | ||||||
| title = paste0("Metro Route Speed - ", route), | title = paste0("Metro Route Speed - ", route), | ||||||
| device = pdf, | device = pdf, | ||||||
| height = 8.5, | height = 8.5, | ||||||
|  | @ -510,3 +490,23 @@ width = 11, | ||||||
| units = "in", | units = "in", | ||||||
| create.dir = TRUE) | create.dir = TRUE) | ||||||
| } | } | ||||||
|  | ggplot(data = metro_summary %>% filter(!is.blank(name)), | ||||||
|  | aes(x = pdist, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_boxplot() | ||||||
|  | ggplot(data = metro_summary %>% filter(!is.na(name)), | ||||||
|  | aes(x = pdist, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_boxplot() | ||||||
|  | ggplot(data = metro_summary %>% filter(!is.na(name)), | ||||||
|  | aes(x = name, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_boxplot() | ||||||
|  | ggplot(data = metro_summary %>% filter(!is.na(name)), | ||||||
|  | aes(x = name, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_violin() | ||||||
|  | ggplot(data = metro_summary %>% filter(!is.na(name)), | ||||||
|  | aes(x = name, | ||||||
|  | y = spd_calc)) + | ||||||
|  | geom_boxplot() | ||||||
|  |  | ||||||
|  | @ -15,10 +15,10 @@ influx_connection <- InfluxDBClient$new(url = "https://influxdb.dendroalsia.net" | ||||||
|                                 token = token, |                                 token = token, | ||||||
|                                 org = org) |                                 org = org) | ||||||
| #--- | #--- | ||||||
| # Fields you want to query | # Fields to query | ||||||
| fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") | fields <- c("des", "spd", "pdist", "lon", "lat", "dly", "origtatripno") | ||||||
| 
 | 
 | ||||||
| # Creating an empty list to store results for each field | # An empty list to store results for each field | ||||||
| results <- vector("list", length(fields)) | results <- vector("list", length(fields)) | ||||||
| 
 | 
 | ||||||
| # Loop through each field, get data, and coerce types if needed | # Loop through each field, get data, and coerce types if needed | ||||||
|  | @ -48,33 +48,34 @@ metro_raw <- bind_rows(results) | ||||||
| metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | metro_raw <- pivot_wider(metro_raw, values_from = value, names_from = field) %>% | ||||||
|   distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) |   distinct(pid, vid, lat, lon, spd, .keep_all = TRUE) | ||||||
| 
 | 
 | ||||||
|  | routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") | ||||||
|  | 
 | ||||||
| metro_data <- metro_raw %>% | metro_data <- metro_raw %>% | ||||||
|   mutate(time = with_tz(time, "America/Chicago"), |   mutate(time = with_tz(time, "America/Chicago"), | ||||||
|          spd = as.double(spd), |          spd = as.double(spd), | ||||||
|          pdist = as.double(pdist), |          pdist = as.double(pdist), | ||||||
|          lon = as.double(lon), |          lon = as.double(lon), | ||||||
|          lat = as.double(lat)) %>% |          lat = as.double(lat)) %>% | ||||||
|  |   mutate(date = date(time)) %>% | ||||||
|   group_by(pid, vid) %>% |   group_by(pid, vid) %>% | ||||||
|   arrange(time) %>% |   arrange(time) %>% | ||||||
|   mutate(lag_pdist = lag(pdist), |   mutate(pdist_lag = lag(pdist), | ||||||
|          lag_time = lag(time)) %>% |          time_lag = lag(time)) %>% | ||||||
|   mutate(spd_calc = (pdist - lag_pdist)/as.double(difftime(time, lag_time, units = "hours"))/5280) |   mutate(spd_calc = case_when(pdist_lag > pdist ~ NA, | ||||||
|  |                               pdist_lag <= pdist ~ (pdist - pdist_lag)/as.double(difftime(time, time_lag, units = "hours"))/5280)) %>% | ||||||
|  |   left_join(routes_categorized, by = "pid") | ||||||
| 
 | 
 | ||||||
| routes_categorized <- read_csv(file = "routes_categorized.csv", col_types = "cc") |  | ||||||
| 
 | 
 | ||||||
| bucket_feet <- 200 | bucket_feet <- 500 | ||||||
| 
 | 
 | ||||||
| lat_round <- bucket_feet/364481.35 | lat_round <- bucket_feet/364481.35 | ||||||
| lon_round <- bucket_feet/267203.05 | lon_round <- bucket_feet/267203.05 | ||||||
| 
 | 
 | ||||||
| metro_summary <- metro_data %>% | metro_summary <- metro_data %>% | ||||||
|   left_join(routes_categorized, by = "pid") %>% |  | ||||||
|   mutate(lat_bucket = round(lat / lat_round) * lat_round, |   mutate(lat_bucket = round(lat / lat_round) * lat_round, | ||||||
|          lon_bucket = round(lon / lon_round) * lon_round) %>% |          lon_bucket = round(lon / lon_round) * lon_round) %>% | ||||||
|   group_by(lat_bucket, lon_bucket, rt, des, pid) %>% |   group_by(rt, name, pid, lat_bucket, lon_bucket) %>% | ||||||
|   summarise(lat_bucket = median(lat_bucket, na.rm = TRUE), |   summarise(spd = median(spd, na.rm = TRUE), | ||||||
|             lon_bucket = median(lon_bucket, na.rm = TRUE), |  | ||||||
|             spd = median(spd, na.rm = TRUE), |  | ||||||
|             spd_calc = median(spd_calc, na.rm = TRUE), |             spd_calc = median(spd_calc, na.rm = TRUE), | ||||||
|             pdist = median(pdist), |             pdist = median(pdist), | ||||||
|             trip_count = length(unique(origtatripno))) |             trip_count = length(unique(origtatripno))) | ||||||
|  | @ -82,34 +83,35 @@ metro_summary <- metro_data %>% | ||||||
| metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | metro_data_sf <- st_as_sf(metro_data %>% filter(!is.na(lon)), coords = c("lon", "lat"), remove = FALSE) | ||||||
| metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | metro_summary_sf <- st_as_sf(metro_summary %>% filter(!is.na(lon_bucket)), coords = c("lon_bucket", "lat_bucket"), remove = FALSE) | ||||||
| 
 | 
 | ||||||
| segments_sf <- metro_summary_sf %>% | metro_segments <- metro_summary %>% | ||||||
|   group_by(rt, pid) %>% |   group_by(rt, pid) %>% | ||||||
|   arrange(pid, pdist) %>%  # Ensure points within each route are sorted if needed |   arrange(pdist) %>% | ||||||
|  |   mutate(lat_bucket_lag = lag(lat_bucket), | ||||||
|  |          lon_bucket_lag = lag(lon_bucket)) %>% | ||||||
|  |   filter(!is.na(lat_bucket) & !is.na(lon_bucket) & !is.na(lat_bucket_lag) & !is.na(lon_bucket_lag)) %>% | ||||||
|   mutate( |   mutate( | ||||||
|     lead_geom = lead(geometry), |     geometry = pmap(list(lat_bucket, lon_bucket, lat_bucket_lag, lon_bucket_lag),  | ||||||
|     lead_spd = lead(spd) |                     ~st_linestring(matrix(c(..2, ..1, ..4, ..3), ncol = 2, byrow = TRUE))) | ||||||
|   ) %>% |   ) %>% | ||||||
|   filter(!is.na(lead_geom)) %>% |   st_as_sf(sf_column_name = "geometry") %>% | ||||||
|   # Create a segment for each pair of points |   group_by(rt, name, lat_bucket, lon_bucket) %>% | ||||||
|   rowwise() %>% |   summarise(spd_calc = weighted.mean(spd_calc, trip_count)) | ||||||
|   mutate( |  | ||||||
|     segment = st_cast(st_union(geometry, lead_geom), "LINESTRING") |  | ||||||
|   ) %>% |  | ||||||
|   ungroup() %>% |  | ||||||
|   as.data.frame() %>% |  | ||||||
|   select(rt, pid, des, lat_bucket, lon_bucket, spd, segment, spd_calc) %>% |  | ||||||
|   st_as_sf() |  | ||||||
| 
 | 
 | ||||||
| # get counts of routes | # get counts of routes | ||||||
| route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | route_counts <- metro_data %>% group_by(pid, rt, des) %>% summarise(route_count = length(unique(origtatripno))) | ||||||
| 
 | 
 | ||||||
| # make charts | # make charts | ||||||
| ggplot(data = metro_summary %>% filter(pid %in% c("421", "422")), | ggplot(data = metro_summary %>% filter(pid %in% (routes_categorized %>% filter(name %in% c("B_North", "B_South")) %>% pull (pid))), | ||||||
|        aes(x = pdist, |        aes(x = pdist, | ||||||
|            y = spd_calc)) + |            y = spd_calc)) + | ||||||
|   geom_point() + |   geom_point() + | ||||||
|   geom_smooth() + |   geom_smooth() + | ||||||
|   facet_grid(paste0(rt, "-", des) ~ .) |   facet_grid(name ~ .) | ||||||
|  | 
 | ||||||
|  | ggplot(data = metro_summary %>% filter(!is.na(name)), | ||||||
|  |        aes(x = name, | ||||||
|  |            y = spd_calc)) + | ||||||
|  |   geom_boxplot() | ||||||
| 
 | 
 | ||||||
| register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | register_stadiamaps(key = substr(read_file(file = "api_keys/stadia_api_key"), 1, 36)) | ||||||
| 
 | 
 | ||||||
|  | @ -121,8 +123,7 @@ bbox <- c(left = min(metro_data$lon), | ||||||
| #get basemap | #get basemap | ||||||
| basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | basemap <- get_stadiamap(bbox = bbox, zoom = 13, maptype = "stamen_toner_lite") | ||||||
| 
 | 
 | ||||||
| # A West | quantile(metro_segments %>% filter(name %in% c("A_West")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) | ||||||
| quantile(segments_sf %>% filter(pid %in% c("469")) %>% pull(spd_calc), c(0,0.25, 0.5, 0.75, 1), na.rm = TRUE) |  | ||||||
| 
 | 
 | ||||||
| for (route in unique(routes_categorized$name)){ | for (route in unique(routes_categorized$name)){ | ||||||
|   route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) |   route_focus <- routes_categorized %>% filter(name == route) %>% pull(pid) | ||||||
|  | @ -139,14 +140,27 @@ for (route in unique(routes_categorized$name)){ | ||||||
|     theme(axis.text=element_blank(), |     theme(axis.text=element_blank(), | ||||||
|           axis.ticks=element_blank(), |           axis.ticks=element_blank(), | ||||||
|           plot.caption = element_text(color = "grey")) + |           plot.caption = element_text(color = "grey")) + | ||||||
|     geom_sf(data = segments_sf %>% filter(pid %in% route_focus), |     geom_sf(data = metro_segments %>% filter(name %in% route), | ||||||
|             inherit.aes = FALSE, |             inherit.aes = FALSE, | ||||||
|             aes(color = spd_calc), |             aes(color = spd_calc), | ||||||
|             linewidth = 1) + |             linewidth = 1) + | ||||||
|     scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") |     scale_color_distiller(palette = "RdYlGn", direction = "reverse", limits = c(0,70), name = "Average speed or segment\n(calculated with locations, not reported speed)") | ||||||
|   ggsave(file = paste0("figures/", |   ggsave(file = paste0("figures/", | ||||||
|                        route, |                        route, | ||||||
|                        ".pdf"), |                        "_map.pdf"), | ||||||
|  |          title = paste0("Metro Route Speed - ", route), | ||||||
|  |          device = pdf, | ||||||
|  |          height = 8.5, | ||||||
|  |          width = 11, | ||||||
|  |          units = "in", | ||||||
|  |          create.dir = TRUE) | ||||||
|  | 
 | ||||||
|  |   ggplot(data = metro_data %>% filter(name %in% route)) + | ||||||
|  |     geom_boxplot(aes(x = date, | ||||||
|  |                     y = spd_calc)) | ||||||
|  |   ggsave(file = paste0("figures/", | ||||||
|  |                        route, | ||||||
|  |                        "_date.pdf"), | ||||||
|          title = paste0("Metro Route Speed - ", route), |          title = paste0("Metro Route Speed - ", route), | ||||||
|          device = pdf, |          device = pdf, | ||||||
|          height = 8.5, |          height = 8.5, | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Ben Varick
						Ben Varick