From 07b5c13900a7d44dfd441d91da4c2e5c6dec3ccf Mon Sep 17 00:00:00 2001 From: Ben Varick Date: Thu, 8 Jun 2023 10:36:51 -0500 Subject: [PATCH] initial commit --- .gitignore | 11 ++ USGS_NWIS.R | 403 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 414 insertions(+) create mode 100644 .gitignore create mode 100644 USGS_NWIS.R diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9521574 --- /dev/null +++ b/.gitignore @@ -0,0 +1,11 @@ +#ignore .Rhistory +.Rhistory + +#ignore all the data files +data/* + +#ignore the figure files +figures/* + +#except an example +!figures/Elwha/ElwhaDischarge_last_6_months.png diff --git a/USGS_NWIS.R b/USGS_NWIS.R new file mode 100644 index 0000000..d8f0232 --- /dev/null +++ b/USGS_NWIS.R @@ -0,0 +1,403 @@ +library(dataRetrieval, quietly = TRUE) +library(tidyverse, quietly = TRUE) +library(RColorBrewer, quietly = TRUE) +library(scales, quietly = TRUE) + +setwd("/home/ben/Documents/dataProjects/USGS_NWIS") + +rivers <- data.frame(names = c("Elwha", "Calawah", "Hoh", "Dungeness", "Duckabush", "Wenatchee", "Snake", "Wisconsin", "Cedar", "Colorado"), + siteNumber = c("12045500", "12043000", "12041200", "12048000", "12054000", "12462500", "13013650", "05407000", "12119000", "09380000")) + +#rivers <- rivers %>% filter(names %in% c("Elwha")) +#rivers <- rivers %>% filter(names %in% c("Duckabush")) +#rivers <- rivers %>% filter(names %in% c("Hoh")) + +ifelse(!dir.exists(file.path(getwd(), "figures")), dir.create(file.path(getwd(), "figures")), FALSE) +ifelse(!dir.exists(file.path(getwd(), "data")), dir.create(file.path(getwd(), "data")), FALSE) + +for(i in 1:nrow(rivers)){ + rivername <- rivers$names[i] + message(paste0(i, " of ", nrow(rivers), ": ", rivername, " River - ", Sys.time())) + ifelse(!dir.exists(file.path(getwd(), paste0("figures/", rivername))), dir.create(file.path(getwd(), paste0("figures/", rivername))), FALSE) + # this downloads most recent discharge daily value data for the river + if(sum(list.files(path = "data") %>% match(paste0(rivername,"_discharge_dv.Rda")), na.rm = TRUE) == 0){ + message("downloading all data") + discharge_dv <- readNWISdv(siteNumber = rivers$siteNumber[i], parameterCd = "00060") + } else { + + load(paste0("data/",rivername,"_discharge_dv.Rda")) + message(paste0("downloading new data: previous ", -1 * difftime(max(discharge_dv$Date), Sys.Date()), " days")) + discharge_dv <- bind_rows(discharge_dv, + readNWISdv(siteNumber = rivers$siteNumber[i], parameterCd = "00060", startDate = as.Date(max(discharge_dv$Date)) - days(1)) %>% + filter(Date > max(discharge_dv$Date))) + } + save(discharge_dv, file = paste0("data/",rivername,"_discharge_dv.Rda")) + + # this downloads most recent discharge unit value (every 15 min) data for the river + if(sum(list.files(path = "data") %>% match(paste0(rivername,"_discharge_uv.Rda")), na.rm = TRUE) == 0){ + discharge_uv <- readNWISuv(siteNumber = rivers$siteNumber[i], parameterCd = "00060") + } else { + load(paste0("data/",rivername,"_discharge_uv.Rda")) + discharge_uv <- bind_rows(discharge_uv, + readNWISuv(siteNumber = rivers$siteNumber[i], parameterCd = "00060", startDate = as.Date(max(discharge_uv$dateTime)) - days(1)) %>% + filter(dateTime > max(discharge_uv$dateTime))) + } + save(discharge_uv, file = paste0("data/",rivername,"_discharge_uv.Rda")) + + message("processing data") + discharge_dv["daymonth"] <- format(discharge_dv$Date, "%m%d") + discharge_uv["daymonth"] <- format(discharge_uv$dateTime, "%m%d") + + #removes February 29th leap days from data + discharge_dv <- discharge_dv %>% filter(daymonth != "0229") + discharge_uv <- discharge_uv %>% filter(daymonth != "0229") + + + #builds stats + dayofyear <- unique(discharge_dv %>% arrange(daymonth) %>% pull(daymonth)) + quantiles <- lapply(dayofyear, function(i) + quantile(discharge_dv %>% filter(daymonth == i, !is.na(X_00060_00003)) %>% pull(X_00060_00003), + probs = c(seq(0.05, 0.95, 0.1), 0.5))) + + river_stats_dv <- data.frame(dayofyear, bind_rows(quantiles)) + river_stats_dv["Date"] <- ymd(paste0(year(Sys.Date()), river_stats_dv$dayofyear)) + river_stats_dv["dateTime"] <- ymd_h(paste0(year(Sys.Date()), river_stats_dv$dayofyear, 12)) + river_stats_dv <- bind_rows(river_stats_dv, river_stats_dv %>% mutate(Date = Date + years(1), dateTime = dateTime + years(1)), river_stats_dv %>% mutate(Date = Date - years(1), dateTime = dateTime - years(1))) + + river_states_ribbon <- bind_rows( + data.frame(river_stats_dv %>% mutate(min = X5., max = X95.) %>% select(dayofyear, dateTime, min, max), "median" = 90), + data.frame(river_stats_dv %>% mutate(min = X15., max = X25.) %>% select(dayofyear, dateTime, min, max), "median" = 70), + data.frame(river_stats_dv %>% mutate(min = X25., max = X35.) %>% select(dayofyear, dateTime, min, max), "median" = 50), + data.frame(river_stats_dv %>% mutate(min = X35., max = X45.) %>% select(dayofyear, dateTime, min, max), "median" = 30), + data.frame(river_stats_dv %>% mutate(min = X45., max = X55.) %>% select(dayofyear, dateTime, min, max), "median" = 10)) + + + ribbon_colors <- c("#e7f3f9", "#bbddee", "#88c3e1", "#4ba5d1", "#228fc6") + names(ribbon_colors) <- c("90%", "70%", "50%", "30%", "10%") + + mindvdate <- min(discharge_dv$Date) + maxdvdate <- max(discharge_dv$Date) + minuvdate <- min(discharge_uv$dateTime) + maxuvdate <- max(discharge_uv$dateTime) + + + #plots ---- + message("building charts") + + ggplot(data = river_stats_dv %>% filter(Date > Sys.time() - days(61), + Date < Sys.time() + days(30))) + + geom_ribbon(aes(x = dateTime, + ymin = X5., + ymax = X95., + fill = "90%"), + alpha = 1) + + geom_ribbon(aes(x = dateTime, + ymin = X15., + ymax = X85., + fill = "70%"), + alpha = 1) + + geom_ribbon( aes(x = dateTime, + ymin = X25., + ymax = X75., + fill = "50%"), + alpha = 1) + + geom_ribbon(aes(x = dateTime, + ymin = X35., + ymax = X65., + fill = "30%"), + alpha = 1) + + geom_ribbon(aes(x = dateTime, + ymin = X45., + ymax = X55., + fill = "10%"), + alpha = 1) + + geom_line(data = discharge_uv %>% + filter(dateTime > floor_date(Sys.time(), "day") - days(60)), + aes(x = dateTime, + y = X_00060_00000, + color = 'current year'), + linewidth = 0.5) + + scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") + + scale_x_datetime(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(mult = c(0,0))) + + scale_y_continuous(label = comma) + + scale_color_manual(values = "black") + + theme_bw() + + theme(axis.text.x = element_text(angle = 60, hjust = 1), + plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + guides(fill = guide_legend(label.position = "right", + nrow = 5), + color = guide_legend(title = NULL, + label.position = "bottom", + nrow = 1)) + + labs(title = paste0(rivername, " River discharge - last two months (",format(Sys.time() - days(61), "%B %e"), " - ", format(Sys.time(), "%B %e, %Y"), ")"), + subtitle = paste("historical data", format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")), + caption = "data: USGS", + x = NULL, + y = "Discharge (cubic feet per second)") + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last_two_months.png"), + device = "png", + height = 11, + width = 17, + units = "in") + + ggplot(data = river_stats_dv %>% filter(Date > Sys.time() - months(6) - days(1), + Date < Sys.time() + months(3)) + ) + + geom_ribbon(aes(x = dateTime, + ymin = X5., + ymax = X95., + fill = "90%"), + alpha = 1) + + geom_ribbon(aes(x = dateTime, + ymin = X15., + ymax = X85., + fill = "70%"), + alpha = 1) + + geom_ribbon( aes(x = dateTime, + ymin = X25., + ymax = X75., + fill = "50%"), + alpha = 1) + + geom_ribbon(aes(x = dateTime, + ymin = X35., + ymax = X65., + fill = "30%"), + alpha = 1) + + geom_ribbon(aes(x = dateTime, + ymin = X45., + ymax = X55., + fill = "10%"), + alpha = 1) + + geom_line(data = discharge_uv %>% + filter(dateTime > floor_date(Sys.time(), "day") - months(6)), + aes(x = dateTime, + y = X_00060_00000, + color = "current year"), + linewidth = 0.5) + + scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") + + scale_x_datetime(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0,0))) + + scale_y_continuous(label = comma, expand = expansion(mult = c(0, 0.1)), limits = c(0, NA)) + + scale_color_manual(values = "black") + + theme_bw() + + theme(axis.text.x = element_text(hjust = -1), + panel.grid.minor = element_blank(), + plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + guides(fill = guide_legend(label.position = "right", + nrow = 5), + color = guide_legend(title = NULL, + label.position = "bottom", + nrow = 1)) + + labs(title = paste0(rivername, " River discharge - last 6 months (",format(Sys.time() - months(6), "%B %e, %Y"), " - ", format(Sys.time(), "%B %e, %Y"), ")"), + subtitle = paste("historical data", format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")), + caption = "data: USGS", + x = NULL, + y = "Discharge (cubic feet per second)") + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last_6_months.png"), + device = "png", + height = 11, + width = 17, + units = "in") + + ggplot(data = river_stats_dv %>% filter(year(Date) %in% year(Sys.Date()))) + + geom_ribbon(aes(x = Date, + ymin = X5., + ymax = X95., + fill = "90%"), + alpha = 1) + + geom_ribbon(aes(x = Date, + ymin = X15., + ymax = X85., + fill = "70%"), + alpha = 1) + + geom_ribbon( aes(x = Date, + ymin = X25., + ymax = X75., + fill = "50%"), + alpha = 1) + + geom_ribbon(aes(x = Date, + ymin = X35., + ymax = X65., + fill = "30%"), + alpha = 1) + + geom_ribbon(aes(x = Date, + ymin = X45., + ymax = X55., + fill = "10%"), + alpha = 1) + + geom_line(data = discharge_dv %>% + filter(Date > ymd(paste(year(Sys.Date()) - 5, 1, 1))) %>% + mutate(years = year(Date)), + aes(x = ymd(paste(year(Sys.Date()), daymonth)), + y = X_00060_00003, + color = "Daily Value"), + linewidth = 0.5) + + facet_grid(years ~ ., scales = "free_x", space = "free_x") + + scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") + + scale_color_manual(values = "black") + + scale_x_date(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0,0))) + + scale_y_continuous(label = comma) + + theme_bw() + + theme(axis.text.x = element_text(hjust = -2.5), + plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + guides(fill = guide_legend(label.position = "right", + nrow = 5), + color = guide_legend(title = NULL, + label.position = "bottom", + nrow = 1)) + + coord_cartesian(ylim = c(0, max(river_stats_dv$X95.))) + + labs(title = paste(rivername, "River discharge - last 5 years"), + subtitle = paste(format(ymd(paste(year(Sys.Date()) - 5, 1, 1)), "%B %e, %Y"), "-", format(Sys.Date(), "%B %e, %Y")), + caption = "data: USGS", + x = NULL, + y = "Discharge (cubic feet per second)") + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last5years.png"), + device = "png", + height = 11, + width = 17, + units = "in") + + ggplot() + + geom_line(data = discharge_dv %>% + filter(Date > ymd(paste(year(Sys.Date()) - 5, 1, 1))) %>% + mutate(years = year(Date)), + aes(x = ymd(paste(year(Sys.Date()), daymonth)), + y = X_00060_00003, + color = as.factor(years)), + linewidth = 1, + alpha = 0.5) + + scale_color_brewer(palette = "Set3") + + scale_x_date(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0.001,0))) + + scale_y_continuous(label = comma) + + theme_bw() + + theme(axis.text.x = element_text(hjust = -1), + plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + labs(title = paste(rivername, "River discharge - last 5 years"), + subtitle = paste(format(ymd(paste(year(Sys.Date()) - 5, 1, 1)), "%B %e, %Y"), "-", format(Sys.Date(), "%B %e, %Y")), + caption = "USGS data", + x = NULL, + y = "Discharge (cubic feet per second)") + + guides(color = guide_legend(title = NULL, + label.position = "right", + legend.key.size = unit(3,"line"))) + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last5years_comb.png"), + device = "png", + height = 11, + width = 17, + units = "in") + + ggplot() + + geom_boxplot(data = discharge_uv %>% + filter(dateTime > ymd(paste(year(Sys.Date()) - 5, 1, 1))) %>% + mutate(year = year(dateTime), + month = month(dateTime, label = TRUE)), + aes(x = as.factor(month), + y = X_00060_00000, + fill = as.factor(year)), + outlier.shape=NA, + size = 0.1) + + geom_vline(xintercept=c(seq(1.5, 11.5, 1)), colour='black', linewidth = 0.3, linetype = "dotted") + + scale_fill_brewer(palette = "Set3") + + scale_y_continuous(label = comma) + + coord_cartesian(ylim = c(0, max(river_stats_dv$X95.))) + + theme_bw() + + theme(panel.grid.major = element_blank(), + panel.grid.minor.y = element_blank(), + panel.grid.minor.x = element_line(), + axis.ticks.x = element_blank(), + plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + guides(fill = guide_legend(title = NULL, + label.position = "right")) + + labs(title = paste(rivername, "River discharge - last 5 years"), + subtitle = paste(format(ymd(paste(year(Sys.Date()) - 5, 1, 1)), "%B %e, %Y"), "-", format(Sys.Date(), "%B %e, %Y")), + caption = "data: USGS", + x = NULL, + y = "Discharge (cubic feet per second)") + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_boxplot.png"), + device = "png", + height = 11, + width = 17, + units = "in") + + ggplot(data = river_stats_dv %>% filter(year(Date) %in% year(Sys.Date()))) + + geom_ribbon(aes(x = Date, + ymin = X5., + ymax = X95., + fill = "90%"), + alpha = 0.5) + + geom_ribbon(aes(x = Date, + ymin = X15., + ymax = X85., + fill = "70%"), + alpha = 0.5) + + geom_ribbon( aes(x = Date, + ymin = X25., + ymax = X75., + fill = "50%"), + alpha = 0.5) + + geom_ribbon(aes(x = Date, + ymin = X35., + ymax = X65., + fill = "30%"), + alpha = 0.5) + + geom_ribbon(aes(x = Date, + ymin = X45., + ymax = X55., + fill = "10%"), + alpha = 0.5) + + geom_line(data = discharge_dv %>% + filter(Date > ymd(paste(year(Sys.Date()), 1, 1))), + aes(x = Date, + y = X_00060_00003, + color = "current year"), + linewidth = 0.7) + + scale_x_date(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0.001,0))) + + scale_y_continuous(label = comma, expand = expansion(mult = c(0, NA))) + + scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") + + scale_color_manual(values = "black") + + coord_cartesian(ylim = c(0, max(river_stats_dv$X95.))) + + theme_bw() + + theme(axis.text.x = element_text(hjust = -1), + plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + guides(fill = guide_legend(label.position = "right", + nrow = 5), + color = guide_legend(label.position = "bottom", + nrow = 1)) + + labs(title = paste(rivername, "River discharge - current year compared to historical"), + subtitle = paste("historical data", format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")), + caption = "data: USGS", + x = NULL, + y = "Discharge (cubic feet per second)", + color = NULL) + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_current_compared.png"), + device = "png", + height = 11, + width = 17, + units = "in") + + ggplot(data = discharge_dv %>% + mutate(decade = floor(year(Date)/10)*10)) + + geom_boxplot(aes(x = Date, + y = X_00060_00003, + group = decade), + outlier.shape = NA) + + theme(plot.caption = element_text(color = "grey50"), + plot.subtitle = element_text(color = "grey50")) + + coord_cartesian(ylim = c(0, max(river_stats_dv$X85.))) + + labs(title = paste(rivername, "River - discharge by decade"), + subtitle = paste(format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")), + caption = "data: USGS", + x = "Decade", + y = "Discharge (cubic feet per second)") + ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_decades.png"), + device = "png", + height = 11, + width = 17, + units = "in") +} +message(paste0("Finished - ", Sys.time()))