library(dataRetrieval)
library(tidyverse)
library(RColorBrewer)
library(scales)

setwd("/home/ben/Documents/dataProjects/USGS_NWIS")

rivers <- read_csv(file = "river_IDs.csv", col_types = c("c", "c"))

#rivers <- rivers %>% filter(names %in% c("Elwha"))
#rivers <- rivers %>% filter(names %in% c("Duckabush"))
rivers <- rivers %>% filter(names %in% c("Salmon"))


ifelse(!dir.exists(file.path(getwd(), "figures")), dir.create(file.path(getwd(), "figures")), FALSE)
ifelse(!dir.exists(file.path(getwd(), "data")), dir.create(file.path(getwd(), "data")), FALSE)

for(i in 1:nrow(rivers)){
  rivername <- rivers$names[i]
  message(paste0(i, " of ", nrow(rivers), ": ", rivername, " River - ", Sys.time()))
  ifelse(!dir.exists(file.path(getwd(), paste0("figures/", rivername))), dir.create(file.path(getwd(), paste0("figures/", rivername))), FALSE)
  # this downloads most recent discharge daily value data for the river
  if(sum(list.files(path = "data") %>% match(paste0(rivername,"_discharge_dv.Rda")), na.rm = TRUE) == 0){
    message("downloading all data")
    discharge_dv <- readNWISdv(siteNumber = rivers$siteNumber[i], parameterCd = "00060")
  } else {
    
    load(paste0("data/",rivername,"_discharge_dv.Rda"))
    message(paste0("downloading new data: previous ", -1 * difftime(max(discharge_dv$Date), Sys.Date()), " days"))
    discharge_dv <- bind_rows(discharge_dv,
                              readNWISdv(siteNumber = rivers$siteNumber[i], parameterCd = "00060", startDate = as.Date(max(discharge_dv$Date)) - days(1)) %>% 
                                filter(Date > max(discharge_dv$Date)))
  }
  save(discharge_dv, file = paste0("data/",rivername,"_discharge_dv.Rda"))
  
  # this downloads most recent discharge unit value (every 15 min) data for the river
  if(sum(list.files(path = "data") %>% match(paste0(rivername,"_discharge_uv.Rda")), na.rm = TRUE) == 0){
    discharge_uv <- readNWISuv(siteNumber = rivers$siteNumber[i], parameterCd = "00060")
  } else {
    load(paste0("data/",rivername,"_discharge_uv.Rda"))
    discharge_uv <- bind_rows(discharge_uv,
                              readNWISuv(siteNumber = rivers$siteNumber[i], parameterCd = "00060", startDate = as.Date(max(discharge_uv$dateTime)) - days(1)) %>% 
                                filter(dateTime > max(discharge_uv$dateTime)))
  }
  save(discharge_uv, file = paste0("data/",rivername,"_discharge_uv.Rda"))
  
  message("processing data")
  discharge_dv["daymonth"] <- format(discharge_dv$Date, "%m%d")
  discharge_uv["daymonth"] <- format(discharge_uv$dateTime, "%m%d")
  
  #removes February 29th leap days from data
  discharge_dv <- discharge_dv %>% filter(daymonth != "0229")
  discharge_uv <- discharge_uv %>% filter(daymonth != "0229")
  
  
  #builds stats
  dayofyear <- unique(discharge_dv %>% arrange(daymonth) %>% pull(daymonth))
  quantiles <- lapply(dayofyear, function(i) 
    quantile(discharge_dv %>% filter(daymonth == i, !is.na(X_00060_00003)) %>% pull(X_00060_00003), 
             probs = c(seq(0.05, 0.95, 0.1), 0.5)))
  
  river_stats_dv <- data.frame(dayofyear, bind_rows(quantiles))
  river_stats_dv["Date"] <- ymd(paste0(year(Sys.Date()), river_stats_dv$dayofyear))
  river_stats_dv["dateTime"] <- ymd_h(paste0(year(Sys.Date()), river_stats_dv$dayofyear, 12))
  river_stats_dv <- bind_rows(river_stats_dv, river_stats_dv %>% mutate(Date = Date + years(1), dateTime = dateTime + years(1)), river_stats_dv %>% mutate(Date = Date - years(1), dateTime = dateTime - years(1)))
  
  river_states_ribbon <- bind_rows(
    data.frame(river_stats_dv %>% mutate(min = X5., max = X95.) %>% select(dayofyear, dateTime, min, max), "median" = 90),
    data.frame(river_stats_dv %>% mutate(min = X15., max = X25.) %>% select(dayofyear, dateTime, min, max), "median" = 70),
    data.frame(river_stats_dv %>% mutate(min = X25., max = X35.) %>% select(dayofyear, dateTime, min, max), "median" = 50),
    data.frame(river_stats_dv %>% mutate(min = X35., max = X45.) %>% select(dayofyear, dateTime, min, max), "median" = 30),
    data.frame(river_stats_dv %>% mutate(min = X45., max = X55.) %>% select(dayofyear, dateTime, min, max), "median" = 10))
  
  
  ribbon_colors <- c("#e7f3f9", "#bbddee", "#88c3e1", "#4ba5d1", "#228fc6")
  names(ribbon_colors) <- c("90%", "70%", "50%", "30%", "10%")
  
  mindvdate <- min(discharge_dv$Date)
  maxdvdate <- max(discharge_dv$Date)
  minuvdate <- min(discharge_uv$dateTime)
  maxuvdate <- max(discharge_uv$dateTime)
  
  
  #plots ----
  message("building charts")
  
  ggplot(data = river_stats_dv %>% filter(Date > Sys.time() - days(61),
                                          Date < Sys.time() + days(30))) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X5.,
                    ymax = X95.,
                    fill = "90%"),
                alpha = 1) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X15.,
                    ymax = X85.,
                    fill = "70%"),
                alpha = 1) +
    geom_ribbon( aes(x = dateTime,
                     ymin = X25.,
                     ymax = X75.,
                     fill = "50%"),
                 alpha = 1) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X35.,
                    ymax = X65.,
                    fill = "30%"),
                alpha = 1) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X45.,
                    ymax = X55.,
                    fill = "10%"),
                alpha = 1) +
    geom_line(data = discharge_uv %>%
                filter(dateTime > floor_date(Sys.time(), "day") - days(60)),
              aes(x = dateTime,
                  y = X_00060_00000,
                  color = 'current year'),
              linewidth = 0.5) +
    scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") +
    scale_x_datetime(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(mult = c(0,0))) +
    scale_y_continuous(label = comma) +
    scale_color_manual(values = "black") +
    theme_bw() +
    theme(axis.text.x = element_text(angle = 60, hjust = 1),
          plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    guides(fill = guide_legend(label.position = "right",
                               nrow = 5),
           color = guide_legend(title = NULL,
                                label.position = "bottom",
                                nrow = 1)) +
    labs(title = paste0(rivername,  " River discharge - last two months (",format(Sys.time() - days(61), "%B %e"), " - ", format(Sys.time(), "%B %e, %Y"), ")"),
         subtitle = paste("historical data", format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")),
         caption = "data: USGS",
         x = NULL,
         y = "Discharge (cubic feet per second)")
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last_two_months.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
  
  ggplot(data = river_stats_dv %>% filter(Date > Sys.time() - months(6) - days(1),
                                          Date < Sys.time() + months(3))
  ) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X5.,
                    ymax = X95.,
                    fill = "90%"),
                alpha = 1) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X15.,
                    ymax = X85.,
                    fill = "70%"),
                alpha = 1) +
    geom_ribbon( aes(x = dateTime,
                     ymin = X25.,
                     ymax = X75.,
                     fill = "50%"),
                 alpha = 1) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X35.,
                    ymax = X65.,
                    fill = "30%"),
                alpha = 1) +
    geom_ribbon(aes(x = dateTime,
                    ymin = X45.,
                    ymax = X55.,
                    fill = "10%"),
                alpha = 1) +
    geom_line(data = discharge_uv %>%
                filter(dateTime > floor_date(Sys.time(), "day") - months(6)),
              aes(x = dateTime,
                  y = X_00060_00000,
                  color = "current year"),
              linewidth = 0.5) +
    scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") +
    scale_x_datetime(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0,0))) +
    scale_y_continuous(label = comma, expand = expansion(mult = c(0, 0.1)), limits = c(0, NA)) +
    scale_color_manual(values = "black") +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = -1),
          panel.grid.minor = element_blank(),
          plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    guides(fill = guide_legend(label.position = "right",
                               nrow = 5),
           color = guide_legend(title = NULL,
                                label.position = "bottom",
                                nrow = 1)) +
    labs(title = paste0(rivername,  " River discharge - last 6 months (",format(Sys.time() - months(6), "%B %e, %Y"), " - ", format(Sys.time(), "%B %e, %Y"), ")"),
         subtitle = paste("historical data", format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")),
         caption = "data: USGS",
         x = NULL,
         y = "Discharge (cubic feet per second)")
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last_6_months.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
  
  ggplot(data = river_stats_dv %>% filter(year(Date) %in% year(Sys.Date()))) +
    geom_ribbon(aes(x = Date,
                    ymin = X5.,
                    ymax = X95.,
                    fill = "90%"),
                alpha = 1) +
    geom_ribbon(aes(x = Date,
                    ymin = X15.,
                    ymax = X85.,
                    fill = "70%"),
                alpha = 1) +
    geom_ribbon( aes(x = Date,
                     ymin = X25.,
                     ymax = X75.,
                     fill = "50%"),
                 alpha = 1) +
    geom_ribbon(aes(x = Date,
                    ymin = X35.,
                    ymax = X65.,
                    fill = "30%"),
                alpha = 1) +
    geom_ribbon(aes(x = Date,
                    ymin = X45.,
                    ymax = X55.,
                    fill = "10%"),
                alpha = 1) +
    geom_line(data = discharge_dv %>%
                filter(Date > ymd(paste(year(Sys.Date()) - 5, 1, 1))) %>%
                mutate(years = year(Date)),
              aes(x = ymd(paste(year(Sys.Date()), daymonth)),
                  y = X_00060_00003,
                  color = "Daily Value"),
              linewidth = 0.5) +
    facet_grid(years ~ ., scales = "free_x", space = "free_x") +
    scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") +
    scale_color_manual(values = "black") +
    scale_x_date(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0,0))) +
    scale_y_continuous(label = comma) +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = -2.5),
          plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    guides(fill = guide_legend(label.position = "right",
                               nrow = 5),
           color = guide_legend(title = NULL,
                                label.position = "bottom",
                                nrow = 1)) +
    coord_cartesian(ylim = c(0, max(river_stats_dv$X95.))) +
    labs(title = paste(rivername, "River discharge - last 5 years"),
         subtitle = paste(format(ymd(paste(year(Sys.Date()) - 5, 1, 1)), "%B %e, %Y"), "-", format(Sys.Date(), "%B %e, %Y")),
         caption = "data: USGS",
         x = NULL,
         y = "Discharge (cubic feet per second)")
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last5years.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
  
  ggplot() +
    geom_line(data = discharge_dv %>%
                filter(Date > ymd(paste(year(Sys.Date()) - 5, 1, 1))) %>%
                mutate(years = year(Date)),
              aes(x = ymd(paste(year(Sys.Date()), daymonth)),
                  y = X_00060_00003,
                  color = as.factor(years)),
              linewidth = 1,
              alpha = 0.5) +
    scale_color_brewer(palette = "Set3") +
    scale_x_date(date_breaks = "1 month", date_labels = "%b",   expand = expansion(mult = c(0.001,0))) +
    scale_y_continuous(label = comma) +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = -1),
          plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    labs(title = paste(rivername, "River discharge - last 5 years"),
         subtitle = paste(format(ymd(paste(year(Sys.Date()) - 5, 1, 1)), "%B %e, %Y"), "-", format(Sys.Date(), "%B %e, %Y")),
         caption = "USGS data",
         x = NULL,
         y = "Discharge (cubic feet per second)") +
    guides(color = guide_legend(title = NULL,
                                label.position = "right",
                                legend.key.size = unit(3,"line")))
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_last5years_comb.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
  
  ggplot() +
    geom_boxplot(data = discharge_uv %>%
                   filter(dateTime > ymd(paste(year(Sys.Date()) - 5, 1, 1))) %>%
                   mutate(year = year(dateTime),
                          month = month(dateTime, label = TRUE)),
                 aes(x = as.factor(month),
                     y = X_00060_00000,
                     fill = as.factor(year)),
                 outlier.shape=NA,
                 size = 0.1) +
    geom_vline(xintercept=c(seq(1.5, 11.5, 1)), colour='black', linewidth = 0.3, linetype = "dotted") +
    scale_fill_brewer(palette = "Set3") +
    scale_y_continuous(label = comma) +
    coord_cartesian(ylim = c(0, max(river_stats_dv$X95.))) +
    theme_bw() +
    theme(panel.grid.major = element_blank(), 
          panel.grid.minor.y = element_blank(), 
          panel.grid.minor.x = element_line(),
          axis.ticks.x = element_blank(),
          plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    guides(fill = guide_legend(title = NULL,
                               label.position = "right")) +
    labs(title = paste(rivername, "River discharge - last 5 years"),
         subtitle = paste(format(ymd(paste(year(Sys.Date()) - 5, 1, 1)), "%B %e, %Y"), "-", format(Sys.Date(), "%B %e, %Y")),
         caption = "data: USGS",
         x = NULL,
         y = "Discharge (cubic feet per second)")
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_boxplot.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
  
  ggplot(data = river_stats_dv %>% filter(year(Date) %in% year(Sys.Date()))) +
    geom_ribbon(aes(x = Date,
                    ymin = X5.,
                    ymax = X95.,
                    fill = "90%"),
                alpha = 0.5) +
    geom_ribbon(aes(x = Date,
                    ymin = X15.,
                    ymax = X85.,
                    fill = "70%"),
                alpha = 0.5) +
    geom_ribbon( aes(x = Date,
                     ymin = X25.,
                     ymax = X75.,
                     fill = "50%"),
                 alpha = 0.5) +
    geom_ribbon(aes(x = Date,
                    ymin = X35.,
                    ymax = X65.,
                    fill = "30%"),
                alpha = 0.5) +
    geom_ribbon(aes(x = Date,
                    ymin = X45.,
                    ymax = X55.,
                    fill = "10%"),
                alpha = 0.5) +
    geom_line(data = discharge_dv %>%
                filter(Date > ymd(paste(year(Sys.Date()), 1, 1))),
              aes(x = Date,
                  y = X_00060_00003,
                  color = "current year"),
              linewidth = 0.7) +
    scale_x_date(date_breaks = "1 month", date_labels = "%b", expand = expansion(mult = c(0.001,0))) +
    scale_y_continuous(label = comma, expand = expansion(mult = c(0, NA))) +
    scale_fill_manual(values = ribbon_colors, name = "Median X% of\nhistoric range") +
    scale_color_manual(values = "black") +
    coord_cartesian(ylim = c(0, max(river_stats_dv$X95.))) +
    theme_bw() +
    theme(axis.text.x = element_text(hjust = -1),
          plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    guides(fill = guide_legend(label.position = "right",
                               nrow = 5),
           color = guide_legend(label.position = "bottom",
                                nrow = 1)) +
    labs(title = paste(rivername, "River discharge - current year compared to historical"),
         subtitle = paste("historical data", format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")),
         caption = "data: USGS",
         x = NULL,
         y = "Discharge (cubic feet per second)",
         color = NULL)
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_current_compared.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
  
  ggplot(data = discharge_dv %>%
           mutate(decade = floor(year(Date)/10)*10)) +
    geom_boxplot(aes(x = Date,
                     y = X_00060_00003,
                     group = decade),
                 outlier.shape = NA) +
    theme(plot.caption = element_text(color = "grey50"),
          plot.subtitle = element_text(color = "grey50")) +
    coord_cartesian(ylim = c(0, max(river_stats_dv$X85.))) +
    labs(title = paste(rivername, "River - discharge by decade"),
         subtitle = paste(format(mindvdate, "%Y"), "-", format(maxdvdate, "%Y")),
         caption = "data: USGS",
         x = "Decade",
         y = "Discharge (cubic feet per second)")
  ggsave(file = paste0("figures/",rivername, "/", rivername, "Discharge_decades.png"),
         device = "png",
         height = 11,
         width = 17,
         units = "in")
}
message(paste0("Finished - ", Sys.time()))