starttime <- Sys.time()
#setup ----
library(tidyverse)
library(lubridate)
library(ggmap)
library(ggrepel)

setwd("~/Documents/dataProjects/COVID")

#download data ----
if(file.exists("data_download_time.Rda")) {
  load("data_download_time.Rda")
} else {
  downloaded_dttm <- ymd_hms('1900_01_01:00:00:00')
}

if(as.double(difftime(downloaded_dttm, Sys.time()), units = "hours") < -2){
  
  print("Downloading today's data")
  
  us_county_data <- read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
  save(us_county_data, file = 'data/us_county_data.Rda')

  downloaded_dttm <- Sys.time()
  save(downloaded_dttm, file = "data_download_time.Rda")
  
} else {
  print("data is current")
  load('data/us_county_data.Rda')
}

##to redownload data, uncomment the following 2 lines
#downloaded_dttm <- downloaded_dttm - days(1)
#save(downloaded_dttm, file = "data_download_time.Rda")

#us_county_data <- read.csv("data/us-counties.csv")
#cleanup data ----
#make dates in a date format
us_county_data <- us_county_data %>%
  mutate(date = ymd(date)) %>%
  mutate(week = floor_date(date, "weeks", week_start = 1))

maxdate <- max(us_county_data$date)

#load county pop data
us_county_pop <- read.csv("data/co-est2020-alldata.csv")

us_county_pop <- us_county_pop %>%
  mutate(state = STNAME,
         county = ifelse(state != "Louisiana", sub(" County.*", "", CTYNAME), sub(" Parish.*", "", CTYNAME))) %>%
  mutate(county_state = paste0(county, ", ", state))

us_county_data["countypop"] <- us_county_pop[match(paste0(us_county_data$county, ", ", us_county_data$state), us_county_pop$county_state), 13]

us_county_data <- us_county_data %>% 
  group_by(state, county) %>%
  arrange(date) %>%
  mutate(county_state = paste0(county,", ", state),
         newcases = cases - lag(cases, n = 1, default = 0),
         twowkcases = cases - lag(cases, n = 14, default = 0)) %>%
  mutate(active_estimate = twowkcases/countypop * 100000)


important_counties <- list(washington = c("Clallam", 
                                          "Jefferson", 
                                          "Kitsap",
                                          "King"),
                           wisconsin = c("Milwaukee",
                                         "Dane",
                                         "Vilas",
                                         "Brown", 
                                         "Door"),
                           illinois = c("Cook"))

noted_dates_wa<- data.frame(event = c("Stay-at-home begins", "Phase 1 reopening", "Phase 2 reopening"), 
                            eventdate <- c(ymd("2020-03-23"), ymd("2020-04-29"), ymd("2020-06-01")))


us_county_map <- map_data("county")
us_county_map["county_state"] <- paste0(us_county_map$subregion, ", ", us_county_map$region)

us_county_map <- merge(us_county_map,
                       us_county_data %>%
                         filter(date %in% maxdate) %>%
                         mutate(county_state = tolower(county_state)), 
                       by.x = "county_state",
                       by.y = "county_state",
                       all = TRUE)

us_county_label <- us_county_map %>%
  group_by(state, county) %>%
  summarise(long = (max(long)+min(long))/2, lat = (max(lat)+min(lat))/2) %>%
  ungroup()

#Washington plots ----
#graph nearby active cases
ggplot(data = us_county_data %>% filter(state %in% "Washington",
                                        county %in% important_counties$washington)) +
  geom_line(aes(x = date,
                y = twowkcases/countypop * 100000,
                color = county,
                alpha = ifelse(county == "Clallam", 1, 1)),
            size = 0.5) +
  geom_label(data = us_county_data %>% filter(state %in% "Washington",
                                              county %in% important_counties$washington,
                                              date %in% max(date)),
             aes(x = date + 1,
                 y = twowkcases/countypop * 100000,
                 label = paste0(county,' - ', round(active_estimate,0)),
                 fill = county),
             hjust = "outward",
             size = 2.5) +
  scale_color_brewer(palette = "Set2") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_alpha_continuous(limits = c(0, 1), guide = "none") +
  scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y", minor_breaks = "1 week", expand = expansion(mult = c(0.01, 0.07))) +
  scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
  labs(title = "Case rate - nearby counties",
       subtitle = paste("Through", format(downloaded_dttm, "%B %e, %Y")),
       x = "Date",
       color = NULL,
       y = "new cases in previous 14 days per 100,000 people",
       caption = "data from The New York Times") +
  theme_bw() +
  theme(panel.grid.major.x = element_line(colour="black", size = 0.1), 
        axis.text.x = element_text(angle = 60, hjust = 1),
        plot.subtitle = element_text(color = "grey50"),
        plot.caption = element_text(color = "grey50"),
        legend.position = "bottom") +
  coord_cartesian(ylim = c(0,NA),
                  clip = "off")
ggsave(filename = "nearby_caserate.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

#plot new cases per week
ggplot() +
  facet_grid(county ~ .) +
  geom_col(data = us_county_data %>% filter(state %in% "Washington",
                                            county %in% important_counties$washington,
                                            county != "King"),
           aes(x = week,
               y = newcases,
               fill = county),
           position = "dodge") +
  theme_bw() +
  scale_fill_brewer(palette = "Set2") +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) +
  scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) +
  theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(title = "New cases of COVID-19 per week",
       subtitle = paste("through", format(maxdate, "%b %e")),
       x = "Week of",
       y = "Number of new cases")
ggsave(filename = "pen_new_per_week.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

#plot new cases per week per 100000 people
ggplot() +
  facet_grid(county ~ .) +
  geom_col(data = us_county_data %>% filter(state %in% "Washington",
                                            county %in% important_counties$washington,
                                            county != "King"),
           aes(x = week,
               y = newcases/countypop,
               fill = county),
           position = "dodge") +
  theme_bw() +
  scale_fill_brewer(palette = "Set2") +
  scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) +
  scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) +
  theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) +
  labs(title = "New cases of COVID-19 per week - population normalized",
       subtitle = paste("through", format(maxdate, "%b %e")),
       x = "Week of",
       y = "Number of new cases per 100,000 people")
ggsave(filename = "pen_new_per_week_popnorm.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")


#all WA counties new cases per 100000
ggplot() +
  facet_wrap(county ~ .) +
  geom_rect(data = us_county_data %>% 
              filter(state %in% "Washington",
                     !county %in% important_counties$washington),
            aes(xmin = ymd("2020-01-01"),
                xmax = maxdate + 30,
                ymin = -100,
                ymax = 10000),
            fill = '#f2f2f2',
            alpha = 0.1) +
  geom_line(data = us_county_data %>% 
              filter(state %in% "Washington"),
            aes(x = date,
                y = active_estimate, 
                color = active_estimate),
            size = 1) +
  geom_line(data = us_county_data %>% filter(state %in% "Washington"),
            aes(x = date,
                y = active_estimate),
            color = "Black",
            size = 0.1) +
  theme_bw() +
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_color_distiller(palette = "YlOrRd", direction = 1, guide = "none") +
  theme(panel.grid.minor.x = element_blank(), 
        axis.text.x = element_text(angle = 60, hjust = 1),
        legend.position="bottom",
        strip.text = element_text(size = 10),
        panel.spacing = unit(0.1, "lines")) +
  coord_cartesian(xlim = c(min(us_county_data %>% filter(state %in% "Washington") %>% pull(date), na.rm = TRUE), maxdate),
                  ylim = c(0, max(us_county_data %>% filter(state %in% "Washington") %>% pull(active_estimate), na.rm = TRUE))) +
  labs(title = "Estimate of active cases by county",
       subtitle = paste("through", format(maxdate, "%b %e")),
       x = NULL,
       y = "New cases per 100,000 people in previous two weeks")
ggsave(filename = "WA_new_week_person.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       height = 8.5,
       width = 11,
       units = "in")


#new cases in the last two weeks
ggplot() +
  geom_col(data = us_county_data %>% 
             filter(state %in% "Washington",
                    date == maxdate) %>%
             mutate(peninsula = ifelse(county %in% important_counties$washington, T, F)),
           aes(x = reorder(county, active_estimate),
               y = active_estimate,
               fill = peninsula)) +
  scale_fill_manual(values = c('#595959', 'darkgreen'), guide = NULL) +
  theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
  scale_y_continuous(limits = c(0, NA)) +
  labs(title = "Cumulative new cases in the last two weeks",
       subtitle = paste(Sys.Date()-days(14), "-", Sys.Date()),
       x = "County",
       y = "Cumulative new cases in the last two weeks (per 100,000 residents)")
ggsave(filename = "WA_new_twoweek_column.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       height = 8.5,
       width = 11,
       units = "in")


#map latest cases
#make bounding box for maps

buffer <- 0.1
states <- c("Washington", "Wisconsin", "Illinois")
max_county_value <- max(us_county_data %>% filter(state %in% states) %>% pull(active_estimate), na.rm = TRUE)

for(i in 1:length(states)){
  if(file.exists(paste0("data/", states[i], "_basemap.RData"))){
    load(file = paste0("data/", states[i], "_basemap.RData"))
  } else {
    range <- c(left = min(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) - buffer, 
               bottom = min(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) - buffer, 
               right = max(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) + buffer, 
               top = max(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) + buffer)
    
    basemap <- ggmap::get_stamenmap(bbox = range,
                                    zoom = 8,
                                    maptype = "toner-hybrid")
    save(basemap, file = paste0("data/", states[i], "_basemap.RData"))
  }
  
  ggmap(basemap) +
    geom_polygon(data = us_county_map %>% filter(state %in% states[i]) %>% arrange(order), 
                 aes(x = long, 
                     y = lat, 
                     group = group, 
                     fill = active_estimate),
                 colour = "grey50",
                 alpha = 0.5) +
    geom_text(data= us_county_label %>% filter(state %in% states[i]), 
              aes(x = long,
                  y = lat,
                  label = str_to_title(county)),  
              size = 2)+
    scale_fill_distiller(palette = "Spectral") +
    theme_bw() +
    scale_x_continuous(breaks = NULL) +
    scale_y_continuous(breaks = NULL) +
    theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
          plot.subtitle = element_text(color = "grey50"),
          plot.caption = element_text(color = "grey50"),
          legend.position = "bottom",
          legend.text = element_text(angle = 60, hjust = 1)) +
    labs(title = "Cumulative new cases in the last two weeks",
         subtitle = format(downloaded_dttm, "%B %e"),
         x = NULL,
         y = NULL,
         fill = "Cumulative new cases per\n100,000 residents")
  ggsave(filename = paste0("map_active_", tolower(substr(states[i], 1, 2)), ".png"), 
         path = paste(getwd(),"/figures/", sep = ""), 
         device = "png",
         width = 11,
         height = 8.5,
         units = "in")
  
}

ggplot() +
  geom_polygon(data = us_county_map %>%
                 mutate(active_estimate = ifelse(active_estimate < 0, 0, active_estimate)) %>%
                 arrange(order), 
               aes(x = long, 
                   y = lat, 
                   group = group, 
                   fill = active_estimate),
               colour = NA) +
  scale_fill_gradientn(colors = rev(RColorBrewer::brewer.pal(n = 9, name = "Spectral")), trans = "log10") +
  theme_bw() +
  scale_x_continuous(breaks = NULL) +
  scale_y_continuous(breaks = NULL) +
  theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
        plot.subtitle = element_text(color = "grey50"),
        plot.caption = element_text(color = "grey50"),
        legend.position = "bottom",
        legend.text = element_text(angle = 60, hjust = 1)) +
  labs(title = "Cumulative new cases in the last two weeks",
       subtitle = format(downloaded_dttm, "%B %e"),
       x = NULL,
       y = NULL,
       fill = "Cumulative new cases per\n100,000 residents")
ggsave(filename = "map_active_us.png", 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

# WI data ----
#Milwaukee county plots
# add dates

noted_dates_wi<- data.frame(event = c("Stay-at-home order", "Restrictions lifted"), 
                            eventdate = c(ymd("2020-03-25"), ymd("2020-05-14")))


# WI plots ----
#graph active cases for select counties
ggplot(us_county_data %>% filter(state %in% "Wisconsin",
                                 county %in% important_counties$wisconsin)) +
  geom_line(data = us_county_data %>% filter(state %in% "Washington",
                                             county %in% c("Clallam", "King")),
            aes(x = date,
                y = active_estimate,
                group = county),
            color = "grey",
            alpha = 0.5) +
  geom_line(aes(x = date,
                y = active_estimate,
                color = county),
            size = 0.75) +
  geom_label_repel(data = us_county_data %>% filter(state %in% "Washington",
                                                    county %in% c("Clallam", "King"),
                                                    date %in% max(date)),
                   aes(x = date + 1,
                       y = active_estimate,
                       label = county),
                   hjust = "outward",
                   direction = "y",
                   size = 2.5,
                   color = "grey",) +
  geom_label_repel(data = us_county_data %>% filter(state %in% "Wisconsin",
                                                    county %in% important_counties$wisconsin,
                                                    date %in% max(date)),
                   aes(x = date + 1,
                       y = active_estimate,
                       label = paste0(county,' - ', round(active_estimate,0)),
                       fill = county),
                   hjust = "outward",
                   direction = "y",
                   size = 2.5,
                   nudge_x = 4,
                   segment.color = "black",
                   segment.size = 0.3) +
  scale_color_brewer(palette = "Set2", guide = "none") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_date(date_breaks = "1 month", 
               date_labels = "%b\n%Y", 
               minor_breaks = "1 week", 
               expand = expansion(mult = c(0.02, 0.1))) +
  scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
  labs(title = "Active case estimate - Wisconsin counties",
       subtitle = paste("Through", format(downloaded_dttm, "%B %e")),
       x = "Date",
       y = "new cases in previous 14 days per 100,000 people",
       color = "County") +
  theme_bw() +
  theme(panel.grid.major.x = element_line(colour="black", size = 0.1), axis.text.x = element_text(angle = 60, hjust = 1)) +
  coord_cartesian(ylim = c(0,NA))
ggsave(filename = "WI_active_cases.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

ggplot() +
  geom_line(data = us_county_data %>% 
              filter(county_state %in% c("Clallam, Washington", "Cook, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", "Alameda, California", 'Salt Lake, Utah')),
            aes(x = date,
                y = active_estimate,
                color = county)) +
  geom_label_repel(data = us_county_data %>% 
                     filter(county_state %in% c("Clallam, Washington", "Cook, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", "Alameda, California", 'Salt Lake, Utah'),
                            date %in% maxdate),
                   aes(x = date + 0.5,
                       y = active_estimate,
                       label = paste0(county,' - ', round(active_estimate,0)),
                       fill = county),
                   hjust = "outward",
                   direction = "y",
                   size = 2.5,
                   nudge_x = 4,
                   box.padding = 0.01,
                   min.segment.length = 0,
                   segment.color = "black",
                   segment.size = 0.1) +
  scale_color_brewer(palette = "Set2", guide = "none") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_date(date_breaks = "1 month", 
               date_labels = "%b\n%Y", 
               minor_breaks = "1 week", 
               expand = expansion(mult = c(0.01, .07))) +
  scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
  labs(title = "Active case estimate",
       subtitle = paste("Through", format(downloaded_dttm, "%B %e")),
       x = "Date",
       y = "new cases in previous 14 days per 100,000 people",
       color = "County",
       caption = "data from The New York Times") +
  theme_bw() +
  theme(panel.grid.major.x = element_line(colour="black", size = 0.1), 
        axis.text.x = element_text(angle = 60, hjust = 1),
        plot.subtitle = element_text(color = "grey50"),
        plot.caption = element_text(color = "grey50")) +
  coord_cartesian(ylim = c(0,NA))
ggsave(filename = "Important_active_cases.png" , 
       path = paste(getwd(),"/figures/", sep = ""), 
       device = "png",
       width = 11,
       height = 8.5,
       units = "in")

#  Animation ####
if(FALSE){
  
  library(gganimate)
  
  anim <- ggplot(data = us_county_data %>% filter(state %in% "Washington"),
                 aes(x = active_estimate,
                     y = deaths/countypop * 100000)) +
    geom_point(aes(size = countypop,
                   color = active_estimate)) +
    scale_color_distiller(palette = "Spectral") +
    labs(title = "Cases and deaths by county in WA",
         subtitle = "Date: {frame_time}",
         x = "Cases per 100,000 residents",
         y = "Deaths per 100,000 residents",
         color = "active case estimate",
         size = "population") +
    transition_time(date) +
    ease_aes('linear')
  
  animate(anim,
          nframes = as.double(difftime(maxdate, ymd("2020_01_21"), units = "days")))
  
  anim_save(filename = "WA_cases_county.gif", 
            animation = last_animation(), 
            path = paste(getwd(),"/figures/", sep = ""), 
            fps = 1, 
            loop = 0,)
} else {
  message("no animation this time")
}

message(difftime(Sys.time(), starttime))
rm(starttime)