starttime <- Sys.time() #setup ---- library(tidyverse) library(lubridate) library(ggmap) library(ggrepel) setwd("~/Documents/dataProjects/COVID") #download data ---- if(file.exists("data_download_time.Rda")) { load("data_download_time.Rda") } else { downloaded_dttm <- ymd_hms('1900_01_01:00:00:00') } if(as.double(difftime(downloaded_dttm, Sys.time()), units = "hours") < -2){ print("Downloading today's data") us_county_data <- read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv") save(us_county_data, file = 'data/us_county_data.Rda') downloaded_dttm <- Sys.time() save(downloaded_dttm, file = "data_download_time.Rda") } else { print("data is current") load('data/us_county_data.Rda') } ##to redownload data, uncomment the following 2 lines #downloaded_dttm <- downloaded_dttm - days(1) #save(downloaded_dttm, file = "data_download_time.Rda") #us_county_data <- read.csv("data/us-counties.csv") #cleanup data ---- #make dates in a date format us_county_data <- us_county_data %>% mutate(date = ymd(date)) %>% mutate(week = floor_date(date, "weeks", week_start = 1)) maxdate <- max(us_county_data$date) #load county pop data us_county_pop <- read.csv("data/co-est2020-alldata.csv") us_county_pop <- us_county_pop %>% mutate(state = STNAME, county = ifelse(state != "Louisiana", sub(" County.*", "", CTYNAME), sub(" Parish.*", "", CTYNAME))) %>% mutate(county_state = paste0(county, ", ", state)) us_county_data["countypop"] <- us_county_pop[match(paste0(us_county_data$county, ", ", us_county_data$state), us_county_pop$county_state), 13] us_county_data <- us_county_data %>% group_by(state, county) %>% arrange(date) %>% mutate(county_state = paste0(county,", ", state), newcases = cases - lag(cases, n = 1, default = 0), twowkcases = cases - lag(cases, n = 14, default = 0)) %>% mutate(active_estimate = twowkcases/countypop * 100000) important_counties <- list(washington = c("Clallam", "Jefferson", "Kitsap", "King"), wisconsin = c("Milwaukee", "Dane", "Vilas", "Brown", "Door"), illinois = c("Cook")) noted_dates_wa<- data.frame(event = c("Stay-at-home begins", "Phase 1 reopening", "Phase 2 reopening"), eventdate <- c(ymd("2020-03-23"), ymd("2020-04-29"), ymd("2020-06-01"))) us_county_map <- map_data("county") us_county_map["county_state"] <- paste0(us_county_map$subregion, ", ", us_county_map$region) us_county_map <- merge(us_county_map, us_county_data %>% filter(date %in% maxdate) %>% mutate(county_state = tolower(county_state)), by.x = "county_state", by.y = "county_state", all = TRUE) us_county_label <- us_county_map %>% group_by(state, county) %>% summarise(long = (max(long)+min(long))/2, lat = (max(lat)+min(lat))/2) %>% ungroup() #Washington plots ---- #graph nearby active cases ggplot(data = us_county_data %>% filter(state %in% "Washington", county %in% important_counties$washington)) + geom_line(aes(x = date, y = twowkcases/countypop * 100000, color = county, alpha = ifelse(county == "Clallam", 1, 1)), size = 0.5) + geom_label(data = us_county_data %>% filter(state %in% "Washington", county %in% important_counties$washington, date %in% max(date)), aes(x = date + 1, y = twowkcases/countypop * 100000, label = paste0(county,' - ', round(active_estimate,0)), fill = county), hjust = "outward", size = 2.5) + scale_color_brewer(palette = "Set2") + scale_fill_brewer(palette = "Set2", guide = "none") + scale_alpha_continuous(limits = c(0, 1), guide = "none") + scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y", minor_breaks = "1 week", expand = expansion(mult = c(0.01, 0.07))) + scale_y_continuous(expand = expansion(mult = c(0,0.1))) + labs(title = "Case rate - nearby counties", subtitle = paste("Through", format(downloaded_dttm, "%B %e, %Y")), x = "Date", color = NULL, y = "new cases in previous 14 days per 100,000 people", caption = "data from The New York Times") + theme_bw() + theme(panel.grid.major.x = element_line(colour="black", size = 0.1), axis.text.x = element_text(angle = 60, hjust = 1), plot.subtitle = element_text(color = "grey50"), plot.caption = element_text(color = "grey50"), legend.position = "bottom") + coord_cartesian(ylim = c(0,NA), clip = "off") ggsave(filename = "nearby_caserate.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") #plot new cases per week ggplot() + facet_grid(county ~ .) + geom_col(data = us_county_data %>% filter(state %in% "Washington", county %in% important_counties$washington, county != "King"), aes(x = week, y = newcases, fill = county), position = "dodge") + theme_bw() + scale_fill_brewer(palette = "Set2") + scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) + scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) + theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) + labs(title = "New cases of COVID-19 per week", subtitle = paste("through", format(maxdate, "%b %e")), x = "Week of", y = "Number of new cases") ggsave(filename = "pen_new_per_week.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") #plot new cases per week per 100000 people ggplot() + facet_grid(county ~ .) + geom_col(data = us_county_data %>% filter(state %in% "Washington", county %in% important_counties$washington, county != "King"), aes(x = week, y = newcases/countypop, fill = county), position = "dodge") + theme_bw() + scale_fill_brewer(palette = "Set2") + scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) + scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) + theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) + labs(title = "New cases of COVID-19 per week - population normalized", subtitle = paste("through", format(maxdate, "%b %e")), x = "Week of", y = "Number of new cases per 100,000 people") ggsave(filename = "pen_new_per_week_popnorm.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") #all WA counties new cases per 100000 ggplot() + facet_wrap(county ~ .) + geom_rect(data = us_county_data %>% filter(state %in% "Washington", !county %in% important_counties$washington), aes(xmin = ymd("2020-01-01"), xmax = maxdate + 30, ymin = -100, ymax = 10000), fill = '#f2f2f2', alpha = 0.1) + geom_line(data = us_county_data %>% filter(state %in% "Washington"), aes(x = date, y = active_estimate, color = active_estimate), size = 1) + geom_line(data = us_county_data %>% filter(state %in% "Washington"), aes(x = date, y = active_estimate), color = "Black", size = 0.1) + theme_bw() + scale_x_date(date_breaks = "1 month", date_labels = "%b") + scale_color_distiller(palette = "YlOrRd", direction = 1, guide = "none") + theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1), legend.position="bottom", strip.text = element_text(size = 10), panel.spacing = unit(0.1, "lines")) + coord_cartesian(xlim = c(min(us_county_data %>% filter(state %in% "Washington") %>% pull(date), na.rm = TRUE), maxdate), ylim = c(0, max(us_county_data %>% filter(state %in% "Washington") %>% pull(active_estimate), na.rm = TRUE))) + labs(title = "Estimate of active cases by county", subtitle = paste("through", format(maxdate, "%b %e")), x = NULL, y = "New cases per 100,000 people in previous two weeks") ggsave(filename = "WA_new_week_person.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", height = 8.5, width = 11, units = "in") #new cases in the last two weeks ggplot() + geom_col(data = us_county_data %>% filter(state %in% "Washington", date == maxdate) %>% mutate(peninsula = ifelse(county %in% important_counties$washington, T, F)), aes(x = reorder(county, active_estimate), y = active_estimate, fill = peninsula)) + scale_fill_manual(values = c('#595959', 'darkgreen'), guide = NULL) + theme(axis.text.x = element_text(angle = 60, hjust = 1)) + scale_y_continuous(limits = c(0, NA)) + labs(title = "Cumulative new cases in the last two weeks", subtitle = paste(Sys.Date()-days(14), "-", Sys.Date()), x = "County", y = "Cumulative new cases in the last two weeks (per 100,000 residents)") ggsave(filename = "WA_new_twoweek_column.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", height = 8.5, width = 11, units = "in") #map latest cases #make bounding box for maps buffer <- 0.1 states <- c("Washington", "Wisconsin", "Illinois") max_county_value <- max(us_county_data %>% filter(state %in% states) %>% pull(active_estimate), na.rm = TRUE) for(i in 1:length(states)){ if(file.exists(paste0("data/", states[i], "_basemap.RData"))){ load(file = paste0("data/", states[i], "_basemap.RData")) } else { range <- c(left = min(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) - buffer, bottom = min(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) - buffer, right = max(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) + buffer, top = max(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) + buffer) basemap <- ggmap::get_stamenmap(bbox = range, zoom = 8, maptype = "toner-hybrid") save(basemap, file = paste0("data/", states[i], "_basemap.RData")) } ggmap(basemap) + geom_polygon(data = us_county_map %>% filter(state %in% states[i]) %>% arrange(order), aes(x = long, y = lat, group = group, fill = active_estimate), colour = "grey50", alpha = 0.5) + geom_text(data= us_county_label %>% filter(state %in% states[i]), aes(x = long, y = lat, label = str_to_title(county)), size = 2)+ scale_fill_distiller(palette = "Spectral") + theme_bw() + scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), plot.subtitle = element_text(color = "grey50"), plot.caption = element_text(color = "grey50"), legend.position = "bottom", legend.text = element_text(angle = 60, hjust = 1)) + labs(title = "Cumulative new cases in the last two weeks", subtitle = format(downloaded_dttm, "%B %e"), x = NULL, y = NULL, fill = "Cumulative new cases per\n100,000 residents") ggsave(filename = paste0("map_active_", tolower(substr(states[i], 1, 2)), ".png"), path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") } ggplot() + geom_polygon(data = us_county_map %>% mutate(active_estimate = ifelse(active_estimate < 0, 0, active_estimate)) %>% arrange(order), aes(x = long, y = lat, group = group, fill = active_estimate), colour = NA) + scale_fill_gradientn(colors = rev(RColorBrewer::brewer.pal(n = 9, name = "Spectral")), trans = "log10") + theme_bw() + scale_x_continuous(breaks = NULL) + scale_y_continuous(breaks = NULL) + theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(), plot.subtitle = element_text(color = "grey50"), plot.caption = element_text(color = "grey50"), legend.position = "bottom", legend.text = element_text(angle = 60, hjust = 1)) + labs(title = "Cumulative new cases in the last two weeks", subtitle = format(downloaded_dttm, "%B %e"), x = NULL, y = NULL, fill = "Cumulative new cases per\n100,000 residents") ggsave(filename = "map_active_us.png", path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") # WI data ---- #Milwaukee county plots # add dates noted_dates_wi<- data.frame(event = c("Stay-at-home order", "Restrictions lifted"), eventdate = c(ymd("2020-03-25"), ymd("2020-05-14"))) # WI plots ---- #graph active cases for select counties ggplot(us_county_data %>% filter(state %in% "Wisconsin", county %in% important_counties$wisconsin)) + geom_line(data = us_county_data %>% filter(state %in% "Washington", county %in% c("Clallam", "King")), aes(x = date, y = active_estimate, group = county), color = "grey", alpha = 0.5) + geom_line(aes(x = date, y = active_estimate, color = county), size = 0.75) + geom_label_repel(data = us_county_data %>% filter(state %in% "Washington", county %in% c("Clallam", "King"), date %in% max(date)), aes(x = date + 1, y = active_estimate, label = county), hjust = "outward", direction = "y", size = 2.5, color = "grey",) + geom_label_repel(data = us_county_data %>% filter(state %in% "Wisconsin", county %in% important_counties$wisconsin, date %in% max(date)), aes(x = date + 1, y = active_estimate, label = paste0(county,' - ', round(active_estimate,0)), fill = county), hjust = "outward", direction = "y", size = 2.5, nudge_x = 4, segment.color = "black", segment.size = 0.3) + scale_color_brewer(palette = "Set2", guide = "none") + scale_fill_brewer(palette = "Set2", guide = "none") + scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y", minor_breaks = "1 week", expand = expansion(mult = c(0.02, 0.1))) + scale_y_continuous(expand = expansion(mult = c(0,0.1))) + labs(title = "Active case estimate - Wisconsin counties", subtitle = paste("Through", format(downloaded_dttm, "%B %e")), x = "Date", y = "new cases in previous 14 days per 100,000 people", color = "County") + theme_bw() + theme(panel.grid.major.x = element_line(colour="black", size = 0.1), axis.text.x = element_text(angle = 60, hjust = 1)) + coord_cartesian(ylim = c(0,NA)) ggsave(filename = "WI_active_cases.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") ggplot() + geom_line(data = us_county_data %>% filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah')), aes(x = date, y = active_estimate, color = county)) + geom_label_repel(data = us_county_data %>% filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah'), date %in% maxdate), aes(x = date + 0.5, y = active_estimate, label = paste0(county,' - ', round(active_estimate,0)), fill = county), hjust = "outward", direction = "y", size = 2.5, nudge_x = 4, box.padding = 0.01, min.segment.length = 0, segment.color = "black", segment.size = 0.1) + scale_color_brewer(palette = "Set2", guide = "none") + scale_fill_brewer(palette = "Set2", guide = "none") + scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y", minor_breaks = "1 week", expand = expansion(mult = c(0.01, .07))) + scale_y_continuous(expand = expansion(mult = c(0,0.1))) + labs(title = "Active case estimate", subtitle = paste("Through", format(downloaded_dttm, "%B %e")), x = "Date", y = "new cases in previous 14 days per 100,000 people", color = "County", caption = "data from The New York Times") + theme_bw() + theme(panel.grid.major.x = element_line(colour="black", size = 0.1), axis.text.x = element_text(angle = 60, hjust = 1), plot.subtitle = element_text(color = "grey50"), plot.caption = element_text(color = "grey50")) + coord_cartesian(ylim = c(0,NA)) ggsave(filename = "select_active_cases.png" , path = paste(getwd(),"/figures/", sep = ""), device = "png", width = 11, height = 8.5, units = "in") # Animation #### if(FALSE){ library(gganimate) anim <- ggplot(data = us_county_data %>% filter(state %in% "Washington"), aes(x = active_estimate, y = deaths/countypop * 100000)) + geom_point(aes(size = countypop, color = active_estimate)) + scale_color_distiller(palette = "Spectral") + labs(title = "Cases and deaths by county in WA", subtitle = "Date: {frame_time}", x = "Cases per 100,000 residents", y = "Deaths per 100,000 residents", color = "active case estimate", size = "population") + transition_time(date) + ease_aes('linear') animate(anim, nframes = as.double(difftime(maxdate, ymd("2020_01_21"), units = "days"))) anim_save(filename = "WA_cases_county.gif", animation = last_animation(), path = paste(getwd(),"/figures/", sep = ""), fps = 1, loop = 0,) } else { message("no animation this time") } message(difftime(Sys.time(), starttime)) rm(starttime)