# Start the timer; the elapsed run time is reported at the end of the script.
starttime <- Sys.time()

# setup ----
library(tidyverse)
library(lubridate)
library(RcppRoll)
library(ggrepel)

# NOTE(review): every relative path below ("data/...", the .Rda timestamp
# file) is resolved against this working directory.
setwd("~/Documents/dataProjects/COVID")

# `downloaded_dttm` is the time of the last successful download. It is
# restored from the .Rda file written further down; when that file is missing
# it is approximated by the newest date in the current year's CSV.
if (file.exists("data_download_time.Rda")) {
  load("data_download_time.Rda")
} else {
  current_year_csv <- paste0("data/us-counties-", year(Sys.Date()), ".csv")
  if (file.exists(current_year_csv)) {
    downloaded_dttm <- ymd_hms(
      paste(max(read_csv(current_year_csv)$date), "00:00:00")
    )
  } else {
    # Nothing cached yet: use a timestamp old enough that every year from
    # 2020 onwards is treated as stale and downloaded below.
    downloaded_dttm <- ymd_hms("2020-01-01 00:00:00")
  }
}

# load data and download new data if needed ----
# Years before the year of the last download are read from the local cache.
# The year of the last download and any later year are re-downloaded when the
# cache is more than one hour old.
#
# Staleness is decided ONCE, before the loop. Updating `downloaded_dttm`
# inside the loop would make every year after the first downloaded one look
# "current", so at a year boundary the new year's file would never be fetched.
data_years <- 2020:year(Sys.Date())
last_download_year <- year(downloaded_dttm)
data_is_stale <-
  as.double(difftime(Sys.time(), downloaded_dttm), units = "hours") > 1
downloaded_any <- FALSE

yearly_data <- vector("list", length(data_years))
for (i in seq_along(data_years)) {
  yr <- data_years[i]
  csv_path <- paste0("data/us-counties-", yr, ".csv")

  if (last_download_year <= yr) {
    if (data_is_stale) {
      print("Downloading today's data")
      download.file(
        url = paste0(
          "https://github.com/nytimes/covid-19-data/raw/master/rolling-averages/us-counties-",
          yr, ".csv"
        ),
        destfile = csv_path
      )
      downloaded_any <- TRUE
    } else {
      print("data is current")
    }
  }

  yearly_data[[i]] <- read_csv(csv_path)
}

# Record the download time only after all files have been fetched.
if (downloaded_any) {
  downloaded_dttm <- Sys.time()
  save(downloaded_dttm, file = "data_download_time.Rda")
}

us_county_data <- bind_rows(yearly_data)
maxdate <- max(us_county_data$date)

# load population data
us_county_pop <- read.csv("data/co-est2020-alldata.csv")

# Build the join key in the same "USA-<state><county>" form as the case data:
# STATE is zero-padded to 2 characters and COUNTY to 3.
us_county_pop["geoid"] <- paste0(
  "USA-",
  str_pad(us_county_pop$STATE, width = 2, side = "left", pad = "0"),
  str_pad(us_county_pop$COUNTY, width = 3, side = "left", pad = "0")
)

# NOTE(review): population is taken from column 20 of the population file by
# position; confirm which estimate that column holds and consider selecting
# it by name.
us_county_data["population"] <-
  us_county_pop[match(us_county_data$geoid, us_county_pop$geoid), 20]

# 7-day trailing average of daily cases per county, and the same value per
# 100,000 residents. Despite its name, `twowkcases` uses a 7-row window.
# NOTE(review): complete() runs before group_by(), so it only adds rows for
# calendar dates missing from the whole data set (cases = 0, other columns
# NA); it does not fill gaps per county. Confirm this is intended.
us_county_data <- us_county_data %>%
  complete(date = full_seq(date, period = 1), fill = list(cases = 0)) %>%
  group_by(geoid) %>%
  arrange(date) %>%
  mutate(
    twowkcases = roll_sumr(cases, n = 7, na.rm = TRUE) / 7,
    active_estimate = twowkcases / population * 100000
  ) %>%
  ungroup()

# Rows for the most recent date only; used to look up county geoids.
us_county_recent <- us_county_data %>%
  filter(date %in% maxdate)
# important counties ----
# Counties drawn in the multi-county chart. Each (county, state) pair is
# resolved to its geoid through the most recent day's rows.
important_counties <- bind_rows(
  data.frame(state = "Washington", county = c("Clallam")),
  data.frame(state = "Wisconsin", county = c("Milwaukee", "Dane")),
  data.frame(state = "Illinois", county = c("Lake")),
  data.frame(state = "Utah", county = c("Salt Lake"))
)
# Look the geoid up by column name rather than by position so the result does
# not depend on the column order of `us_county_recent`.
important_counties["geoid"] <- us_county_recent$geoid[match(
  paste(important_counties$county, important_counties$state),
  paste(us_county_recent$county, us_county_recent$state)
)]

# NOTE(review): `nearby_counties` is built here but not used by either chart
# in this part of the script.
nearby_counties <- bind_rows(
  data.frame(
    state = "Washington",
    county = c("Clallam", "Kitsap", "King", "Jefferson")
  )
)
nearby_counties["geoid"] <- us_county_recent$geoid[match(
  paste(nearby_counties$county, nearby_counties$state),
  paste(us_county_recent$county, us_county_recent$state)
)]

# make charts ----
# Chart 1: rows whose county is "Clallam". Blue is the published
# cases_avg_per_100k series; red is the locally computed active_estimate.
# NOTE(review): the title says "nearby counties" but only Clallam is plotted.
ggplot(data = us_county_data %>% filter(county == "Clallam")) +
  geom_line(
    aes(x = date, y = cases_avg_per_100k),
    color = "blue", size = 0.5
  ) +
  geom_line(
    aes(x = date, y = active_estimate),
    color = "red", size = 0.5
  ) +
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%b\n%Y",
    minor_breaks = "1 week",
    expand = expansion(mult = c(0.01, 0.07))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Case rate - nearby counties",
    subtitle = paste("Through", format(maxdate, "%B %e, %Y")),
    x = "Date",
    color = NULL,
    y = "7-day trailing average new cases per day per 100,000 people",
    caption = "data from The New York Times"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "black", size = 0.1),
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.subtitle = element_text(color = "grey50"),
    plot.caption = element_text(color = "grey50"),
    legend.position = "bottom"
  ) +
  coord_cartesian(ylim = c(0, NA), clip = "off")

# Chart 2: one line per important county, with a label at the latest date
# showing the county name and its rounded rate.
# NOTE(review): the title says "Active case estimate" but the plotted series
# is cases_avg_per_100k.
ggplot() +
  geom_line(
    data = us_county_data %>%
      filter(geoid %in% important_counties$geoid),
    aes(x = date, y = cases_avg_per_100k, color = county)
  ) +
  geom_label_repel(
    data = us_county_data %>%
      filter(
        geoid %in% important_counties$geoid,
        # Exclude these counties from labelling. The previous comparison was
        # against the single string "Jefferson, King, Kitsap", which never
        # matched any county name.
        !county %in% c("Jefferson", "King", "Kitsap"),
        date %in% maxdate
      ),
    aes(
      x = date + 0.5,
      y = cases_avg_per_100k,
      label = paste0(county, " - ", round(cases_avg_per_100k, 0)),
      fill = county
    ),
    hjust = "outward",
    direction = "y",
    size = 2.5,
    nudge_x = 4,
    box.padding = 0.01,
    min.segment.length = 0,
    segment.color = "black",
    segment.size = 0.1
  ) +
  scale_color_brewer(palette = "Set2", guide = "none") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%b\n%Y",
    minor_breaks = "1 week",
    expand = expansion(mult = c(0.01, .07))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Active case estimate",
    subtitle = paste("Through", format(maxdate, "%B %e")),
    x = "Date",
    y = "7-day trailing average new cases per day per 100,000 people",
    color = "County",
    caption = "data from The New York Times"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "black", size = 0.1),
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.subtitle = element_text(color = "grey50"),
    plot.caption = element_text(color = "grey50")
  ) +
  coord_cartesian(ylim = c(0, NA))

# Report elapsed run time. format() keeps the unit (secs/mins), which
# difftime picks automatically and message() alone would drop.
message(format(difftime(Sys.time(), starttime)))
rm(starttime)