diff --git a/COVID.R b/COVID.R
index 5ebad87..6d0ca5a 100644
--- a/COVID.R
+++ b/COVID.R
@@ -416,12 +416,12 @@ ggsave(filename = "WI_active_cases.png" ,
 
        ggplot() +
          geom_line(data = us_county_data %>%
-                     filter(county_state %in% c("Clallam, Washington", "Cook, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah')),
+                     filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah')),
                    aes(x = date,
                        y = active_estimate,
                        color = county)) +
          geom_label_repel(data = us_county_data %>%
-                            filter(county_state %in% c("Clallam, Washington", "Cook, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah'),
+                            filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah'),
                                    date %in% maxdate),
                           aes(x = date + 0.5,
                               y = active_estimate,
diff --git a/COVID_new.R b/COVID_new.R
new file mode 100644
index 0000000..602b8bd
--- /dev/null
+++ b/COVID_new.R
@@ -0,0 +1,205 @@
+# COVID-19 county case-rate charts built from the NYT rolling-average files.
+#
+# Refreshes the per-year county CSVs under data/ when they are stale, attaches
+# county population, derives a 7-day trailing case rate per 100,000 people,
+# and draws two charts.
+starttime <- Sys.time()
+
+# setup ----
+library(tidyverse)
+library(lubridate)
+library(RcppRoll)
+library(ggrepel)
+
+# NOTE(review): every relative path below depends on this machine-specific
+# working directory.
+setwd("~/Documents/dataProjects/COVID")
+
+data_years <- 2020:year(Sys.Date())
+
+# Local path of the county CSV for one year.
+county_csv_path <- function(yr) {
+  paste0("data/us-counties-", yr, ".csv")
+}
+
+# Download URL of the county CSV for one year.
+county_csv_url <- function(yr) {
+  paste0(
+    "https://github.com/nytimes/covid-19-data/raw/master/rolling-averages/",
+    "us-counties-", yr, ".csv"
+  )
+}
+
+# Work out when the data was last downloaded: prefer the saved timestamp, fall
+# back to the newest date in this year's file, and otherwise treat the data as
+# never downloaded so that every year is fetched.
+current_year_csv <- county_csv_path(year(Sys.Date()))
+if (file.exists("data_download_time.Rda")) {
+  load("data_download_time.Rda")
+} else if (file.exists(current_year_csv)) {
+  downloaded_dttm <- ymd_hms(
+    paste(max(read_csv(current_year_csv)$date), "00:00:00")
+  )
+} else {
+  downloaded_dttm <- as.POSIXct(0, origin = "1970-01-01", tz = "UTC")
+}
+
+# load data and download new data if needed ----
+
+# Staleness is decided once, before the loop: refreshing the timestamp after
+# the first download would make every later year look current and skip it.
+data_is_stale <- difftime(Sys.time(), downloaded_dttm, units = "hours") > 1
+last_download_year <- year(downloaded_dttm)
+
+yearly_data <- vector("list", length(data_years))
+for (i in seq_along(data_years)) {
+  yr <- data_years[[i]]
+  # Files for years before the last download are read without refreshing.
+  if (yr >= last_download_year) {
+    if (data_is_stale) {
+      print("Downloading today's data")
+      dir.create("data", showWarnings = FALSE)
+      download.file(url = county_csv_url(yr), destfile = county_csv_path(yr))
+    } else {
+      print("data is current")
+    }
+  }
+  yearly_data[[i]] <- read_csv(county_csv_path(yr))
+}
+if (data_is_stale) {
+  downloaded_dttm <- Sys.time()
+  save(downloaded_dttm, file = "data_download_time.Rda")
+}
+us_county_data <- bind_rows(yearly_data)
+maxdate <- max(us_county_data$date)
+
+# load population data ----
+us_county_pop <- read.csv("data/co-est2020-alldata.csv")
+# geoid = "USA-" + state code padded to 2 digits + county code padded to 3.
+us_county_pop["geoid"] <- paste0(
+  "USA-",
+  str_pad(as.character(us_county_pop$STATE), width = 2, pad = "0"),
+  str_pad(as.character(us_county_pop$COUNTY), width = 3, pad = "0")
+)
+# NOTE(review): column 20 is picked by position; presumably a population
+# estimate -- confirm and switch to the column name.
+us_county_data["population"] <- us_county_pop[
+  match(us_county_data$geoid, us_county_pop$geoid), 20
+]
+
+# 7-day trailing average of new cases, raw and per 100,000 people ----
+# (twowkcases keeps its original name although the window is 7 days.)
+us_county_data <- us_county_data %>%
+  complete(date = full_seq(date, period = 1), fill = list(cases = 0)) %>%
+  group_by(geoid) %>%
+  arrange(date) %>%
+  mutate(
+    twowkcases = roll_sumr(cases, n = 7, na.rm = TRUE) / 7,
+    active_estimate = twowkcases / population * 100000
+  ) %>%
+  ungroup()
+
+# important counties ----
+us_county_recent <- us_county_data %>% filter(date %in% maxdate)
+
+# geoid for each (county, state) row of `counties`, looked up in `recent`;
+# NA when the pair is not present.
+lookup_geoid <- function(counties, recent = us_county_recent) {
+  wanted <- paste(counties$county, counties$state)
+  available <- paste(recent$county, recent$state)
+  recent$geoid[match(wanted, available)]
+}
+
+important_counties <- data.frame(
+  state = c("Washington", "Wisconsin", "Wisconsin", "Illinois", "Utah"),
+  county = c("Clallam", "Milwaukee", "Dane", "Lake", "Salt Lake")
+)
+important_counties["geoid"] <- lookup_geoid(important_counties)
+
+nearby_counties <- data.frame(
+  state = "Washington",
+  county = c("Clallam", "Kitsap", "King", "Jefferson")
+)
+nearby_counties["geoid"] <- lookup_geoid(nearby_counties)
+
+# make charts ----
+
+# Clallam: cases_avg_per_100k (blue) against active_estimate (red).
+# NOTE(review): the title says "nearby counties" but only Clallam is drawn, and
+# nearby_counties is not used by either chart.
+ggplot(data = us_county_data %>% filter(county == "Clallam")) +
+  geom_line(aes(x = date, y = cases_avg_per_100k),
+            color = "blue",
+            size = 0.5) +
+  geom_line(aes(x = date, y = active_estimate),
+            color = "red",
+            size = 0.5) +
+  scale_x_date(date_breaks = "1 month",
+               date_labels = "%b\n%Y",
+               minor_breaks = "1 week",
+               expand = expansion(mult = c(0.01, 0.07))) +
+  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
+  labs(title = "Case rate - nearby counties",
+       subtitle = paste("Through", format(maxdate, "%B %e, %Y")),
+       x = "Date",
+       color = NULL,
+       y = "7-day trailing average new cases per day per 100,000 people",
+       caption = "data from The New York Times") +
+  theme_bw() +
+  theme(panel.grid.major.x = element_line(colour = "black", size = 0.1),
+        axis.text.x = element_text(angle = 60, hjust = 1),
+        plot.subtitle = element_text(color = "grey50"),
+        plot.caption = element_text(color = "grey50"),
+        legend.position = "bottom") +
+  coord_cartesian(ylim = c(0, NA),
+                  clip = "off")
+
+# Selected counties: one line per county, labelled at the latest date.
+# The label filter uses %in% so each excluded county is matched by name;
+# comparing with the single string "Jefferson, King, Kitsap" excluded nothing.
+ggplot() +
+  geom_line(data = us_county_data %>%
+              filter(geoid %in% important_counties$geoid),
+            aes(x = date,
+                y = cases_avg_per_100k,
+                color = county)) +
+  geom_label_repel(data = us_county_data %>%
+                     filter(geoid %in% important_counties$geoid,
+                            !county %in% c("Jefferson", "King", "Kitsap"),
+                            date %in% maxdate),
+                   aes(x = date + 0.5,
+                       y = cases_avg_per_100k,
+                       label = paste0(county, " - ",
+                                      round(cases_avg_per_100k, 0)),
+                       fill = county),
+                   hjust = "outward",
+                   direction = "y",
+                   size = 2.5,
+                   nudge_x = 4,
+                   box.padding = 0.01,
+                   min.segment.length = 0,
+                   segment.color = "black",
+                   segment.size = 0.1) +
+  scale_color_brewer(palette = "Set2", guide = "none") +
+  scale_fill_brewer(palette = "Set2", guide = "none") +
+  scale_x_date(date_breaks = "1 month",
+               date_labels = "%b\n%Y",
+               minor_breaks = "1 week",
+               expand = expansion(mult = c(0.01, .07))) +
+  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
+  labs(title = "Active case estimate",
+       subtitle = paste("Through", format(maxdate, "%B %e")),
+       x = "Date",
+       y = "7-day trailing average new cases per day per 100,000 people",
+       color = "County",
+       caption = "data from The New York Times") +
+  theme_bw() +
+  theme(panel.grid.major.x = element_line(colour = "black", size = 0.1),
+        axis.text.x = element_text(angle = 60, hjust = 1),
+        plot.subtitle = element_text(color = "grey50"),
+        plot.caption = element_text(color = "grey50")) +
+  coord_cartesian(ylim = c(0, NA))
+
+# Report elapsed run time; format() keeps the units that message() alone drops.
+message(format(difftime(Sys.time(), starttime)))
+rm(starttime)
diff --git a/figures/select_active_cases.png b/figures/select_active_cases.png
index 3f110fe..c27994c 100644
Binary files a/figures/select_active_cases.png and b/figures/select_active_cases.png differ