2022-02-12 00:19:36 -08:00
|
|
|
# Record when the script started; the elapsed run time is reported from this
# value at the end of the script.
starttime <- Sys.time()
|
|
|
|
#setup ----
|
|
|
|
library(tidyverse)
|
|
|
|
library(lubridate)
|
|
|
|
library(RcppRoll)
|
|
|
|
library(ggrepel)
|
|
|
|
|
|
|
|
# NOTE(review): this hard-coded working directory ties the script to one
# machine; every relative path below ("data/...", "data_download_time.Rda")
# resolves against it.
setwd("~/Documents/dataProjects/COVID")

# Restore the time of the last download. The .Rda file is written by this
# script with `downloaded_dttm` as its only saved object, so `load()` brings
# that variable back into the session.
if (file.exists("data_download_time.Rda")) {
  load("data_download_time.Rda")
} else {
  current_year_csv <- paste0("data/us-counties-", year(Sys.Date()), ".csv")
  if (file.exists(current_year_csv)) {
    # No saved timestamp: approximate it with midnight of the newest date
    # present in the current year's file.
    downloaded_dttm <- ymd_hms(
      paste(max(read_csv(current_year_csv)$date), "00:00:00")
    )
  } else {
    # Nothing on disk yet (first run, or first run of a new year). Use a
    # timestamp at the start of the first data year so the download step
    # treats every year as stale instead of crashing on a missing file here.
    downloaded_dttm <- ymd_hms("2020-01-01 00:00:00")
  }
}
|
|
|
|
|
|
|
|
# load data and download new data if needed ----

# One CSV per calendar year, from 2020 through the current year.
data_years <- 2020:year(Sys.Date())

# The local copy counts as stale once the last download is over an hour old.
data_is_stale <-
  as.double(difftime(Sys.time(), downloaded_dttm, units = "hours")) > 1
downloaded_any <- FALSE

# Preallocate one slot per year (indexing the list by the year number itself
# would create thousands of empty slots).
us_county_data <- vector("list", length(data_years))

for (i in seq_along(data_years)) {
  data_year <- data_years[[i]]
  csv_path <- paste0("data/us-counties-", data_year, ".csv")

  # Years that ended before the last download cannot have changed since, so
  # only the download year and later years are candidates for a refresh.
  may_have_changed <- year(downloaded_dttm) <= data_year

  if (may_have_changed && data_is_stale) {
    print("Downloading today's data")
    download.file(
      url = paste0(
        "https://github.com/nytimes/covid-19-data/raw/master/rolling-averages/us-counties-",
        data_year, ".csv"
      ),
      destfile = csv_path
    )
    downloaded_any <- TRUE
  } else if (may_have_changed) {
    print("data is current")
  }

  us_county_data[[i]] <- read_csv(csv_path)
}

# Only move the timestamp forward when something was actually downloaded.
# Stamping every run would let runs spaced less than an hour apart postpone
# the next refresh indefinitely.
if (downloaded_any) {
  downloaded_dttm <- Sys.time()
  save(downloaded_dttm, file = "data_download_time.Rda")
}
|
2022-02-12 00:19:36 -08:00
|
|
|
# Stack the per-year tables into a single data frame, then note the most
# recent date it contains (used for chart subtitles and "latest" filters).
us_county_data <- us_county_data %>%
  bind_rows()

maxdate <- us_county_data %>%
  pull(date) %>%
  max()
|
|
|
|
|
|
|
|
# load population data ----
us_county_pop <- read.csv("data/co-est2020-alldata.csv")

# Build a key of the form "USA-" + zero-padded 2-character STATE + zero-padded
# 3-character COUNTY so it can be matched against us_county_data$geoid.
# str_pad() leaves an over-long code untouched, where strrep() with a negative
# repeat count would stop the script.
us_county_pop["geoid"] <- paste0(
  "USA-",
  str_pad(as.character(us_county_pop$STATE), width = 2, pad = "0"),
  str_pad(as.character(us_county_pop$COUNTY), width = 3, pad = "0")
)

# NOTE(review): the population figure is taken by position (column 20 of the
# census file) - presumably a 2020 population estimate; confirm the column
# and consider selecting it by name.
us_county_data["population"] <-
  us_county_pop[match(us_county_data$geoid, us_county_pop$geoid), 20]
|
|
|
|
|
|
|
|
# 7-day trailing average of new cases per county, and the same per 100,000 ----
# `twowkcases` keeps its historical name but holds the 7-day trailing sum of
# `cases` divided by 7; `active_estimate` scales that by county population.
us_county_data <- us_county_data %>%
  # NOTE(review): complete() runs before grouping, so it only adds rows for
  # dates missing from the whole table (other columns NA, cases = 0); it does
  # not fill gaps inside an individual county. Confirm that is intended.
  complete(date = full_seq(date, period = 1), fill = list(cases = 0)) %>%
  group_by(geoid) %>%
  arrange(date) %>%
  mutate(
    twowkcases = roll_sumr(cases, n = 7, na.rm = TRUE) / 7,
    active_estimate = twowkcases / population * 100000
  ) %>%
  # Drop the grouping so later steps do not silently operate per county.
  ungroup()
|
|
|
|
|
|
|
|
# important counties ----

# Rows for the most recent date only; used as the county -> geoid lookup.
us_county_recent <- us_county_data %>% filter(date %in% maxdate)

# Return the geoid from `reference` for each (county, state) row of
# `counties`. Rows with no match get NA. The column is picked by name, so the
# lookup does not depend on the column order of `reference`.
lookup_geoid <- function(counties, reference) {
  wanted <- paste(counties$county, counties$state)
  available <- paste(reference$county, reference$state)
  reference$geoid[match(wanted, available)]
}

important_counties <- data.frame(
  state = c("Washington", "Wisconsin", "Wisconsin", "Illinois", "Utah"),
  county = c("Clallam", "Milwaukee", "Dane", "Lake", "Salt Lake"),
  stringsAsFactors = FALSE
)
important_counties["geoid"] <- lookup_geoid(important_counties, us_county_recent)

nearby_counties <- data.frame(
  state = "Washington",
  county = c("Clallam", "Kitsap", "King", "Jefferson"),
  stringsAsFactors = FALSE
)
nearby_counties["geoid"] <- lookup_geoid(nearby_counties, us_county_recent)
|
|
|
|
|
|
|
|
# make charts ----

# Clallam only: the cases_avg_per_100k column from the data (blue) against the
# active_estimate column computed in this script (red).
# NOTE(review): the title says "nearby counties" but the data is filtered to
# county == "Clallam" - confirm which one is intended.
us_county_data %>%
  filter(county == "Clallam") %>%
  ggplot(aes(x = date)) +
  geom_line(aes(y = cases_avg_per_100k), color = 'blue', size = 0.5) +
  geom_line(aes(y = active_estimate), color = 'red', size = 0.5) +
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%b\n%Y",
    minor_breaks = "1 week",
    expand = expansion(mult = c(0.01, 0.07))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Case rate - nearby counties",
    subtitle = paste("Through", format(maxdate, "%B %e, %Y")),
    x = "Date",
    y = "7-day trailing average new cases per day per 100,000 people",
    color = NULL,
    caption = "data from The New York Times"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "black", size = 0.1),
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.subtitle = element_text(color = "grey50"),
    plot.caption = element_text(color = "grey50"),
    legend.position = "bottom"
  ) +
  # Start the y axis at zero; clipping is off so drawing may extend past the
  # panel edge.
  coord_cartesian(ylim = c(0, NA), clip = "off")
|
|
|
|
|
|
|
|
|
|
|
|
# One line per county listed in `important_counties`, with a label at the
# latest date showing the county name and its rounded rate.
# NOTE(review): the title reads "Active case estimate" but both layers plot
# cases_avg_per_100k - confirm which measure is intended.
ggplot() +
  geom_line(
    data = us_county_data %>%
      filter(geoid %in% important_counties$geoid),
    aes(
      x = date,
      y = cases_avg_per_100k,
      color = county
    )
  ) +
  geom_label_repel(
    data = us_county_data %>%
      filter(
        geoid %in% important_counties$geoid,
        # Exclude these three counties from labelling. The previous
        # comparison used the single string "Jefferson, King, Kitsap", which
        # matches no county name and therefore excluded nothing.
        !county %in% c("Jefferson", "King", "Kitsap"),
        date %in% maxdate
      ),
    aes(
      x = date + 0.5,
      y = cases_avg_per_100k,
      label = paste0(county, ' - ', round(cases_avg_per_100k, 0)),
      fill = county
    ),
    hjust = "outward",
    direction = "y",
    size = 2.5,
    nudge_x = 4,
    box.padding = 0.01,
    min.segment.length = 0,
    segment.color = "black",
    segment.size = 0.1
  ) +
  # Same palette for line colour and label fill; legends are suppressed
  # because the labels identify each county.
  scale_color_brewer(palette = "Set2", guide = "none") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%b\n%Y",
    minor_breaks = "1 week",
    # Extra room on the right-hand side for the end-of-line labels.
    expand = expansion(mult = c(0.01, .07))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Active case estimate",
    subtitle = paste("Through", format(maxdate, "%B %e")),
    x = "Date",
    y = "7-day trailing average new cases per day per 100,000 people",
    color = "County",
    caption = "data from The New York Times"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "black", size = 0.1),
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.subtitle = element_text(color = "grey50"),
    plot.caption = element_text(color = "grey50")
  ) +
  coord_cartesian(ylim = c(0, NA))
|
|
|
|
|
|
|
|
# Report how long the script took. format() keeps the unit that difftime
# picked (secs, mins, ...); passing the difftime straight to message() prints
# only the bare number.
message("Elapsed time: ", format(round(difftime(Sys.time(), starttime), 2)))
rm(starttime)
|