# COVID/COVID_new.R
#
# 146 lines
# 5.9 KiB
# R
# Raw Normal View History

starttime <- Sys.time()
#setup ----
library(tidyverse)
library(lubridate)
library(RcppRoll)
library(ggrepel)
# NOTE(review): hard-coded working directory; every relative path below
# ("data/...", "data_download_time.Rda") is resolved against it.
setwd("~/Documents/dataProjects/COVID")
# Determine when the case data was last downloaded (`downloaded_dttm`):
#   1. the timestamp saved by a previous run, if the .Rda file exists;
#   2. otherwise midnight of the newest date in this year's cached CSV;
#   3. otherwise (no cache at all, e.g. first run or first run of a new
#      year) the epoch, so the loader below treats every file as stale
#      instead of failing on a missing CSV.
current_year_csv <- paste0("data/us-counties-", year(Sys.Date()), ".csv")
if (file.exists("data_download_time.Rda")) {
  load("data_download_time.Rda") # restores `downloaded_dttm`
} else if (file.exists(current_year_csv)) {
  downloaded_dttm <- ymd_hms(
    paste(max(read_csv(current_year_csv)$date), "00:00:00")
  )
} else {
  downloaded_dttm <- as.POSIXct(0, origin = "1970-01-01", tz = "UTC")
}
# load data and download new data if needed ----
# One CSV per calendar year, 2020 through the current year.
#   * Years that ended before the last download are read from the local
#     copy as-is.
#   * Years from the last download's year onward are re-downloaded when the
#     last download is more than one hour old.
#   * A file that is missing locally is always downloaded.
data_years <- 2020:year(Sys.Date())
is_stale <- as.double(difftime(Sys.time(), downloaded_dttm), units = "hours") > 1
did_download <- FALSE
# Preallocate one slot per year (indexing the list by the year number itself
# would create thousands of empty leading elements).
us_county_data <- vector("list", length(data_years))
for (i in seq_along(data_years)) {
  yr <- data_years[[i]]
  csv_path <- paste0("data/us-counties-", yr, ".csv")
  may_have_changed <- year(downloaded_dttm) <= yr
  if (!file.exists(csv_path) || (may_have_changed && is_stale)) {
    print("Downloading today's data")
    download.file(
      url = paste0(
        "https://github.com/nytimes/covid-19-data/raw/master/rolling-averages/us-counties-",
        yr, ".csv"
      ),
      destfile = csv_path
    )
    did_download <- TRUE
  } else if (may_have_changed) {
    print("data is current")
  }
  us_county_data[[i]] <- read_csv(csv_path)
}
# Only move the timestamp forward when something was actually fetched;
# refreshing it on every run would let frequent runs postpone the next
# download indefinitely.
if (did_download) {
  downloaded_dttm <- Sys.time()
  save(downloaded_dttm, file = "data_download_time.Rda")
}
us_county_data <- bind_rows(us_county_data)
# Most recent date present in the data; used for subtitles and end labels.
maxdate <- max(us_county_data$date)
# load population data ----
us_county_pop <- read.csv("data/co-est2020-alldata.csv")
# Build the join key in the same shape as the case data's geoid:
# "USA-" + state code zero-padded to 2 digits + county code zero-padded to 3.
# sprintf() pads without the manual strrep() arithmetic, which errors when a
# code is wider than the target width.
us_county_pop$geoid <- paste0(
  "USA-",
  sprintf("%02d", as.integer(us_county_pop$STATE)),
  sprintf("%03d", as.integer(us_county_pop$COUNTY))
)
# Attach a population to every case row by geoid; unmatched geoids get NA.
# NOTE(review): the population is taken from column 20 by position --
# presumably one of the population-estimate columns; confirm against the CSV
# header and switch to the column name.
us_county_data$population <-
  us_county_pop[[20]][match(us_county_data$geoid, us_county_pop$geoid)]
# 7-day trailing average of new cases, per county ----
# twowkcases      : sum of the last 7 rows of `cases` within a county,
#                   divided by 7 (despite its name this is a 7-day mean, not
#                   a two-week total; the name is kept for compatibility).
# active_estimate : that mean expressed per 100,000 residents.
# NOTE(review): complete() runs before group_by(), so it only adds rows for
# dates missing from the whole data set (with NA geoid/county), not for dates
# missing within an individual county -- confirm whether per-county filling
# was intended.
us_county_data <- us_county_data %>%
  complete(date = full_seq(date, period = 1), fill = list(cases = 0)) %>%
  group_by(geoid) %>%
  arrange(date) %>%
  mutate(
    twowkcases = roll_sumr(cases, n = 7, na.rm = TRUE) / 7,
    active_estimate = twowkcases / population * 100000
  ) %>%
  # Drop the grouping so later steps do not silently operate per county.
  ungroup()
#important counties
# Rows for the most recent date only; used as the county/state -> geoid
# lookup table below.
us_county_recent <- us_county_data %>% filter(date %in% maxdate)

# Return the geoid for each (county, state) row of `counties`, looked up in
# `reference` by matching on the "county state" pair. Rows with no match get
# NA. The geoid column is selected by name rather than by position so the
# lookup keeps working if the column order of the data changes.
lookup_geoid <- function(counties, reference) {
  idx <- match(
    paste(counties$county, counties$state),
    paste(reference$county, reference$state)
  )
  reference$geoid[idx]
}

important_counties <- bind_rows(
  data.frame(state = "Washington", county = c("Clallam")),
  data.frame(state = "Wisconsin", county = c("Milwaukee", "Dane")),
  data.frame(state = "Illinois", county = c("Lake")),
  data.frame(state = "Utah", county = c("Salt Lake"))
)
important_counties$geoid <- lookup_geoid(important_counties, us_county_recent)

nearby_counties <- bind_rows(
  data.frame(
    state = "Washington",
    county = c("Clallam", "Kitsap", "King", "Jefferson")
  )
)
nearby_counties$geoid <- lookup_geoid(nearby_counties, us_county_recent)
# make charts ----
# Clallam County only: the per-100k case average column from the downloaded
# data (blue) drawn against the locally computed active_estimate (red).
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for lines.
us_county_data %>%
  filter(county == "Clallam") %>%
  ggplot(mapping = aes(x = date)) +
  geom_line(
    mapping = aes(y = cases_avg_per_100k),
    color = "blue",
    size = 0.5
  ) +
  geom_line(
    mapping = aes(y = active_estimate),
    color = "red",
    size = 0.5
  ) +
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%b\n%Y",
    minor_breaks = "1 week",
    expand = expansion(mult = c(0.01, 0.07))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  # Anchor the y axis at zero; clip = "off" lets marks extend past the panel.
  coord_cartesian(ylim = c(0, NA), clip = "off") +
  labs(
    title = "Case rate - nearby counties",
    subtitle = paste("Through", format(maxdate, "%B %e, %Y")),
    x = "Date",
    y = "7-day trailing average new cases per day per 100,000 people",
    color = NULL,
    caption = "data from The New York Times"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "black", size = 0.1),
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.subtitle = element_text(color = "grey50"),
    plot.caption = element_text(color = "grey50"),
    legend.position = "bottom"
  )
# Case-rate comparison for the counties listed in important_counties: one
# line per county, plus a label at the most recent date showing the county
# name and its rounded rate.
ggplot() +
  geom_line(
    data = us_county_data %>%
      filter(geoid %in% important_counties$geoid),
    aes(
      x = date,
      y = cases_avg_per_100k,
      color = county
    )
  ) +
  geom_label_repel(
    data = us_county_data %>%
      filter(
        geoid %in% important_counties$geoid,
        # Exclude these counties from labelling by set membership. Comparing
        # against the single string "Jefferson, King, Kitsap" never matches
        # a county name, so it excluded nothing.
        !county %in% c("Jefferson", "King", "Kitsap"),
        date %in% maxdate
      ),
    aes(
      # Shift the label anchor slightly right of the last data point.
      x = date + 0.5,
      y = cases_avg_per_100k,
      label = paste0(county, " - ", round(cases_avg_per_100k, 0)),
      fill = county
    ),
    hjust = "outward",
    direction = "y", # only repel vertically so labels stay at the line ends
    size = 2.5,
    nudge_x = 4,
    box.padding = 0.01,
    min.segment.length = 0,
    segment.color = "black",
    segment.size = 0.1
  ) +
  # Lines and label fills share one palette; the labels replace the legends.
  scale_color_brewer(palette = "Set2", guide = "none") +
  scale_fill_brewer(palette = "Set2", guide = "none") +
  scale_x_date(
    date_breaks = "1 month",
    date_labels = "%b\n%Y",
    minor_breaks = "1 week",
    # Extra room on the right for the end-of-line labels.
    expand = expansion(mult = c(0.01, .07))
  ) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.1))) +
  labs(
    title = "Active case estimate",
    subtitle = paste("Through", format(maxdate, "%B %e")),
    x = "Date",
    y = "7-day trailing average new cases per day per 100,000 people",
    color = "County",
    caption = "data from The New York Times"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_line(colour = "black", size = 0.1),
    axis.text.x = element_text(angle = 60, hjust = 1),
    plot.subtitle = element_text(color = "grey50"),
    plot.caption = element_text(color = "grey50")
  ) +
  coord_cartesian(ylim = c(0, NA))
# Report total run time. format() keeps the unit (secs/mins/hours) that
# difftime picked automatically; passing the difftime straight to message()
# prints only the bare number.
message("Elapsed time: ", format(difftime(Sys.time(), starttime), digits = 3))
rm(starttime)