COVID/COVID.R
Ben Varick 308eeeda22
updated figures
Signed-off-by: Ben Varick <ben@dendroalsia.net>
2023-03-07 10:40:01 -08:00

498 lines
20 KiB
R

starttime <- Sys.time()
#setup ----
library(tidyverse)
library(lubridate)
library(ggmap)
library(ggrepel)
setwd("~/Documents/dataProjects/COVID")
#download data ----
if(file.exists("data_download_time.Rda")) {
load("data_download_time.Rda")
} else {
downloaded_dttm <- ymd_hms('1900_01_01:00:00:00')
}
if(as.double(difftime(downloaded_dttm, Sys.time()), units = "hours") < -2){
print("Downloading today's data")
us_county_data <- read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
save(us_county_data, file = 'data/us_county_data.Rda')
downloaded_dttm <- Sys.time()
save(downloaded_dttm, file = "data_download_time.Rda")
} else {
print("data is current")
load('data/us_county_data.Rda')
}
##to redownload data, uncomment the following 2 lines
#downloaded_dttm <- downloaded_dttm - days(1)
#save(downloaded_dttm, file = "data_download_time.Rda")
#us_county_data <- read.csv("data/us-counties.csv")
#cleanup data ----
#make dates in a date format
us_county_data <- us_county_data %>%
mutate(date = ymd(date)) %>%
mutate(week = floor_date(date, "weeks", week_start = 1))
maxdate <- max(us_county_data$date)
#load county pop data
us_county_pop <- read.csv("data/co-est2020-alldata.csv")
us_county_pop <- us_county_pop %>%
mutate(state = STNAME,
county = ifelse(state != "Louisiana", sub(" County.*", "", CTYNAME), sub(" Parish.*", "", CTYNAME))) %>%
mutate(county_state = paste0(county, ", ", state))
us_county_data["countypop"] <- us_county_pop[match(paste0(us_county_data$county, ", ", us_county_data$state), us_county_pop$county_state), 13]
us_county_data <- us_county_data %>%
group_by(state, county) %>%
arrange(date) %>%
mutate(county_state = paste0(county,", ", state),
newcases = cases - lag(cases, n = 1, default = 0),
twowkcases = cases - lag(cases, n = 14, default = 0)) %>%
mutate(active_estimate = twowkcases/countypop * 100000)
important_counties <- list(washington = c("Clallam",
"Jefferson",
"Kitsap",
"King"),
wisconsin = c("Milwaukee",
"Dane",
"Vilas",
"Brown",
"Door"),
illinois = c("Cook"))
noted_dates_wa<- data.frame(event = c("Stay-at-home begins", "Phase 1 reopening", "Phase 2 reopening"),
eventdate <- c(ymd("2020-03-23"), ymd("2020-04-29"), ymd("2020-06-01")))
us_county_map <- map_data("county")
us_county_map["county_state"] <- paste0(us_county_map$subregion, ", ", us_county_map$region)
us_county_map <- merge(us_county_map,
us_county_data %>%
filter(date %in% maxdate) %>%
mutate(county_state = tolower(county_state)),
by.x = "county_state",
by.y = "county_state",
all = TRUE)
us_county_label <- us_county_map %>%
group_by(state, county) %>%
summarise(long = (max(long)+min(long))/2, lat = (max(lat)+min(lat))/2) %>%
ungroup()
#Washington plots ----
#graph nearby active cases
ggplot(data = us_county_data %>% filter(state %in% "Washington",
county %in% important_counties$washington)) +
geom_line(aes(x = date,
y = twowkcases/countypop * 100000,
color = county,
alpha = ifelse(county == "Clallam", 1, 1)),
size = 0.5) +
geom_label(data = us_county_data %>% filter(state %in% "Washington",
county %in% important_counties$washington,
date %in% max(date)),
aes(x = date + 1,
y = twowkcases/countypop * 100000,
label = paste0(county,' - ', round(active_estimate,0)),
fill = county),
hjust = "outward",
size = 2.5) +
scale_color_brewer(palette = "Set2") +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_alpha_continuous(limits = c(0, 1), guide = "none") +
scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y", minor_breaks = "1 week", expand = expansion(mult = c(0.01, 0.07))) +
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
labs(title = "Case rate - nearby counties",
subtitle = paste("Through", format(downloaded_dttm, "%B %e, %Y")),
x = "Date",
color = NULL,
y = "new cases in previous 14 days per 100,000 people",
caption = "data from The New York Times") +
theme_bw() +
theme(panel.grid.major.x = element_line(colour="black", size = 0.1),
axis.text.x = element_text(angle = 60, hjust = 1),
plot.subtitle = element_text(color = "grey50"),
plot.caption = element_text(color = "grey50"),
legend.position = "bottom") +
coord_cartesian(ylim = c(0,NA),
clip = "off")
ggsave(filename = "nearby_caserate.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
#plot new cases per week
ggplot() +
facet_grid(county ~ .) +
geom_col(data = us_county_data %>% filter(state %in% "Washington",
county %in% important_counties$washington,
county != "King"),
aes(x = week,
y = newcases,
fill = county),
position = "dodge") +
theme_bw() +
scale_fill_brewer(palette = "Set2") +
scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) +
scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) +
theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) +
labs(title = "New cases of COVID-19 per week",
subtitle = paste("through", format(maxdate, "%b %e")),
x = "Week of",
y = "Number of new cases")
ggsave(filename = "pen_new_per_week.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
#plot new cases per week per 100000 people
ggplot() +
facet_grid(county ~ .) +
geom_col(data = us_county_data %>% filter(state %in% "Washington",
county %in% important_counties$washington,
county != "King"),
aes(x = week,
y = newcases/countypop,
fill = county),
position = "dodge") +
theme_bw() +
scale_fill_brewer(palette = "Set2") +
scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) +
scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) +
theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) +
labs(title = "New cases of COVID-19 per week - population normalized",
subtitle = paste("through", format(maxdate, "%b %e")),
x = "Week of",
y = "Number of new cases per 100,000 people")
ggsave(filename = "pen_new_per_week_popnorm.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
#all WA counties new cases per 100000
ggplot() +
facet_wrap(county ~ .) +
geom_rect(data = us_county_data %>%
filter(state %in% "Washington",
!county %in% important_counties$washington),
aes(xmin = ymd("2020-01-01"),
xmax = maxdate + 30,
ymin = -100,
ymax = 10000),
fill = '#f2f2f2',
alpha = 0.1) +
geom_line(data = us_county_data %>%
filter(state %in% "Washington"),
aes(x = date,
y = active_estimate,
color = active_estimate),
size = 1) +
geom_line(data = us_county_data %>% filter(state %in% "Washington"),
aes(x = date,
y = active_estimate),
color = "Black",
size = 0.1) +
theme_bw() +
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
scale_color_distiller(palette = "YlOrRd", direction = 1, guide = "none") +
theme(panel.grid.minor.x = element_blank(),
axis.text.x = element_text(angle = 60, hjust = 1),
legend.position="bottom",
strip.text = element_text(size = 10),
panel.spacing = unit(0.1, "lines")) +
coord_cartesian(xlim = c(min(us_county_data %>% filter(state %in% "Washington") %>% pull(date), na.rm = TRUE), maxdate),
ylim = c(0, max(us_county_data %>% filter(state %in% "Washington") %>% pull(active_estimate), na.rm = TRUE))) +
labs(title = "Estimate of active cases by county",
subtitle = paste("through", format(maxdate, "%b %e")),
x = NULL,
y = "New cases per 100,000 people in previous two weeks")
ggsave(filename = "WA_new_week_person.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
height = 8.5,
width = 11,
units = "in")
#new cases in the last two weeks
ggplot() +
geom_col(data = us_county_data %>%
filter(state %in% "Washington",
date == maxdate) %>%
mutate(peninsula = ifelse(county %in% important_counties$washington, T, F)),
aes(x = reorder(county, active_estimate),
y = active_estimate,
fill = peninsula)) +
scale_fill_manual(values = c('#595959', 'darkgreen'), guide = NULL) +
theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
scale_y_continuous(limits = c(0, NA)) +
labs(title = "Cumulative new cases in the last two weeks",
subtitle = paste(Sys.Date()-days(14), "-", Sys.Date()),
x = "County",
y = "Cumulative new cases in the last two weeks (per 100,000 residents)")
ggsave(filename = "WA_new_twoweek_column.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
height = 8.5,
width = 11,
units = "in")
#map latest cases
#make bounding box for maps
buffer <- 0.1
states <- c("Washington", "Wisconsin", "Illinois")
max_county_value <- max(us_county_data %>% filter(state %in% states) %>% pull(active_estimate), na.rm = TRUE)
for(i in 1:length(states)){
if(file.exists(paste0("data/", states[i], "_basemap.RData"))){
load(file = paste0("data/", states[i], "_basemap.RData"))
} else {
range <- c(left = min(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) - buffer,
bottom = min(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) - buffer,
right = max(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) + buffer,
top = max(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) + buffer)
basemap <- ggmap::get_stamenmap(bbox = range,
zoom = 8,
maptype = "toner-hybrid")
save(basemap, file = paste0("data/", states[i], "_basemap.RData"))
}
ggmap(basemap) +
geom_polygon(data = us_county_map %>% filter(state %in% states[i]) %>% arrange(order),
aes(x = long,
y = lat,
group = group,
fill = active_estimate),
colour = "grey50",
alpha = 0.5) +
geom_text(data= us_county_label %>% filter(state %in% states[i]),
aes(x = long,
y = lat,
label = str_to_title(county)),
size = 2)+
scale_fill_distiller(palette = "Spectral") +
theme_bw() +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
plot.subtitle = element_text(color = "grey50"),
plot.caption = element_text(color = "grey50"),
legend.position = "bottom",
legend.text = element_text(angle = 60, hjust = 1)) +
labs(title = "Cumulative new cases in the last two weeks",
subtitle = format(downloaded_dttm, "%B %e"),
x = NULL,
y = NULL,
fill = "Cumulative new cases per\n100,000 residents")
ggsave(filename = paste0("map_active_", tolower(substr(states[i], 1, 2)), ".png"),
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
}
ggplot() +
geom_polygon(data = us_county_map %>%
mutate(active_estimate = ifelse(active_estimate < 0, 0, active_estimate)) %>%
arrange(order),
aes(x = long,
y = lat,
group = group,
fill = active_estimate),
colour = NA) +
scale_fill_gradientn(colors = rev(RColorBrewer::brewer.pal(n = 9, name = "Spectral")), trans = "log10") +
theme_bw() +
scale_x_continuous(breaks = NULL) +
scale_y_continuous(breaks = NULL) +
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
plot.subtitle = element_text(color = "grey50"),
plot.caption = element_text(color = "grey50"),
legend.position = "bottom",
legend.text = element_text(angle = 60, hjust = 1)) +
labs(title = "Cumulative new cases in the last two weeks",
subtitle = format(downloaded_dttm, "%B %e"),
x = NULL,
y = NULL,
fill = "Cumulative new cases per\n100,000 residents")
ggsave(filename = "map_active_us.png",
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
# WI data ----
#Milwaukee county plots
# add dates
noted_dates_wi<- data.frame(event = c("Stay-at-home order", "Restrictions lifted"),
eventdate = c(ymd("2020-03-25"), ymd("2020-05-14")))
# WI plots ----
#graph active cases for select counties
ggplot(us_county_data %>% filter(state %in% "Wisconsin",
county %in% important_counties$wisconsin)) +
geom_line(data = us_county_data %>% filter(state %in% "Washington",
county %in% c("Clallam", "King")),
aes(x = date,
y = active_estimate,
group = county),
color = "grey",
alpha = 0.5) +
geom_line(aes(x = date,
y = active_estimate,
color = county),
size = 0.75) +
geom_label_repel(data = us_county_data %>% filter(state %in% "Washington",
county %in% c("Clallam", "King"),
date %in% max(date)),
aes(x = date + 1,
y = active_estimate,
label = county),
hjust = "outward",
direction = "y",
size = 2.5,
color = "grey",) +
geom_label_repel(data = us_county_data %>% filter(state %in% "Wisconsin",
county %in% important_counties$wisconsin,
date %in% max(date)),
aes(x = date + 1,
y = active_estimate,
label = paste0(county,' - ', round(active_estimate,0)),
fill = county),
hjust = "outward",
direction = "y",
size = 2.5,
nudge_x = 4,
segment.color = "black",
segment.size = 0.3) +
scale_color_brewer(palette = "Set2", guide = "none") +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_x_date(date_breaks = "1 month",
date_labels = "%b\n%Y",
minor_breaks = "1 week",
expand = expansion(mult = c(0.02, 0.1))) +
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
labs(title = "Active case estimate - Wisconsin counties",
subtitle = paste("Through", format(downloaded_dttm, "%B %e")),
x = "Date",
y = "new cases in previous 14 days per 100,000 people",
color = "County") +
theme_bw() +
theme(panel.grid.major.x = element_line(colour="black", size = 0.1), axis.text.x = element_text(angle = 60, hjust = 1)) +
coord_cartesian(ylim = c(0,NA))
ggsave(filename = "WI_active_cases.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
ggplot() +
geom_line(data = us_county_data %>%
filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah')),
aes(x = date,
y = active_estimate,
color = county)) +
geom_label_repel(data = us_county_data %>%
filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah'),
date %in% maxdate),
aes(x = date + 0.5,
y = active_estimate,
label = paste0(county,' - ', round(active_estimate,0)),
fill = county),
hjust = "outward",
direction = "y",
size = 2.5,
nudge_x = 4,
box.padding = 0.01,
min.segment.length = 0,
segment.color = "black",
segment.size = 0.1) +
scale_color_brewer(palette = "Set2", guide = "none") +
scale_fill_brewer(palette = "Set2", guide = "none") +
scale_x_date(date_breaks = "1 month",
date_labels = "%b\n%Y",
minor_breaks = "1 week",
expand = expansion(mult = c(0.01, .07))) +
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
labs(title = "Active case estimate",
subtitle = paste("Through", format(downloaded_dttm, "%B %e")),
x = "Date",
y = "new cases in previous 14 days per 100,000 people",
color = "County",
caption = "data from The New York Times") +
theme_bw() +
theme(panel.grid.major.x = element_line(colour="black", size = 0.1),
axis.text.x = element_text(angle = 60, hjust = 1),
plot.subtitle = element_text(color = "grey50"),
plot.caption = element_text(color = "grey50")) +
coord_cartesian(ylim = c(0,NA))
ggsave(filename = "select_active_cases.png" ,
path = paste(getwd(),"/figures/", sep = ""),
device = "png",
width = 11,
height = 8.5,
units = "in")
# Animation ####
if(FALSE){
library(gganimate)
anim <- ggplot(data = us_county_data %>% filter(state %in% "Washington"),
aes(x = active_estimate,
y = deaths/countypop * 100000)) +
geom_point(aes(size = countypop,
color = active_estimate)) +
scale_color_distiller(palette = "Spectral") +
labs(title = "Cases and deaths by county in WA",
subtitle = "Date: {frame_time}",
x = "Cases per 100,000 residents",
y = "Deaths per 100,000 residents",
color = "active case estimate",
size = "population") +
transition_time(date) +
ease_aes('linear')
animate(anim,
nframes = as.double(difftime(maxdate, ymd("2020_01_21"), units = "days")))
anim_save(filename = "WA_cases_county.gif",
animation = last_animation(),
path = paste(getwd(),"/figures/", sep = ""),
fps = 1,
loop = 0,)
} else {
message("no animation this time")
}
message(difftime(Sys.time(), starttime))
rm(starttime)