498 lines
20 KiB
R
498 lines
20 KiB
R
starttime <- Sys.time()
|
|
#setup ----
|
|
library(tidyverse)
|
|
library(lubridate)
|
|
library(ggmap)
|
|
library(ggrepel)
|
|
|
|
setwd("~/Documents/dataProjects/COVID")
|
|
|
|
#download data ----
|
|
if(file.exists("data_download_time.Rda")) {
|
|
load("data_download_time.Rda")
|
|
} else {
|
|
downloaded_dttm <- ymd_hms('1900_01_01:00:00:00')
|
|
}
|
|
|
|
if(as.double(difftime(downloaded_dttm, Sys.time()), units = "hours") < -2){
|
|
|
|
print("Downloading today's data")
|
|
|
|
us_county_data <- read_csv("https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv")
|
|
save(us_county_data, file = 'data/us_county_data.Rda')
|
|
|
|
downloaded_dttm <- Sys.time()
|
|
save(downloaded_dttm, file = "data_download_time.Rda")
|
|
|
|
} else {
|
|
print("data is current")
|
|
load('data/us_county_data.Rda')
|
|
}
|
|
|
|
##to redownload data, uncomment the following 2 lines
|
|
#downloaded_dttm <- downloaded_dttm - days(1)
|
|
#save(downloaded_dttm, file = "data_download_time.Rda")
|
|
|
|
#us_county_data <- read.csv("data/us-counties.csv")
|
|
#cleanup data ----
|
|
#make dates in a date format
|
|
us_county_data <- us_county_data %>%
|
|
mutate(date = ymd(date)) %>%
|
|
mutate(week = floor_date(date, "weeks", week_start = 1))
|
|
|
|
maxdate <- max(us_county_data$date)
|
|
|
|
#load county pop data
|
|
us_county_pop <- read.csv("data/co-est2020-alldata.csv")
|
|
|
|
us_county_pop <- us_county_pop %>%
|
|
mutate(state = STNAME,
|
|
county = ifelse(state != "Louisiana", sub(" County.*", "", CTYNAME), sub(" Parish.*", "", CTYNAME))) %>%
|
|
mutate(county_state = paste0(county, ", ", state))
|
|
|
|
us_county_data["countypop"] <- us_county_pop[match(paste0(us_county_data$county, ", ", us_county_data$state), us_county_pop$county_state), 13]
|
|
|
|
us_county_data <- us_county_data %>%
|
|
group_by(state, county) %>%
|
|
arrange(date) %>%
|
|
mutate(county_state = paste0(county,", ", state),
|
|
newcases = cases - lag(cases, n = 1, default = 0),
|
|
twowkcases = cases - lag(cases, n = 14, default = 0)) %>%
|
|
mutate(active_estimate = twowkcases/countypop * 100000)
|
|
|
|
|
|
important_counties <- list(washington = c("Clallam",
|
|
"Jefferson",
|
|
"Kitsap",
|
|
"King"),
|
|
wisconsin = c("Milwaukee",
|
|
"Dane",
|
|
"Vilas",
|
|
"Brown",
|
|
"Door"),
|
|
illinois = c("Cook"))
|
|
|
|
noted_dates_wa<- data.frame(event = c("Stay-at-home begins", "Phase 1 reopening", "Phase 2 reopening"),
|
|
eventdate <- c(ymd("2020-03-23"), ymd("2020-04-29"), ymd("2020-06-01")))
|
|
|
|
|
|
us_county_map <- map_data("county")
|
|
us_county_map["county_state"] <- paste0(us_county_map$subregion, ", ", us_county_map$region)
|
|
|
|
us_county_map <- merge(us_county_map,
|
|
us_county_data %>%
|
|
filter(date %in% maxdate) %>%
|
|
mutate(county_state = tolower(county_state)),
|
|
by.x = "county_state",
|
|
by.y = "county_state",
|
|
all = TRUE)
|
|
|
|
us_county_label <- us_county_map %>%
|
|
group_by(state, county) %>%
|
|
summarise(long = (max(long)+min(long))/2, lat = (max(lat)+min(lat))/2) %>%
|
|
ungroup()
|
|
|
|
#Washington plots ----
|
|
#graph nearby active cases
|
|
ggplot(data = us_county_data %>% filter(state %in% "Washington",
|
|
county %in% important_counties$washington)) +
|
|
geom_line(aes(x = date,
|
|
y = twowkcases/countypop * 100000,
|
|
color = county,
|
|
alpha = ifelse(county == "Clallam", 1, 1)),
|
|
size = 0.5) +
|
|
geom_label(data = us_county_data %>% filter(state %in% "Washington",
|
|
county %in% important_counties$washington,
|
|
date %in% max(date)),
|
|
aes(x = date + 1,
|
|
y = twowkcases/countypop * 100000,
|
|
label = paste0(county,' - ', round(active_estimate,0)),
|
|
fill = county),
|
|
hjust = "outward",
|
|
size = 2.5) +
|
|
scale_color_brewer(palette = "Set2") +
|
|
scale_fill_brewer(palette = "Set2", guide = "none") +
|
|
scale_alpha_continuous(limits = c(0, 1), guide = "none") +
|
|
scale_x_date(date_breaks = "1 month", date_labels = "%b\n%Y", minor_breaks = "1 week", expand = expansion(mult = c(0.01, 0.07))) +
|
|
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
|
|
labs(title = "Case rate - nearby counties",
|
|
subtitle = paste("Through", format(downloaded_dttm, "%B %e, %Y")),
|
|
x = "Date",
|
|
color = NULL,
|
|
y = "new cases in previous 14 days per 100,000 people",
|
|
caption = "data from The New York Times") +
|
|
theme_bw() +
|
|
theme(panel.grid.major.x = element_line(colour="black", size = 0.1),
|
|
axis.text.x = element_text(angle = 60, hjust = 1),
|
|
plot.subtitle = element_text(color = "grey50"),
|
|
plot.caption = element_text(color = "grey50"),
|
|
legend.position = "bottom") +
|
|
coord_cartesian(ylim = c(0,NA),
|
|
clip = "off")
|
|
ggsave(filename = "nearby_caserate.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
#plot new cases per week
|
|
ggplot() +
|
|
facet_grid(county ~ .) +
|
|
geom_col(data = us_county_data %>% filter(state %in% "Washington",
|
|
county %in% important_counties$washington,
|
|
county != "King"),
|
|
aes(x = week,
|
|
y = newcases,
|
|
fill = county),
|
|
position = "dodge") +
|
|
theme_bw() +
|
|
scale_fill_brewer(palette = "Set2") +
|
|
scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) +
|
|
scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) +
|
|
theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) +
|
|
labs(title = "New cases of COVID-19 per week",
|
|
subtitle = paste("through", format(maxdate, "%b %e")),
|
|
x = "Week of",
|
|
y = "Number of new cases")
|
|
ggsave(filename = "pen_new_per_week.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
#plot new cases per week per 100000 people
|
|
ggplot() +
|
|
facet_grid(county ~ .) +
|
|
geom_col(data = us_county_data %>% filter(state %in% "Washington",
|
|
county %in% important_counties$washington,
|
|
county != "King"),
|
|
aes(x = week,
|
|
y = newcases/countypop,
|
|
fill = county),
|
|
position = "dodge") +
|
|
theme_bw() +
|
|
scale_fill_brewer(palette = "Set2") +
|
|
scale_x_date(date_breaks = "1 week", date_labels = "%b %e", expand = expansion(add = c(7,0), mult = 0)) +
|
|
scale_y_continuous(limits = c(0,NA), expand = expansion(mult = c(0,0.1))) +
|
|
theme(panel.grid.minor.x = element_blank(), axis.text.x = element_text(angle = 60, hjust = 1)) +
|
|
labs(title = "New cases of COVID-19 per week - population normalized",
|
|
subtitle = paste("through", format(maxdate, "%b %e")),
|
|
x = "Week of",
|
|
y = "Number of new cases per 100,000 people")
|
|
ggsave(filename = "pen_new_per_week_popnorm.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
|
|
#all WA counties new cases per 100000
|
|
ggplot() +
|
|
facet_wrap(county ~ .) +
|
|
geom_rect(data = us_county_data %>%
|
|
filter(state %in% "Washington",
|
|
!county %in% important_counties$washington),
|
|
aes(xmin = ymd("2020-01-01"),
|
|
xmax = maxdate + 30,
|
|
ymin = -100,
|
|
ymax = 10000),
|
|
fill = '#f2f2f2',
|
|
alpha = 0.1) +
|
|
geom_line(data = us_county_data %>%
|
|
filter(state %in% "Washington"),
|
|
aes(x = date,
|
|
y = active_estimate,
|
|
color = active_estimate),
|
|
size = 1) +
|
|
geom_line(data = us_county_data %>% filter(state %in% "Washington"),
|
|
aes(x = date,
|
|
y = active_estimate),
|
|
color = "Black",
|
|
size = 0.1) +
|
|
theme_bw() +
|
|
scale_x_date(date_breaks = "1 month", date_labels = "%b") +
|
|
scale_color_distiller(palette = "YlOrRd", direction = 1, guide = "none") +
|
|
theme(panel.grid.minor.x = element_blank(),
|
|
axis.text.x = element_text(angle = 60, hjust = 1),
|
|
legend.position="bottom",
|
|
strip.text = element_text(size = 10),
|
|
panel.spacing = unit(0.1, "lines")) +
|
|
coord_cartesian(xlim = c(min(us_county_data %>% filter(state %in% "Washington") %>% pull(date), na.rm = TRUE), maxdate),
|
|
ylim = c(0, max(us_county_data %>% filter(state %in% "Washington") %>% pull(active_estimate), na.rm = TRUE))) +
|
|
labs(title = "Estimate of active cases by county",
|
|
subtitle = paste("through", format(maxdate, "%b %e")),
|
|
x = NULL,
|
|
y = "New cases per 100,000 people in previous two weeks")
|
|
ggsave(filename = "WA_new_week_person.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
height = 8.5,
|
|
width = 11,
|
|
units = "in")
|
|
|
|
|
|
#new cases in the last two weeks
|
|
ggplot() +
|
|
geom_col(data = us_county_data %>%
|
|
filter(state %in% "Washington",
|
|
date == maxdate) %>%
|
|
mutate(peninsula = ifelse(county %in% important_counties$washington, T, F)),
|
|
aes(x = reorder(county, active_estimate),
|
|
y = active_estimate,
|
|
fill = peninsula)) +
|
|
scale_fill_manual(values = c('#595959', 'darkgreen'), guide = NULL) +
|
|
theme(axis.text.x = element_text(angle = 60, hjust = 1)) +
|
|
scale_y_continuous(limits = c(0, NA)) +
|
|
labs(title = "Cumulative new cases in the last two weeks",
|
|
subtitle = paste(Sys.Date()-days(14), "-", Sys.Date()),
|
|
x = "County",
|
|
y = "Cumulative new cases in the last two weeks (per 100,000 residents)")
|
|
ggsave(filename = "WA_new_twoweek_column.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
height = 8.5,
|
|
width = 11,
|
|
units = "in")
|
|
|
|
|
|
#map latest cases
|
|
#make bounding box for maps
|
|
|
|
buffer <- 0.1
|
|
states <- c("Washington", "Wisconsin", "Illinois")
|
|
max_county_value <- max(us_county_data %>% filter(state %in% states) %>% pull(active_estimate), na.rm = TRUE)
|
|
|
|
for(i in 1:length(states)){
|
|
if(file.exists(paste0("data/", states[i], "_basemap.RData"))){
|
|
load(file = paste0("data/", states[i], "_basemap.RData"))
|
|
} else {
|
|
range <- c(left = min(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) - buffer,
|
|
bottom = min(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) - buffer,
|
|
right = max(us_county_map %>% filter(state %in% states[i]) %>% pull(long), na.rm = TRUE) + buffer,
|
|
top = max(us_county_map %>% filter(state %in% states[i]) %>% pull(lat), na.rm = TRUE) + buffer)
|
|
|
|
basemap <- ggmap::get_stamenmap(bbox = range,
|
|
zoom = 8,
|
|
maptype = "toner-hybrid")
|
|
save(basemap, file = paste0("data/", states[i], "_basemap.RData"))
|
|
}
|
|
|
|
ggmap(basemap) +
|
|
geom_polygon(data = us_county_map %>% filter(state %in% states[i]) %>% arrange(order),
|
|
aes(x = long,
|
|
y = lat,
|
|
group = group,
|
|
fill = active_estimate),
|
|
colour = "grey50",
|
|
alpha = 0.5) +
|
|
geom_text(data= us_county_label %>% filter(state %in% states[i]),
|
|
aes(x = long,
|
|
y = lat,
|
|
label = str_to_title(county)),
|
|
size = 2)+
|
|
scale_fill_distiller(palette = "Spectral") +
|
|
theme_bw() +
|
|
scale_x_continuous(breaks = NULL) +
|
|
scale_y_continuous(breaks = NULL) +
|
|
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
|
|
plot.subtitle = element_text(color = "grey50"),
|
|
plot.caption = element_text(color = "grey50"),
|
|
legend.position = "bottom",
|
|
legend.text = element_text(angle = 60, hjust = 1)) +
|
|
labs(title = "Cumulative new cases in the last two weeks",
|
|
subtitle = format(downloaded_dttm, "%B %e"),
|
|
x = NULL,
|
|
y = NULL,
|
|
fill = "Cumulative new cases per\n100,000 residents")
|
|
ggsave(filename = paste0("map_active_", tolower(substr(states[i], 1, 2)), ".png"),
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
}
|
|
|
|
ggplot() +
|
|
geom_polygon(data = us_county_map %>%
|
|
mutate(active_estimate = ifelse(active_estimate < 0, 0, active_estimate)) %>%
|
|
arrange(order),
|
|
aes(x = long,
|
|
y = lat,
|
|
group = group,
|
|
fill = active_estimate),
|
|
colour = NA) +
|
|
scale_fill_gradientn(colors = rev(RColorBrewer::brewer.pal(n = 9, name = "Spectral")), trans = "log10") +
|
|
theme_bw() +
|
|
scale_x_continuous(breaks = NULL) +
|
|
scale_y_continuous(breaks = NULL) +
|
|
theme(panel.grid.minor = element_blank(), panel.grid.major = element_blank(),
|
|
plot.subtitle = element_text(color = "grey50"),
|
|
plot.caption = element_text(color = "grey50"),
|
|
legend.position = "bottom",
|
|
legend.text = element_text(angle = 60, hjust = 1)) +
|
|
labs(title = "Cumulative new cases in the last two weeks",
|
|
subtitle = format(downloaded_dttm, "%B %e"),
|
|
x = NULL,
|
|
y = NULL,
|
|
fill = "Cumulative new cases per\n100,000 residents")
|
|
ggsave(filename = "map_active_us.png",
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
# WI data ----
|
|
#Milwaukee county plots
|
|
# add dates
|
|
|
|
noted_dates_wi<- data.frame(event = c("Stay-at-home order", "Restrictions lifted"),
|
|
eventdate = c(ymd("2020-03-25"), ymd("2020-05-14")))
|
|
|
|
|
|
# WI plots ----
|
|
#graph active cases for select counties
|
|
ggplot(us_county_data %>% filter(state %in% "Wisconsin",
|
|
county %in% important_counties$wisconsin)) +
|
|
geom_line(data = us_county_data %>% filter(state %in% "Washington",
|
|
county %in% c("Clallam", "King")),
|
|
aes(x = date,
|
|
y = active_estimate,
|
|
group = county),
|
|
color = "grey",
|
|
alpha = 0.5) +
|
|
geom_line(aes(x = date,
|
|
y = active_estimate,
|
|
color = county),
|
|
size = 0.75) +
|
|
geom_label_repel(data = us_county_data %>% filter(state %in% "Washington",
|
|
county %in% c("Clallam", "King"),
|
|
date %in% max(date)),
|
|
aes(x = date + 1,
|
|
y = active_estimate,
|
|
label = county),
|
|
hjust = "outward",
|
|
direction = "y",
|
|
size = 2.5,
|
|
color = "grey",) +
|
|
geom_label_repel(data = us_county_data %>% filter(state %in% "Wisconsin",
|
|
county %in% important_counties$wisconsin,
|
|
date %in% max(date)),
|
|
aes(x = date + 1,
|
|
y = active_estimate,
|
|
label = paste0(county,' - ', round(active_estimate,0)),
|
|
fill = county),
|
|
hjust = "outward",
|
|
direction = "y",
|
|
size = 2.5,
|
|
nudge_x = 4,
|
|
segment.color = "black",
|
|
segment.size = 0.3) +
|
|
scale_color_brewer(palette = "Set2", guide = "none") +
|
|
scale_fill_brewer(palette = "Set2", guide = "none") +
|
|
scale_x_date(date_breaks = "1 month",
|
|
date_labels = "%b\n%Y",
|
|
minor_breaks = "1 week",
|
|
expand = expansion(mult = c(0.02, 0.1))) +
|
|
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
|
|
labs(title = "Active case estimate - Wisconsin counties",
|
|
subtitle = paste("Through", format(downloaded_dttm, "%B %e")),
|
|
x = "Date",
|
|
y = "new cases in previous 14 days per 100,000 people",
|
|
color = "County") +
|
|
theme_bw() +
|
|
theme(panel.grid.major.x = element_line(colour="black", size = 0.1), axis.text.x = element_text(angle = 60, hjust = 1)) +
|
|
coord_cartesian(ylim = c(0,NA))
|
|
ggsave(filename = "WI_active_cases.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
ggplot() +
|
|
geom_line(data = us_county_data %>%
|
|
filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah')),
|
|
aes(x = date,
|
|
y = active_estimate,
|
|
color = county)) +
|
|
geom_label_repel(data = us_county_data %>%
|
|
filter(county_state %in% c("Clallam, Washington", "Lake, Illinois", "Milwaukee, Wisconsin", "Dane, Wisconsin", 'Salt Lake, Utah'),
|
|
date %in% maxdate),
|
|
aes(x = date + 0.5,
|
|
y = active_estimate,
|
|
label = paste0(county,' - ', round(active_estimate,0)),
|
|
fill = county),
|
|
hjust = "outward",
|
|
direction = "y",
|
|
size = 2.5,
|
|
nudge_x = 4,
|
|
box.padding = 0.01,
|
|
min.segment.length = 0,
|
|
segment.color = "black",
|
|
segment.size = 0.1) +
|
|
scale_color_brewer(palette = "Set2", guide = "none") +
|
|
scale_fill_brewer(palette = "Set2", guide = "none") +
|
|
scale_x_date(date_breaks = "1 month",
|
|
date_labels = "%b\n%Y",
|
|
minor_breaks = "1 week",
|
|
expand = expansion(mult = c(0.01, .07))) +
|
|
scale_y_continuous(expand = expansion(mult = c(0,0.1))) +
|
|
labs(title = "Active case estimate",
|
|
subtitle = paste("Through", format(downloaded_dttm, "%B %e")),
|
|
x = "Date",
|
|
y = "new cases in previous 14 days per 100,000 people",
|
|
color = "County",
|
|
caption = "data from The New York Times") +
|
|
theme_bw() +
|
|
theme(panel.grid.major.x = element_line(colour="black", size = 0.1),
|
|
axis.text.x = element_text(angle = 60, hjust = 1),
|
|
plot.subtitle = element_text(color = "grey50"),
|
|
plot.caption = element_text(color = "grey50")) +
|
|
coord_cartesian(ylim = c(0,NA))
|
|
ggsave(filename = "select_active_cases.png" ,
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
device = "png",
|
|
width = 11,
|
|
height = 8.5,
|
|
units = "in")
|
|
|
|
# Animation ####
|
|
if(FALSE){
|
|
|
|
library(gganimate)
|
|
|
|
anim <- ggplot(data = us_county_data %>% filter(state %in% "Washington"),
|
|
aes(x = active_estimate,
|
|
y = deaths/countypop * 100000)) +
|
|
geom_point(aes(size = countypop,
|
|
color = active_estimate)) +
|
|
scale_color_distiller(palette = "Spectral") +
|
|
labs(title = "Cases and deaths by county in WA",
|
|
subtitle = "Date: {frame_time}",
|
|
x = "Cases per 100,000 residents",
|
|
y = "Deaths per 100,000 residents",
|
|
color = "active case estimate",
|
|
size = "population") +
|
|
transition_time(date) +
|
|
ease_aes('linear')
|
|
|
|
animate(anim,
|
|
nframes = as.double(difftime(maxdate, ymd("2020_01_21"), units = "days")))
|
|
|
|
anim_save(filename = "WA_cases_county.gif",
|
|
animation = last_animation(),
|
|
path = paste(getwd(),"/figures/", sep = ""),
|
|
fps = 1,
|
|
loop = 0,)
|
|
} else {
|
|
message("no animation this time")
|
|
}
|
|
|
|
message(difftime(Sys.time(), starttime))
|
|
rm(starttime)
|