added AQI
parent 647b961a4f
commit f79c8d5a90

4 changed files with 202 additions and 32 deletions
.gitignore (vendored): 2 changes

@@ -6,3 +6,5 @@
 #exclude API keys
 api_keys/*
 
+#exclude data
+data/*
cities.csv: 16 changes

@@ -1,8 +1,8 @@
-City,Type,State,Country
-Madison,city,WI,United States
-Bellingham,city,WA,United States
-Portland,city,OR,United States
-Port Angeles,city,WA,United States
-Boston,city,MA,United States
-Boise City,city,ID,United States
-Salt Lake City,city,UT,United States
+City,Type,State,Country,CBSA-Code
+Madison,city,WI,United States,31540
+Bellingham,city,WA,United States,13380
+Portland,city,OR,United States,38900
+Port Angeles,city,WA,United States,38820
+Boston,city,MA,United States,14460
+Boise City,city,ID,United States,14260
+Salt Lake City,city,UT,United States,41620
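The new CBSA-Code column exists so the cities can be matched against the EPA daily AQI files, which are keyed by CBSA. A minimal sketch of pulling the codes for that match (assumes readr and dplyr are loaded; column names are exactly as in the CSV above):

```r
library(readr)
library(dplyr)

# Read the updated cities table; backticks are needed because of the hyphen in the column name.
cities <- read_csv("cities.csv")
cbsa_codes <- cities |> pull(`CBSA-Code`)   # e.g. 31540 (Madison), 13380 (Bellingham)
```

These codes are what the air_quality chunk in city_compare.Rmd filters CBSA.Code against.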
(RStudio project file, name not shown): 2 changes

@@ -11,3 +11,5 @@ Encoding: UTF-8
 
 RnwWeave: Sweave
 LaTeX: pdfLaTeX
+
+BuildType: Makefile
city_compare.Rmd: 214 changes

@@ -36,7 +36,7 @@ census_api_key(key = substr(read_file(file = "api_keys/census_api_key"), 1, 40))
 
 ## Date ranges
 ```{r date_range, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
-date_start <- "2010-01-01"
+date_start <- "2014-01-01"
 date_end <- "2024-12-31"
 
 ```
@@ -44,42 +44,42 @@ date_end <- "2024-12-31"
 
 ```{r cities, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
 cities <- read_csv(file = "cities.csv")
-cities <- cities %>%
+cities <- cities |>
   mutate(city_name = paste0(City, " ", Type))
 
 ```
 
-# Get data
+# Data
 
 ## Census data
 ```{r census, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
 populations <- list(NULL)
 for(city in cities$city_name){
-  state <- cities %>% filter(city_name == city) %>% pull(State)
+  state <- cities |> filter(city_name == city) |> pull(State)
   populations[[city]] <- get_acs(
     geography = "place",
     variables = "B01003_001",
     state = state,
     year = 2023,
     geometry = TRUE
-  ) %>%
+  ) |>
     filter(str_detect(NAME, city))
 }
 
 populations <- bind_rows(populations)
 
-city_center <- populations %>%
-  st_centroid() %>%
-  st_transform(4326) %>% # Convert to WGS84 (standard lat/lon)
+city_center <- populations |>
+  st_centroid() |>
+  st_transform(4326) %>%
   mutate(
     lon = st_coordinates(.)[,1],
     lat = st_coordinates(.)[,2]
-  ) %>%
-  st_drop_geometry() %>%
+  ) |>
+  st_drop_geometry() |>
   select(lat, lon)
 
 cities <- bind_cols(cities, populations, city_center)
-cities <- cities %>% mutate(
+cities <- cities |> mutate(
   density = estimate/as.double(st_area(geometry))*1000000
 )
 
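The density line divides the ACS population estimate by st_area(geometry), which for these geometries comes back in square metres, so the *1000000 rescales it to people per square kilometre. A quick arithmetic sketch with made-up numbers:

```r
# Hypothetical values: 250,000 people on 2.5e8 m^2 (250 km^2)
estimate <- 250000
area_m2  <- 2.5e8
estimate / area_m2 * 1000000   # 1000 people per km^2
```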
@@ -120,12 +120,12 @@ ggplot(data = cities) +
 ```{r weather, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
 weather <- list(NULL)
 for(city in cities$City){
-  city_info <- cities %>% filter(City == city)
+  city_info <- cities |> filter(City == city)
   city_run <- weather_history(
-    location = c(city_info %>% pull(lat), city_info %>% pull(lon)),
+    location = c(city_info |> pull(lat), city_info |> pull(lon)),
     start = date_start,
     end = date_end,
-    daily = c("apparent_temperature_max", "apparent_temperature_min"),
+    daily = c("apparent_temperature_max", "apparent_temperature_min", "precipitation_hours"),
     response_units = list(temperature_unit = "fahrenheit")
   )
   city_run$city <- city
@@ -134,23 +134,27 @@ for(city in cities$City){
 
 weather <- bind_rows(weather)
 
-weather_summary <- weather %>%
+weather |>
   mutate(year = year(ymd(date)),
-         month = month(ymd(date))) %>%
-  group_by(year, city) %>%
-  summarise(days_above_80 = sum(daily_apparent_temperature_max > 80)) %>%
-  group_by(city) %>%
-  summarise(median_days_above_80 = median(days_above_80))
-
-ggplot(data = weather_summary) +
+         month = month(ymd(date))) |>
+  group_by(year, city) |>
+  summarise(days_above_80 = sum(daily_apparent_temperature_max > 80)) |>
+  group_by(city) |>
+  summarise(median_days_above_80 = median(days_above_80)) |>
+  ggplot() +
   geom_col(aes(x = city,
                y = median_days_above_80)) +
-  labs(title = "Days above 80°F",
+  labs(title = "Days above 80°F (apparent temperature)",
        y = "Median days per year",
        x = "City",
        fill = NULL)
 
-ggplot(data = weather %>% pivot_longer(cols = starts_with("daily"), names_to = "max_min", values_to = "temperature") %>% filter(max_min %in% c("daily_apparent_temperature_min", "daily_apparent_temperature_max"))) +
+weather |>
+  pivot_longer(cols = starts_with("daily"),
+               names_to = "max_min",
+               values_to = "temperature") |>
+  filter(max_min %in% c("daily_apparent_temperature_min", "daily_apparent_temperature_max")) |>
+  ggplot() +
   geom_violin(aes(x = city,
                y = temperature,
                fill = max_min)) +
@@ -160,5 +164,167 @@ ggplot(data = weather %>% pivot_longer(cols = starts_with("daily"), names_to = "
        y = "°F",
        x = "City",
        fill = NULL)
 
+weather |>
+  mutate(year = year(ymd(date)),
+         month = month(ymd(date))) |>
+  group_by(year, city) |>
+  summarise(days_above_4hr = sum(daily_precipitation_hours > 4)) |>
+  group_by(city) |>
+  summarise(median_days_above_4hr = median(days_above_4hr)) |>
+  ggplot() +
+  geom_col(aes(x = city,
+               y = median_days_above_4hr)) +
+  labs(title = "Days with more than 4 hrs of rain",
+       y = "Median days per year",
+       x = "City",
+       fill = NULL)
+
+weather |>
+  mutate(year = year(ymd(date)),
+         month = month(ymd(date))) |>
+  group_by(year, month, city) |>
+  summarise(days_above_4hr = sum(daily_precipitation_hours > 4)) |>
+  group_by(city, month) |>
+  summarise(median_days_above_4hr = median(days_above_4hr)) |>
+  ggplot() +
+  geom_line(aes(x = month,
+                y = median_days_above_4hr,
+                color = city)) +
+  labs(title = "Days with more than 4 hrs of rain",
+       y = "Median days per month",
+       x = "Month",
+       color = "City")
+
 ```
+
+
+## Air Quality
+```{r air_quality, eval = TRUE, echo = TRUE, results = "show", warning = FALSE, error = TRUE, message = FALSE}
+# download data
+# Create data directory if it doesn't exist
+if (!dir.exists("data")) {
+  dir.create("data")
+}
+
+# Define years
+years <- seq(year(ymd(date_start)), year(ymd(date_end)), 1)
+
+# Initialize empty list to store dataframes
+aqi_list <- list()
+
+# Download and process files for each year
+for (year in years) {
+  # Construct URL
+  url <- paste0("https://aqs.epa.gov/aqsweb/airdata/daily_aqi_by_cbsa_", year, ".zip")
+
+  # Define local file paths
+  zip_file <- file.path("data", paste0("daily_aqi_by_cbsa_", year, ".zip"))
+  csv_file <- file.path("data", paste0("daily_aqi_by_cbsa_", year, ".csv"))
+
+  # Download zip if it doesn't exist
+  if (file.exists(zip_file)) {
+    cat(paste0("Zip file for year ", year, " already exists. Skipping download.\n"))
+  } else {
+    cat(paste0("Downloading data for year ", year, "...\n"))
+
+    tryCatch({
+      download.file(url, zip_file, mode = "wb")
+      cat(paste0("Successfully downloaded data for year ", year, "\n"))
+    }, error = function(e) {
+      cat(paste0("Error downloading data for year ", year, ": ", e$message, "\n"))
+      next
+    })
+  }
+
+  # Extract zip if CSV doesn't exist
+  if (file.exists(zip_file)) {
+    if (file.exists(csv_file)) {
+      cat(paste0("CSV for year ", year, " already extracted. Skipping extraction.\n"))
+    } else {
+      cat(paste0("Extracting zip for year ", year, "...\n"))
+      tryCatch({
+        unzip(zip_file, exdir = "data")
+        cat(paste0("Successfully extracted data for year ", year, "\n"))
+      }, error = function(e) {
+        cat(paste0("Error extracting data for year ", year, ": ", e$message, "\n"))
+        next
+      })
+    }
+  }
+
+  # Read CSV if it exists
+  if (file.exists(csv_file)) {
+    cat(paste0("Reading CSV for year ", year, "...\n"))
+    tryCatch({
+      aqi_year <- read.csv(csv_file)
+      aqi_list[[as.character(year)]] <- aqi_year
+      cat(paste0("Successfully read ", nrow(aqi_year), " rows for year ", year, "\n"))
+    }, error = function(e) {
+      cat(paste0("Error reading CSV for year ", year, ": ", e$message, "\n"))
+    })
+  }
+}
+
+# Combine all dataframes
+aqi_data <- bind_rows(aqi_list)
+aqi_data <- aqi_data |>
+  filter(CBSA.Code %in% (cities |> pull(`CBSA-Code`))) |>
+  mutate(year = year(ymd(Date)),
+         month = month(ymd(Date)))
+
+aqi_threshhold <- 50
+
+aqi_data |>
+  group_by(year, CBSA) |>
+  summarise(hours_above = sum(AQI >= aqi_threshhold, na.rm = TRUE)) |>
+  group_by(CBSA) |>
+  summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |>
+  ggplot() +
+  geom_col(aes(x = CBSA,
+               y = mean_hours_above)) +
+  labs(title = "Hours above 50 AQI",
+       y = "Mean hours per year",
+       x = "City",
+       fill = NULL)
+
+aqi_data |>
+  group_by(year, CBSA) |>
+  summarise(hours_above = sum(AQI >= 2*aqi_threshhold, na.rm = TRUE)) |>
+  group_by(CBSA) |>
+  summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |>
+  ggplot() +
+  geom_col(aes(x = CBSA,
+               y = mean_hours_above)) +
+  labs(title = paste0("Hours above ", aqi_threshhold * 2," AQI"),
+       y = "Mean hours per year",
+       x = "City",
+       fill = NULL)
+
+aqi_data |>
+  ggplot() +
+  geom_violin(aes(x = CBSA,
+                  y = AQI)) +
+  labs(title = "AQI",
+       y = "AQI",
+       x = "City",
+       fill = NULL) +
+  coord_cartesian(ylim = (c(0,500)))
+
+aqi_data |>
+  group_by(year, month, CBSA) |>
+  summarise(hours_above = sum(AQI >= aqi_threshhold, na.rm = TRUE)) |>
+  group_by(CBSA, month) |>
+  summarise(mean_hours_above = mean(hours_above, na.rm = TRUE)) |>
+  ggplot() +
+  geom_line(aes(x = month,
+                y = mean_hours_above,
+                color = CBSA)) +
+  scale_x_continuous(breaks = seq(1,12,1)) +
+  scale_color_brewer(type = "qual") +
+  labs(title = paste0("Hours with an AQI of greater than or equal to ", aqi_threshhold),
+       y = "Mean days per month",
+       x = "Month",
+       color = "City")
+
+```
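For reference, a minimal stand-alone sketch of the per-year download/extract/read pattern used in the air_quality chunk; the year 2023 and the data/ directory are only examples, and the URL pattern is the EPA AirData daily-AQI-by-CBSA archive already used above:

```r
year     <- 2023   # example year; the chunk above loops over every year in the study period
url      <- paste0("https://aqs.epa.gov/aqsweb/airdata/daily_aqi_by_cbsa_", year, ".zip")
zip_file <- file.path("data", paste0("daily_aqi_by_cbsa_", year, ".zip"))
csv_file <- file.path("data", paste0("daily_aqi_by_cbsa_", year, ".csv"))

if (!dir.exists("data")) dir.create("data")                            # cached locally, ignored via data/*
if (!file.exists(zip_file)) download.file(url, zip_file, mode = "wb")  # skip re-download if cached
if (!file.exists(csv_file)) unzip(zip_file, exdir = "data")            # skip re-extraction if cached

aqi_year <- read.csv(csv_file)   # one row per CBSA per day, with Date, CBSA, CBSA.Code, and AQI columns
```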