Trigger gtfs_feed_downloads from a sensor instead of a cron schedule

This commit is contained in:
Ben Varick 2025-11-07 17:04:52 -08:00
parent 37f23fda18
commit 97aa28e003
Signed by: ben
SSH key fingerprint: SHA256:jWnpFDAcacYM5aPFpYRqlsamlDyKNpSj3jj+k4ojtUo
5 changed files with 176 additions and 17 deletions

View file

@ -20,7 +20,7 @@ logger = logging.getLogger(__name__)
@asset(
deps=["agency_list"],
group_name="gtfs_rt_metadata",
group_name="gtfs_rt_vehicles_metadata",
automation_condition=AutomationCondition.eager()
)
def gtfs_rt_vehicles_metadata(
@ -150,7 +150,7 @@ gtfs_rt_vehicles_partitions_def = DynamicPartitionsDefinition(name="gtfs_rt_vehi
@asset(
deps=["gtfs_rt_vehicles_metadata"],
group_name="gtfs_rt_metadata",
group_name="gtfs_rt_vehicles_metadata",
automation_condition=AutomationCondition.eager()
)
def gtfs_rt_vehicles_partitions(
@ -196,7 +196,7 @@ class GTFSRTDownloadConfig(Config):
@asset(
partitions_def=gtfs_rt_vehicles_partitions_def,
deps=[gtfs_rt_vehicles_partitions],
group_name="gtfs_rt_downloads",
group_name="gtfs_rt_vehicles_downloads",
)
def gtfs_rt_vehicles_downloads(
context: AssetExecutionContext,

View file

@ -5,6 +5,7 @@ from dagster import (
MetadataValue,
AutomationCondition,
DynamicPartitionsDefinition,
Config
)
from dagster_duckdb import DuckDBResource
from resources import MobilityDatabaseAPI
@ -14,9 +15,14 @@ from pathlib import Path
from datetime import datetime
import logging
logger = logging.getLogger(__name__)
# Run configuration for gtfs_feed_downloads, which receives it as its `config` argument.
class GTFSDownloadConfig(Config):
# Provider value for the feed; gtfs_feed_downloads reads it as `config.provider`.
provider: str
# URL of the feed; gtfs_feed_downloads uses `config.producer_url` as its download URL.
producer_url: str
@asset(
deps=["agency_list"],
group_name="gtfs_metadata",
@ -191,10 +197,10 @@ def gtfs_feed_partitions(
partitions_def=gtfs_feeds_partitions_def,
deps=["gtfs_feed_partitions"],
group_name="gtfs_downloads",
automation_condition=AutomationCondition.on_cron("0 * * * *") | AutomationCondition.eager(),
)
def gtfs_feed_downloads(
context: AssetExecutionContext,
config: GTFSDownloadConfig,
duckdb: DuckDBResource,
) -> Output[None]:
"""
@ -204,19 +210,10 @@ def gtfs_feed_downloads(
Triggered by a sensor rather than an hourly cron schedule; the provider and download URL are read from the run config.
"""
feed_id = context.partition_key
download_url = config.producer_url
provider = config.provider
with duckdb.get_connection() as conn:
# Get the download URL for this feed
feed_info = conn.execute("""
SELECT feed_id, provider, producer_url
FROM gtfs_feed_metadata
WHERE feed_id = ?
""", [feed_id]).fetchone()
if not feed_info:
raise ValueError(f"Feed {feed_id} not found in metadata")
feed_id, provider, download_url = feed_info
if not download_url:
context.log.warning(f"No download URL for {feed_id}")