broke out assets.py into separate assets and sensors directories

Ben Varick 2025-11-07 16:39:19 -08:00
parent 0d7de37303
commit 37f23fda18
Signed by: ben
SSH key fingerprint: SHA256:jWnpFDAcacYM5aPFpYRqlsamlDyKNpSj3jj+k4ojtUo
8 changed files with 782 additions and 707 deletions


@@ -0,0 +1,61 @@
from dagster import (
    sensor,
    RunRequest,
    SkipReason,
    SensorEvaluationContext,
    DefaultSensorStatus,
)
from dagster_duckdb import DuckDBResource


@sensor(
    name="gtfs_rt_vehicles_sensor",
    minimum_interval_seconds=60,
    asset_selection=["gtfs_rt_vehicles_downloads"],
    default_status=DefaultSensorStatus.RUNNING,
)
def gtfs_rt_vehicles_sensor(
    context: SensorEvaluationContext,
    duckdb: DuckDBResource,
) -> list[RunRequest] | SkipReason:
    """
    Sensor that triggers gtfs_rt_vehicles_downloads every 60 seconds.
    Fetches feed metadata once and passes it to each partition run.
    """
    with duckdb.get_connection() as conn:
        # Get all active feeds with their metadata in one query
        feeds = conn.execute("""
            SELECT feed_id, provider, producer_url
            FROM gtfs_rt_vehicles_metadata
            WHERE producer_url IS NOT NULL AND producer_url != ''
            ORDER BY feed_id
        """).fetchall()

    if not feeds:
        return SkipReason("No GTFS-RT vehicle feeds configured")

    # Create a RunRequest for each partition with metadata
    run_requests = [
        RunRequest(
            partition_key=feed_id,
            run_config={
                "ops": {
                    "gtfs_rt_vehicles_downloads": {
                        "config": {
                            "provider": provider,
                            "producer_url": producer_url,
                        }
                    }
                }
            },
            tags={
                "feed_id": feed_id,
                "sensor": "gtfs_rt_vehicles_sensor",
            },
        )
        for feed_id, provider, producer_url in feeds
    ]

    context.log.info(f"Triggering downloads for {len(run_requests)} GTFS-RT vehicle feeds")
    return run_requests
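
The asset side is not shown in this diff. A minimal sketch of how gtfs_rt_vehicles_downloads could consume the config the sensor passes under ops.gtfs_rt_vehicles_downloads.config, assuming the asset is dynamically partitioned by feed_id; the partitions-definition name and config class below are hypothetical, not taken from this repo:

from dagster import asset, AssetExecutionContext, Config, DynamicPartitionsDefinition

# Hypothetical dynamic partitions keyed by feed_id; the real definition
# lives elsewhere in this project and may use a different name.
gtfs_rt_feed_partitions = DynamicPartitionsDefinition(name="gtfs_rt_feeds")


class GtfsRtVehiclesDownloadConfig(Config):
    # Matches the keys the sensor places under the asset's run config
    provider: str
    producer_url: str


@asset(partitions_def=gtfs_rt_feed_partitions)
def gtfs_rt_vehicles_downloads(
    context: AssetExecutionContext,
    config: GtfsRtVehiclesDownloadConfig,
) -> None:
    # The partition key is the feed_id the sensor set as partition_key
    feed_id = context.partition_key
    context.log.info(
        f"Downloading GTFS-RT vehicles for {feed_id} "
        f"({config.provider}) from {config.producer_url}"
    )
    # ... fetch the feed from config.producer_url and persist it ...

For the sensor's duckdb parameter to resolve at evaluation time, both the sensor and the asset would also need to be registered in the project's Definitions with a DuckDBResource bound under the "duckdb" resource key.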