61 lines
1.9 KiB
Python
61 lines
1.9 KiB
Python
from dagster import (
|
|
sensor,
|
|
RunRequest,
|
|
SkipReason,
|
|
SensorEvaluationContext,
|
|
DefaultSensorStatus,
|
|
)
|
|
from dagster_duckdb import DuckDBResource
|
|
|
|
|
|
@sensor(
    name="gtfs_rt_vehicles_sensor",
    minimum_interval_seconds=60,
    asset_selection=["gtfs_rt_vehicles_downloads"],
    default_status=DefaultSensorStatus.RUNNING,
)
def gtfs_rt_vehicles_sensor(
    context: SensorEvaluationContext,
    duckdb: DuckDBResource,
) -> list[RunRequest] | SkipReason:
    """Request one ``gtfs_rt_vehicles_downloads`` run per configured feed.

    Reads every row of ``gtfs_rt_vehicles_metadata`` that has a non-null,
    non-empty ``producer_url`` (ordered by ``feed_id``) in a single query,
    then builds one run request per row so the feed metadata is handed to
    each run through its run config rather than being looked up again.

    Args:
        context: Sensor evaluation context; only its logger is used here.
        duckdb: DuckDB resource used to open the metadata connection.

    Returns:
        A ``SkipReason`` when the query yields no rows. Otherwise a list of
        ``RunRequest`` objects, one per feed, where each request:

        * uses ``feed_id`` as the partition key,
        * passes ``provider`` and ``producer_url`` as the op config of
          ``gtfs_rt_vehicles_downloads``,
        * is tagged with the ``feed_id`` and this sensor's name.
    """
    # One round trip: pull the metadata for all feeds up front.
    with duckdb.get_connection() as connection:
        cursor = connection.execute("""
            SELECT feed_id, provider, producer_url
            FROM gtfs_rt_vehicles_metadata
            WHERE producer_url IS NOT NULL AND producer_url != ''
            ORDER BY feed_id
        """)
        feed_rows = cursor.fetchall()

    # Nothing to download: report why this tick produced no runs.
    if not feed_rows:
        return SkipReason("No GTFS-RT vehicle feeds configured")

    pending_requests = []
    for feed_id, provider, producer_url in feed_rows:
        # Per-feed settings consumed by the download op.
        download_config = {
            "provider": provider,
            "producer_url": producer_url,
        }
        request_tags = {
            "feed_id": feed_id,
            "sensor": "gtfs_rt_vehicles_sensor",
        }
        pending_requests.append(
            RunRequest(
                partition_key=feed_id,
                run_config={
                    "ops": {
                        "gtfs_rt_vehicles_downloads": {
                            "config": download_config,
                        },
                    },
                },
                tags=request_tags,
            )
        )

    context.log.info(f"Triggering downloads for {len(pending_requests)} GTFS-RT vehicle feeds")

    return pending_requests
|