from dagster import (
    sensor,
    SensorEvaluationContext,
    RunRequest,
    SkipReason,
    DefaultSensorStatus,
    AssetKey,
)
from dagster_duckdb import DuckDBResource


@sensor(
    name="gtfs_rt_vehicles_sensor",
    minimum_interval_seconds=60,
    asset_selection=["gtfs_rt_vehicles_downloads"],
    default_status=DefaultSensorStatus.RUNNING,
)
def gtfs_rt_vehicles_sensor(
    context: SensorEvaluationContext,
    duckdb: DuckDBResource,
) -> list[RunRequest] | SkipReason:
    """
    Sensor that triggers gtfs_rt_vehicles_downloads at most once every
    60 seconds (minimum_interval_seconds is a floor between evaluations,
    not a fixed schedule). Fetches feed metadata once per evaluation and
    passes it to each partition run.
    """
    # Check whether the upstream asset has been materialized at least once
    upstream_asset_key = AssetKey("gtfs_rt_vehicles_partitions")
    latest_materialization = context.instance.get_latest_materialization_event(
        upstream_asset_key
    )
    if latest_materialization is None:
        return SkipReason(
            "Waiting for upstream asset 'gtfs_rt_vehicles_partitions' to be "
            "materialized. Run the upstream assets first."
        )

    try:
        with duckdb.get_connection() as conn:
            # Fetch all active feeds and their metadata in a single query
            feeds = conn.execute(
                """
                SELECT feed_id, provider, producer_url
                FROM gtfs_rt_vehicles_metadata
                WHERE producer_url IS NOT NULL AND producer_url != ''
                ORDER BY feed_id
                """
            ).fetchall()

        if not feeds:
            return SkipReason("No GTFS-RT vehicle feeds configured")

        # Create one RunRequest per partition, carrying the feed metadata
        run_requests = [
            RunRequest(
                partition_key=feed_id,
                run_config={
                    "ops": {
                        "gtfs_rt_vehicles_downloads": {
                            "config": {
                                "provider": provider,
                                "producer_url": producer_url,
                            }
                        }
                    }
                },
                tags={
                    "feed_id": feed_id,
                    "sensor": "gtfs_rt_vehicles_sensor",
                },
            )
            for feed_id, provider, producer_url in feeds
        ]

        context.log.info(
            f"Triggering downloads for {len(run_requests)} GTFS-RT vehicle feeds"
        )
        return run_requests

    except Exception as e:
        # The metadata table may not exist yet, or the query may fail for
        # other reasons; skip this tick rather than erroring the sensor.
        context.log.warning(f"Database query failed: {e}")
        return SkipReason(f"Database not ready or query failed: {e}")
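
# --- Example wiring (a minimal sketch, not part of the original module) ---
# How this sensor could be registered in a Dagster code location. Assumptions:
# the resource key "duckdb" must match the sensor's parameter name, and the
# database path "data/gtfs_rt.duckdb" is hypothetical. The asset definitions
# for gtfs_rt_vehicles_partitions and gtfs_rt_vehicles_downloads would live
# elsewhere in the repo and must be passed via assets= so that the sensor's
# asset_selection can resolve.

from dagster import Definitions

defs = Definitions(
    # assets=[gtfs_rt_vehicles_partitions, gtfs_rt_vehicles_downloads],  # defined elsewhere
    sensors=[gtfs_rt_vehicles_sensor],
    resources={"duckdb": DuckDBResource(database="data/gtfs_rt.duckdb")},
)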