diff --git a/.gitignore b/.gitignore index ea2f8b0..326a612 100644 --- a/.gitignore +++ b/.gitignore @@ -1,12 +1,9 @@ #Exclude the .env file .env -#Exclude the postres_data +#Exclude contents of the postgres_data directory postgres_data postgres_data/* -#Exclude data directory -data -#except for agency_list.csv -!data/gtfs/agency_list.csv - +#Exclude contents of the data directory +data/* diff --git a/Dockerfile_user_code_gtfs b/Dockerfile_dagster_code similarity index 92% rename from Dockerfile_user_code_gtfs rename to Dockerfile_dagster_code index 4095717..a5d5a29 100644 --- a/Dockerfile_user_code_gtfs +++ b/Dockerfile_dagster_code @@ -12,7 +12,7 @@ RUN pip install \ requests WORKDIR /opt/dagster/app -COPY user_code/gtfs /opt/dagster/app +COPY user_code /opt/dagster/app # Run dagster gRPC server on port 4000 diff --git a/data/gtfs/agency_list.csv b/config/agency_list.csv similarity index 100% rename from data/gtfs/agency_list.csv rename to config/agency_list.csv diff --git a/dagster.yaml b/dagster.yaml index a170c61..b5ac980 100644 --- a/dagster.yaml +++ b/dagster.yaml @@ -28,11 +28,7 @@ run_launcher: - /var/run/docker.sock:/var/run/docker.sock - /tmp/io_manager_storage:/tmp/io_manager_storage - /home/ben/code/gtfs-dagster/data:/opt/dagster/app/data - -auto_materialize: - enabled: true - run_tags: - source: auto-materialize + - /home/ben/code/gtfs-dagster/config:/opt/dagster/app/config run_storage: module: dagster_postgres.run_storage diff --git a/docker-compose.yaml b/docker-compose.yaml index 9a98f26..cdb1af9 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -29,23 +29,24 @@ services: # Multiple containers like this can be deployed separately - each just needs to run on # its own port, and have its own entry in the workspace.yaml file that's loaded by the # webserver. - dagster_user_code_gtfs: + dagster_code: build: context: . 
- dockerfile: ./Dockerfile_user_code_gtfs - container_name: dagster_user_code_gtfs - image: dagster_user_code_gtfs + dockerfile: ./Dockerfile_dagster_code + container_name: dagster_code + image: dagster_code restart: always environment: DAGSTER_POSTGRES_USER: ${POSTGRES_USER} DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} DAGSTER_POSTGRES_DB: ${POSTGRES_DB} - DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs' + DAGSTER_CURRENT_IMAGE: 'dagster_code' MOBILITY_DB_REFRESH_TOKEN: ${MOBILITY_DB_REFRESH_TOKEN} env_file: - .env volumes: - ./data:/opt/dagster/app/data + - ./config:/opt/dagster/app/config networks: - dagster @@ -81,7 +82,7 @@ services: depends_on: dagster_postgresql: condition: service_healthy - dagster_user_code_gtfs: + dagster_code: condition: service_started # This service runs the dagster-daemon process, which is responsible for taking runs @@ -109,7 +110,7 @@ services: depends_on: dagster_postgresql: condition: service_healthy - dagster_user_code_gtfs: + dagster_code: condition: service_started networks: diff --git a/user_code/__init__.py b/user_code/__init__.py index 3a78e18..10bd7a4 100644 --- a/user_code/__init__.py +++ b/user_code/__init__.py @@ -1,2 +1,2 @@ # user_code/__init__.py -# This file makes user_code a package +from . 
import assets, resources diff --git a/user_code/gtfs/assets.py b/user_code/assets.py similarity index 95% rename from user_code/gtfs/assets.py rename to user_code/assets.py index e341f54..67b7384 100644 --- a/user_code/gtfs/assets.py +++ b/user_code/assets.py @@ -4,19 +4,21 @@ from dagster import ( AssetExecutionContext, Output, MetadataValue, - AutoMaterializePolicy + AutomationCondition ) from dagster_duckdb import DuckDBResource -from resources import MobilityDatabaseAPI # Direct import instead of relative +from resources import MobilityDatabaseAPI import json -@asset +@asset( + group_name="gtfs_metadata", + ) def agency_list(duckdb: DuckDBResource) -> None: """Load agency list from CSV into DuckDB.""" # Read the CSV (path is relative to container working directory) - df = pd.read_csv('data/gtfs/agency_list.csv') + df = pd.read_csv('config/agency_list.csv') # Write to DuckDB with duckdb.get_connection() as conn: @@ -29,7 +31,7 @@ def agency_list(duckdb: DuckDBResource) -> None: @asset( deps=["agency_list"], group_name="gtfs_metadata", - auto_materialize_policy=AutoMaterializePolicy.eager() + automation_condition=AutomationCondition.eager() ) def gtfs_feed_metadata( context: AssetExecutionContext, diff --git a/user_code/gtfs/definitions.py b/user_code/definitions.py similarity index 51% rename from user_code/gtfs/definitions.py rename to user_code/definitions.py index 12cea67..7795743 100644 --- a/user_code/gtfs/definitions.py +++ b/user_code/definitions.py @@ -1,4 +1,10 @@ -from dagster import Definitions, load_assets_from_modules, EnvVar +from dagster import ( + Definitions, + load_assets_from_modules, + EnvVar, + DefaultSensorStatus, + AutomationConditionSensorDefinition +) from dagster_duckdb import DuckDBResource import assets @@ -8,9 +14,16 @@ all_assets = load_assets_from_modules([assets]) defs = Definitions( assets=all_assets, + sensors=[ + AutomationConditionSensorDefinition( + "asset_automation_sensor", + target="*", + 
default_status=DefaultSensorStatus.RUNNING, + ) + ], resources={ "duckdb": DuckDBResource( - database="data/gtfs/gtfs.duckdb" + database="data/duckdb/gtfs.duckdb" ), "mobility_db": MobilityDatabaseAPI( refresh_token=EnvVar("MOBILITY_DB_REFRESH_TOKEN"), diff --git a/user_code/gtfs/__init__.py b/user_code/gtfs/__init__.py deleted file mode 100644 index ea8ef8a..0000000 --- a/user_code/gtfs/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -# user_code/gtfs/__init__.py -from . import assets, resources diff --git a/user_code/gtfs/resources.py b/user_code/resources.py similarity index 100% rename from user_code/gtfs/resources.py rename to user_code/resources.py diff --git a/workspace.yaml b/workspace.yaml index 6ab94f4..cff3380 100644 --- a/workspace.yaml +++ b/workspace.yaml @@ -1,6 +1,6 @@ load_from: # Each entry here corresponds to a service in the docker-compose file that exposes user code. - grpc_server: - host: dagster_user_code_gtfs + host: dagster_code port: 4000 - location_name: "gtfs_user_code" \ No newline at end of file + location_name: "user_code" \ No newline at end of file