Rearranged directory structure; migrated AutoMaterializePolicy to AutomationCondition

This commit is contained in:
Ben Varick 2025-11-07 07:40:46 -08:00
parent dcff5c78e7
commit 11003a8bda
Signed by: ben
SSH key fingerprint: SHA256:jWnpFDAcacYM5aPFpYRqlsamlDyKNpSj3jj+k4ojtUo
11 changed files with 38 additions and 31 deletions

9
.gitignore vendored
View file

@ -1,12 +1,9 @@
#Exclude the .env file #Exclude the .env file
.env .env
#Exclude the postgres_data #Exclude contents of the postgres_data directory
postgres_data postgres_data
postgres_data/* postgres_data/*
#Exclude data directory #Exclude contents of the data directory
data data/*
#except for agency_list.csv
!data/gtfs/agency_list.csv

View file

@ -12,7 +12,7 @@ RUN pip install \
requests requests
WORKDIR /opt/dagster/app WORKDIR /opt/dagster/app
COPY user_code/gtfs /opt/dagster/app COPY user_code /opt/dagster/app
# Run dagster gRPC server on port 4000 # Run dagster gRPC server on port 4000

View file

@ -28,11 +28,7 @@ run_launcher:
- /var/run/docker.sock:/var/run/docker.sock - /var/run/docker.sock:/var/run/docker.sock
- /tmp/io_manager_storage:/tmp/io_manager_storage - /tmp/io_manager_storage:/tmp/io_manager_storage
- /home/ben/code/gtfs-dagster/data:/opt/dagster/app/data - /home/ben/code/gtfs-dagster/data:/opt/dagster/app/data
- /home/ben/code/gtfs-dagster/config:/opt/dagster/app/config
auto_materialize:
enabled: true
run_tags:
source: auto-materialize
run_storage: run_storage:
module: dagster_postgres.run_storage module: dagster_postgres.run_storage

View file

@ -29,23 +29,24 @@ services:
# Multiple containers like this can be deployed separately - each just needs to run on # Multiple containers like this can be deployed separately - each just needs to run on
# its own port, and have its own entry in the workspace.yaml file that's loaded by the # its own port, and have its own entry in the workspace.yaml file that's loaded by the
# webserver. # webserver.
dagster_user_code_gtfs: dagster_code:
build: build:
context: . context: .
dockerfile: ./Dockerfile_user_code_gtfs dockerfile: ./Dockerfile_dagster_code
container_name: dagster_user_code_gtfs container_name: dagster_code
image: dagster_user_code_gtfs image: dagster_code
restart: always restart: always
environment: environment:
DAGSTER_POSTGRES_USER: ${POSTGRES_USER} DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD} DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
DAGSTER_POSTGRES_DB: ${POSTGRES_DB} DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs' DAGSTER_CURRENT_IMAGE: 'dagster_code'
MOBILITY_DB_REFRESH_TOKEN: ${MOBILITY_DB_REFRESH_TOKEN} MOBILITY_DB_REFRESH_TOKEN: ${MOBILITY_DB_REFRESH_TOKEN}
env_file: env_file:
- .env - .env
volumes: volumes:
- ./data:/opt/dagster/app/data - ./data:/opt/dagster/app/data
- ./config:/opt/dagster/app/config
networks: networks:
- dagster - dagster
@ -81,7 +82,7 @@ services:
depends_on: depends_on:
dagster_postgresql: dagster_postgresql:
condition: service_healthy condition: service_healthy
dagster_user_code_gtfs: dagster_code:
condition: service_started condition: service_started
# This service runs the dagster-daemon process, which is responsible for taking runs # This service runs the dagster-daemon process, which is responsible for taking runs
@ -109,7 +110,7 @@ services:
depends_on: depends_on:
dagster_postgresql: dagster_postgresql:
condition: service_healthy condition: service_healthy
dagster_user_code_gtfs: dagster_code:
condition: service_started condition: service_started
networks: networks:

View file

@ -1,2 +1,2 @@
# user_code/__init__.py # user_code/__init__.py
# This file makes user_code a package from . import assets, resources

View file

@ -4,19 +4,21 @@ from dagster import (
AssetExecutionContext, AssetExecutionContext,
Output, Output,
MetadataValue, MetadataValue,
AutoMaterializePolicy AutomationCondition
) )
from dagster_duckdb import DuckDBResource from dagster_duckdb import DuckDBResource
from resources import MobilityDatabaseAPI # Direct import instead of relative from resources import MobilityDatabaseAPI
import json import json
@asset @asset(
group_name="gtfs_metadata",
)
def agency_list(duckdb: DuckDBResource) -> None: def agency_list(duckdb: DuckDBResource) -> None:
"""Load agency list from CSV into DuckDB.""" """Load agency list from CSV into DuckDB."""
# Read the CSV (path is relative to container working directory) # Read the CSV (path is relative to container working directory)
df = pd.read_csv('data/gtfs/agency_list.csv') df = pd.read_csv('config/agency_list.csv')
# Write to DuckDB # Write to DuckDB
with duckdb.get_connection() as conn: with duckdb.get_connection() as conn:
@ -29,7 +31,7 @@ def agency_list(duckdb: DuckDBResource) -> None:
@asset( @asset(
deps=["agency_list"], deps=["agency_list"],
group_name="gtfs_metadata", group_name="gtfs_metadata",
auto_materialize_policy=AutoMaterializePolicy.eager() automation_condition=AutomationCondition.eager()
) )
def gtfs_feed_metadata( def gtfs_feed_metadata(
context: AssetExecutionContext, context: AssetExecutionContext,

View file

@ -1,4 +1,10 @@
from dagster import Definitions, load_assets_from_modules, EnvVar from dagster import (
Definitions,
load_assets_from_modules,
EnvVar,
DefaultSensorStatus,
AutomationConditionSensorDefinition
)
from dagster_duckdb import DuckDBResource from dagster_duckdb import DuckDBResource
import assets import assets
@ -8,9 +14,16 @@ all_assets = load_assets_from_modules([assets])
defs = Definitions( defs = Definitions(
assets=all_assets, assets=all_assets,
sensors=[
AutomationConditionSensorDefinition(
"asset_automation_sensor",
target="*",
default_status=DefaultSensorStatus.RUNNING,
)
],
resources={ resources={
"duckdb": DuckDBResource( "duckdb": DuckDBResource(
database="data/gtfs/gtfs.duckdb" database="data/duckdb/gtfs.duckdb"
), ),
"mobility_db": MobilityDatabaseAPI( "mobility_db": MobilityDatabaseAPI(
refresh_token=EnvVar("MOBILITY_DB_REFRESH_TOKEN"), refresh_token=EnvVar("MOBILITY_DB_REFRESH_TOKEN"),

View file

@ -1,2 +0,0 @@
# user_code/gtfs/__init__.py
from . import assets, resources

View file

@ -1,6 +1,6 @@
load_from: load_from:
# Each entry here corresponds to a service in the docker-compose file that exposes user code. # Each entry here corresponds to a service in the docker-compose file that exposes user code.
- grpc_server: - grpc_server:
host: dagster_user_code_gtfs host: dagster_code
port: 4000 port: 4000
location_name: "gtfs_user_code" location_name: "user_code"