Rearranged directory structure; migrated AutoMaterializePolicy to AutomationCondition

This commit is contained in:
Ben Varick 2025-11-07 07:40:46 -08:00
parent dcff5c78e7
commit 11003a8bda
Signed by: ben
SSH key fingerprint: SHA256:jWnpFDAcacYM5aPFpYRqlsamlDyKNpSj3jj+k4ojtUo
11 changed files with 38 additions and 31 deletions

9
.gitignore vendored
View file

@ -1,12 +1,9 @@
#Exclude the .env file
.env
#Exclude the postgres_data
#Exclude contents of the postgres_data directory
postgres_data
postgres_data/*
#Exclude data directory
data
#except for agency_list.csv
!data/gtfs/agency_list.csv
#Exclude contents of the data directory
data/*

View file

@ -12,7 +12,7 @@ RUN pip install \
requests
WORKDIR /opt/dagster/app
COPY user_code/gtfs /opt/dagster/app
COPY user_code /opt/dagster/app
# Run dagster gRPC server on port 4000

View file

@ -28,11 +28,7 @@ run_launcher:
- /var/run/docker.sock:/var/run/docker.sock
- /tmp/io_manager_storage:/tmp/io_manager_storage
- /home/ben/code/gtfs-dagster/data:/opt/dagster/app/data
auto_materialize:
enabled: true
run_tags:
source: auto-materialize
- /home/ben/code/gtfs-dagster/config:/opt/dagster/app/config
run_storage:
module: dagster_postgres.run_storage

View file

@ -29,23 +29,24 @@ services:
# Multiple containers like this can be deployed separately - each just needs to run on
# its own port, and have its own entry in the workspace.yaml file that's loaded by the
# webserver.
dagster_user_code_gtfs:
dagster_code:
build:
context: .
dockerfile: ./Dockerfile_user_code_gtfs
container_name: dagster_user_code_gtfs
image: dagster_user_code_gtfs
dockerfile: ./Dockerfile_dagster_code
container_name: dagster_code
image: dagster_code
restart: always
environment:
DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs'
DAGSTER_CURRENT_IMAGE: 'dagster_code'
MOBILITY_DB_REFRESH_TOKEN: ${MOBILITY_DB_REFRESH_TOKEN}
env_file:
- .env
volumes:
- ./data:/opt/dagster/app/data
- ./config:/opt/dagster/app/config
networks:
- dagster
@ -81,7 +82,7 @@ services:
depends_on:
dagster_postgresql:
condition: service_healthy
dagster_user_code_gtfs:
dagster_code:
condition: service_started
# This service runs the dagster-daemon process, which is responsible for taking runs
@ -109,7 +110,7 @@ services:
depends_on:
dagster_postgresql:
condition: service_healthy
dagster_user_code_gtfs:
dagster_code:
condition: service_started
networks:

View file

@ -1,2 +1,2 @@
# user_code/__init__.py
# This file makes user_code a package
from . import assets, resources

View file

@ -4,19 +4,21 @@ from dagster import (
AssetExecutionContext,
Output,
MetadataValue,
AutoMaterializePolicy
AutomationCondition
)
from dagster_duckdb import DuckDBResource
from resources import MobilityDatabaseAPI # Direct import instead of relative
from resources import MobilityDatabaseAPI
import json
@asset
@asset(
group_name="gtfs_metadata",
)
def agency_list(duckdb: DuckDBResource) -> None:
"""Load agency list from CSV into DuckDB."""
# Read the CSV (path is relative to container working directory)
df = pd.read_csv('data/gtfs/agency_list.csv')
df = pd.read_csv('config/agency_list.csv')
# Write to DuckDB
with duckdb.get_connection() as conn:
@ -29,7 +31,7 @@ def agency_list(duckdb: DuckDBResource) -> None:
@asset(
deps=["agency_list"],
group_name="gtfs_metadata",
auto_materialize_policy=AutoMaterializePolicy.eager()
automation_condition=AutomationCondition.eager()
)
def gtfs_feed_metadata(
context: AssetExecutionContext,

View file

@ -1,4 +1,10 @@
from dagster import Definitions, load_assets_from_modules, EnvVar
from dagster import (
Definitions,
load_assets_from_modules,
EnvVar,
DefaultSensorStatus,
AutomationConditionSensorDefinition
)
from dagster_duckdb import DuckDBResource
import assets
@ -8,9 +14,16 @@ all_assets = load_assets_from_modules([assets])
defs = Definitions(
assets=all_assets,
sensors=[
AutomationConditionSensorDefinition(
"asset_automation_sensor",
target="*",
default_status=DefaultSensorStatus.RUNNING,
)
],
resources={
"duckdb": DuckDBResource(
database="data/gtfs/gtfs.duckdb"
database="data/duckdb/gtfs.duckdb"
),
"mobility_db": MobilityDatabaseAPI(
refresh_token=EnvVar("MOBILITY_DB_REFRESH_TOKEN"),

View file

@ -1,2 +0,0 @@
# user_code/gtfs/__init__.py
from . import assets, resources

View file

@ -1,6 +1,6 @@
load_from:
# Each entry here corresponds to a service in the docker-compose file that exposes user code.
- grpc_server:
host: dagster_user_code_gtfs
host: dagster_code
port: 4000
location_name: "gtfs_user_code"
location_name: "user_code"