Rearranged directory structure; migrated AutoMaterializePolicy to AutomationCondition
This commit is contained in:
parent
dcff5c78e7
commit
11003a8bda
11 changed files with 38 additions and 31 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
|
@ -1,12 +1,9 @@
|
|||
#Exclude the .env file
|
||||
.env
|
||||
|
||||
#Exclude the postgres_data directory
|
||||
#Exclude contents of the postgres_data directory
|
||||
postgres_data
|
||||
postgres_data/*
|
||||
|
||||
#Exclude data directory
|
||||
data
|
||||
#except for agency_list.csv
|
||||
!data/gtfs/agency_list.csv
|
||||
|
||||
#Exclude contents of the data directory
|
||||
data/*
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ RUN pip install \
|
|||
requests
|
||||
|
||||
WORKDIR /opt/dagster/app
|
||||
COPY user_code/gtfs /opt/dagster/app
|
||||
COPY user_code /opt/dagster/app
|
||||
|
||||
# Run dagster gRPC server on port 4000
|
||||
|
||||
|
|
@ -28,11 +28,7 @@ run_launcher:
|
|||
- /var/run/docker.sock:/var/run/docker.sock
|
||||
- /tmp/io_manager_storage:/tmp/io_manager_storage
|
||||
- /home/ben/code/gtfs-dagster/data:/opt/dagster/app/data
|
||||
|
||||
auto_materialize:
|
||||
enabled: true
|
||||
run_tags:
|
||||
source: auto-materialize
|
||||
- /home/ben/code/gtfs-dagster/config:/opt/dagster/app/config
|
||||
|
||||
run_storage:
|
||||
module: dagster_postgres.run_storage
|
||||
|
|
|
|||
|
|
@ -29,23 +29,24 @@ services:
|
|||
# Multiple containers like this can be deployed separately - each just needs to run on
|
||||
# its own port, and have its own entry in the workspace.yaml file that's loaded by the
|
||||
# webserver.
|
||||
dagster_user_code_gtfs:
|
||||
dagster_code:
|
||||
build:
|
||||
context: .
|
||||
dockerfile: ./Dockerfile_user_code_gtfs
|
||||
container_name: dagster_user_code_gtfs
|
||||
image: dagster_user_code_gtfs
|
||||
dockerfile: ./Dockerfile_dagster_code
|
||||
container_name: dagster_code
|
||||
image: dagster_code
|
||||
restart: always
|
||||
environment:
|
||||
DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
|
||||
DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
|
||||
DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
|
||||
DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs'
|
||||
DAGSTER_CURRENT_IMAGE: 'dagster_code'
|
||||
MOBILITY_DB_REFRESH_TOKEN: ${MOBILITY_DB_REFRESH_TOKEN}
|
||||
env_file:
|
||||
- .env
|
||||
volumes:
|
||||
- ./data:/opt/dagster/app/data
|
||||
- ./config:/opt/dagster/app/config
|
||||
networks:
|
||||
- dagster
|
||||
|
||||
|
|
@ -81,7 +82,7 @@ services:
|
|||
depends_on:
|
||||
dagster_postgresql:
|
||||
condition: service_healthy
|
||||
dagster_user_code_gtfs:
|
||||
dagster_code:
|
||||
condition: service_started
|
||||
|
||||
# This service runs the dagster-daemon process, which is responsible for taking runs
|
||||
|
|
@ -109,7 +110,7 @@ services:
|
|||
depends_on:
|
||||
dagster_postgresql:
|
||||
condition: service_healthy
|
||||
dagster_user_code_gtfs:
|
||||
dagster_code:
|
||||
condition: service_started
|
||||
|
||||
networks:
|
||||
|
|
|
|||
|
|
@ -1,2 +1,2 @@
|
|||
# user_code/__init__.py
|
||||
# This file makes user_code a package
|
||||
from . import assets, resources
|
||||
|
|
|
|||
|
|
@ -4,19 +4,21 @@ from dagster import (
|
|||
AssetExecutionContext,
|
||||
Output,
|
||||
MetadataValue,
|
||||
AutoMaterializePolicy
|
||||
AutomationCondition
|
||||
)
|
||||
from dagster_duckdb import DuckDBResource
|
||||
from resources import MobilityDatabaseAPI # Direct import instead of relative
|
||||
from resources import MobilityDatabaseAPI
|
||||
import json
|
||||
|
||||
|
||||
@asset
|
||||
@asset(
|
||||
group_name="gtfs_metadata",
|
||||
)
|
||||
def agency_list(duckdb: DuckDBResource) -> None:
|
||||
"""Load agency list from CSV into DuckDB."""
|
||||
|
||||
# Read the CSV (path is relative to container working directory)
|
||||
df = pd.read_csv('data/gtfs/agency_list.csv')
|
||||
df = pd.read_csv('config/agency_list.csv')
|
||||
|
||||
# Write to DuckDB
|
||||
with duckdb.get_connection() as conn:
|
||||
|
|
@ -29,7 +31,7 @@ def agency_list(duckdb: DuckDBResource) -> None:
|
|||
@asset(
|
||||
deps=["agency_list"],
|
||||
group_name="gtfs_metadata",
|
||||
auto_materialize_policy=AutoMaterializePolicy.eager()
|
||||
automation_condition=AutomationCondition.eager()
|
||||
)
|
||||
def gtfs_feed_metadata(
|
||||
context: AssetExecutionContext,
|
||||
|
|
@ -1,4 +1,10 @@
|
|||
from dagster import Definitions, load_assets_from_modules, EnvVar
|
||||
from dagster import (
|
||||
Definitions,
|
||||
load_assets_from_modules,
|
||||
EnvVar,
|
||||
DefaultSensorStatus,
|
||||
AutomationConditionSensorDefinition
|
||||
)
|
||||
from dagster_duckdb import DuckDBResource
|
||||
|
||||
import assets
|
||||
|
|
@ -8,9 +14,16 @@ all_assets = load_assets_from_modules([assets])
|
|||
|
||||
defs = Definitions(
|
||||
assets=all_assets,
|
||||
sensors=[
|
||||
AutomationConditionSensorDefinition(
|
||||
"asset_automation_sensor",
|
||||
target="*",
|
||||
default_status=DefaultSensorStatus.RUNNING,
|
||||
)
|
||||
],
|
||||
resources={
|
||||
"duckdb": DuckDBResource(
|
||||
database="data/gtfs/gtfs.duckdb"
|
||||
database="data/duckdb/gtfs.duckdb"
|
||||
),
|
||||
"mobility_db": MobilityDatabaseAPI(
|
||||
refresh_token=EnvVar("MOBILITY_DB_REFRESH_TOKEN"),
|
||||
|
|
@ -1,2 +0,0 @@
|
|||
# user_code/gtfs/__init__.py
|
||||
from . import assets, resources
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
load_from:
|
||||
# Each entry here corresponds to a service in the docker-compose file that exposes user code.
|
||||
- grpc_server:
|
||||
host: dagster_user_code_gtfs
|
||||
host: dagster_code
|
||||
port: 4000
|
||||
location_name: "gtfs_user_code"
|
||||
location_name: "user_code"
|
||||
Loading…
Add table
Add a link
Reference in a new issue