rearranged directory structure, automaterialize to automationCondition
parent dcff5c78e7
commit 11003a8bda
11 changed files with 38 additions and 31 deletions

.gitignore (vendored): 9 changes

@@ -1,12 +1,9 @@
 #Exclude the .env file
 .env

-#Exclude the postres_data
+#Exclude contents of the postres_data directory
 postgres_data
 postgres_data/*

-#Exclude data directory
-data
-#except for agency_list.csv
-!data/gtfs/agency_list.csv
-
+#Exclude contents of the data directory
+data/*

@@ -12,7 +12,7 @@ RUN pip install \
     requests

 WORKDIR /opt/dagster/app
-COPY user_code/gtfs /opt/dagster/app
+COPY user_code /opt/dagster/app

 # Run dagster gRPC server on port 4000

@@ -28,11 +28,7 @@ run_launcher:
       - /var/run/docker.sock:/var/run/docker.sock
       - /tmp/io_manager_storage:/tmp/io_manager_storage
       - /home/ben/code/gtfs-dagster/data:/opt/dagster/app/data
-
-auto_materialize:
-  enabled: true
-  run_tags:
-    source: auto-materialize
+      - /home/ben/code/gtfs-dagster/config:/opt/dagster/app/config

 run_storage:
   module: dagster_postgres.run_storage
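
Note: with the switch to declarative automation, the instance-level auto_materialize setting removed here is no longer needed; run requests are instead issued by the AutomationConditionSensorDefinition added to the code location's Definitions later in this commit (see the definitions hunk below).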

@@ -29,23 +29,24 @@ services:
   # Multiple containers like this can be deployed separately - each just needs to run on
   # its own port, and have its own entry in the workspace.yaml file that's loaded by the
   # webserver.
-  dagster_user_code_gtfs:
+  dagster_code:
     build:
       context: .
-      dockerfile: ./Dockerfile_user_code_gtfs
-    container_name: dagster_user_code_gtfs
-    image: dagster_user_code_gtfs
+      dockerfile: ./Dockerfile_dagster_code
+    container_name: dagster_code
+    image: dagster_code
     restart: always
     environment:
       DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
       DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
       DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
-      DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs'
+      DAGSTER_CURRENT_IMAGE: 'dagster_code'
      MOBILITY_DB_REFRESH_TOKEN: ${MOBILITY_DB_REFRESH_TOKEN}
     env_file:
       - .env
     volumes:
       - ./data:/opt/dagster/app/data
+      - ./config:/opt/dagster/app/config
     networks:
       - dagster

@@ -81,7 +82,7 @@ services:
     depends_on:
       dagster_postgresql:
         condition: service_healthy
-      dagster_user_code_gtfs:
+      dagster_code:
         condition: service_started

   # This service runs the dagster-daemon process, which is responsible for taking runs

@@ -109,7 +110,7 @@ services:
     depends_on:
       dagster_postgresql:
         condition: service_healthy
-      dagster_user_code_gtfs:
+      dagster_code:
         condition: service_started

 networks:

@@ -1,2 +1,2 @@
 # user_code/__init__.py
-# This file makes user_code a package
+from . import assets, resources

@@ -4,19 +4,21 @@ from dagster import (
     AssetExecutionContext,
     Output,
     MetadataValue,
-    AutoMaterializePolicy
+    AutomationCondition
 )
 from dagster_duckdb import DuckDBResource
-from resources import MobilityDatabaseAPI # Direct import instead of relative
+from resources import MobilityDatabaseAPI
 import json


-@asset
+@asset(
+    group_name="gtfs_metadata",
+)
 def agency_list(duckdb: DuckDBResource) -> None:
     """Load agency list from CSV into DuckDB."""

     # Read the CSV (path is relative to container working directory)
-    df = pd.read_csv('data/gtfs/agency_list.csv')
+    df = pd.read_csv('config/agency_list.csv')

     # Write to DuckDB
     with duckdb.get_connection() as conn:

@@ -29,7 +31,7 @@ def agency_list(duckdb: DuckDBResource) -> None:
 @asset(
     deps=["agency_list"],
     group_name="gtfs_metadata",
-    auto_materialize_policy=AutoMaterializePolicy.eager()
+    automation_condition=AutomationCondition.eager()
 )
 def gtfs_feed_metadata(
     context: AssetExecutionContext,
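
The hunk above swaps the per-asset auto_materialize_policy=AutoMaterializePolicy.eager() for the newer automation_condition parameter. A minimal, self-contained sketch of the new form, assuming Dagster 1.8 or later; the asset name and empty body below are placeholders, not the project's real logic:

from dagster import AutomationCondition, asset

@asset(
    deps=["agency_list"],
    group_name="gtfs_metadata",
    # replaces auto_materialize_policy=AutoMaterializePolicy.eager()
    automation_condition=AutomationCondition.eager(),
)
def downstream_example() -> None:
    ...  # placeholder body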

@@ -1,4 +1,10 @@
-from dagster import Definitions, load_assets_from_modules, EnvVar
+from dagster import (
+    Definitions,
+    load_assets_from_modules,
+    EnvVar,
+    DefaultSensorStatus,
+    AutomationConditionSensorDefinition
+)
 from dagster_duckdb import DuckDBResource

 import assets

@@ -8,9 +14,16 @@ all_assets = load_assets_from_modules([assets])

 defs = Definitions(
     assets=all_assets,
+    sensors=[
+        AutomationConditionSensorDefinition(
+            "asset_automation_sensor",
+            target="*",
+            default_status=DefaultSensorStatus.RUNNING,
+        )
+    ],
     resources={
         "duckdb": DuckDBResource(
-            database="data/gtfs/gtfs.duckdb"
+            database="data/duckdb/gtfs.duckdb"
         ),
         "mobility_db": MobilityDatabaseAPI(
             refresh_token=EnvVar("MOBILITY_DB_REFRESH_TOKEN"),
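
Per-asset automation conditions only trigger runs while an automation-condition sensor is running, which is what the new sensors=[...] entry provides; it is the code-level counterpart of the auto_materialize block removed from the instance YAML earlier in this commit. A minimal sketch of the same wiring in isolation, assuming Dagster 1.8+ (the asset here is illustrative, not the project's own):

from dagster import (
    AutomationCondition,
    AutomationConditionSensorDefinition,
    DefaultSensorStatus,
    Definitions,
    asset,
)

@asset(automation_condition=AutomationCondition.eager())
def example_asset() -> None:
    """Requests a materialization whenever its eager condition is met."""

defs = Definitions(
    assets=[example_asset],
    sensors=[
        AutomationConditionSensorDefinition(
            "asset_automation_sensor",   # same name as the sensor added above
            target="*",                  # evaluate conditions across all assets
            default_status=DefaultSensorStatus.RUNNING,
        )
    ],
)

With default_status=DefaultSensorStatus.RUNNING the sensor starts as soon as the code location loads, so no manual toggle in the Dagster UI is required.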

@@ -1,2 +0,0 @@
-# user_code/gtfs/__init__.py
-from . import assets, resources

@@ -1,6 +1,6 @@
 load_from:
   # Each entry here corresponds to a service in the docker-compose file that exposes user code.
   - grpc_server:
-      host: dagster_user_code_gtfs
+      host: dagster_code
       port: 4000
-      location_name: "gtfs_user_code"
+      location_name: "user_code"