initial dagster setup
This commit is contained in:
parent
af2213f0ab
commit
7791d034ae
8 changed files with 232 additions and 0 deletions
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
# Exclude the .env file (local environment settings; keep it out of version control)
.env

# Exclude the postgres_data directory.
# Ignoring the directory itself also ignores everything inside it, so no
# separate "postgres_data/*" pattern is needed.
postgres_data
|
||||
|
||||
20
Dockerfile_dagster
Normal file
20
Dockerfile_dagster
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
# Image with the Dagster libraries needed to run both dagster-webserver and
# dagster-daemon. It does not need access to any pipeline code.

FROM python:3.10-slim

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
    dagster \
    dagster-graphql \
    dagster-webserver \
    dagster-postgres \
    dagster-docker

# Set $DAGSTER_HOME and copy the dagster instance and workspace YAML there.
# Keep the trailing slash: COPY with more than one source requires a
# destination that ends in "/".
ENV DAGSTER_HOME=/opt/dagster/dagster_home/

RUN mkdir -p $DAGSTER_HOME

COPY dagster.yaml workspace.yaml $DAGSTER_HOME

WORKDIR $DAGSTER_HOME
|
||||
20
Dockerfile_user_code_gtfs
Normal file
20
Dockerfile_user_code_gtfs
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
FROM python:3.10-slim

# Install the dagster libraries needed to run the gRPC server that exposes
# your repository to dagster-webserver and dagster-daemon, and to load the
# DagsterInstance.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
    dagster \
    dagster-postgres \
    dagster-docker

WORKDIR /opt/dagster/app

# Copy the GTFS user code and the definitions module served by the gRPC server.
COPY user_code/gtfs /opt/dagster/app

COPY definitions.py /opt/dagster/app

# Run dagster gRPC server on port 4000

EXPOSE 4000

CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000", "-f", "definitions.py"]
|
||||
70
dagster.yaml
Normal file
70
dagster.yaml
Normal file
|
|
@ -0,0 +1,70 @@
|
|||
---
# Dagster instance configuration, copied into $DAGSTER_HOME by Dockerfile_dagster.

# Scheduler implementation backed by the dagster-daemon process.
scheduler:
  module: dagster.core.scheduler
  class: DagsterDaemonScheduler

# Submitted runs are queued; the limits below cap how many execute at once.
run_coordinator:
  module: dagster.core.run_coordinator
  class: QueuedRunCoordinator
  config:
    max_concurrent_runs: 5
    tag_concurrency_limits:
      # Limit applied to runs carrying the tag operation=example.
      - key: "operation"
        value: "example"
        limit: 5

# Runs are launched through the Docker run launcher.
run_launcher:
  module: dagster_docker
  class: DockerRunLauncher
  config:
    # Environment variables listed here are the same ones the storage
    # sections below read their Postgres credentials from.
    env_vars:
      - DAGSTER_POSTGRES_USER
      - DAGSTER_POSTGRES_PASSWORD
      - DAGSTER_POSTGRES_DB
    # Must match the network name declared in docker-compose.yaml.
    network: dagster
    container_kwargs:
      # Make docker client accessible to any launched containers as well
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock
        - /tmp/io_manager_storage:/tmp/io_manager_storage

# Run, schedule and event-log storage all use the same Postgres database;
# the hostname is the Postgres container name from docker-compose.yaml.
run_storage:
  module: dagster_postgres.run_storage
  class: PostgresRunStorage
  config:
    postgres_db:
      hostname: dagster_postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

schedule_storage:
  module: dagster_postgres.schedule_storage
  class: PostgresScheduleStorage
  config:
    postgres_db:
      hostname: dagster_postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

event_log_storage:
  module: dagster_postgres.event_log
  class: PostgresEventLogStorage
  config:
    postgres_db:
      hostname: dagster_postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432
|
||||
2
data/gtfs/agency_list.csv
Normal file
2
data/gtfs/agency_list.csv
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
Name,GTFS,GTFS-RT_vehicles,GTFS-RT_trips,GTFS-RT_alerts
|
||||
Madison Metro,mdb-394,mdb-2097,mdb-2096,mdb-2095
|
||||
|
0
definitions.py
Normal file
0
definitions.py
Normal file
107
docker-compose.yaml
Normal file
107
docker-compose.yaml
Normal file
|
|
@ -0,0 +1,107 @@
|
|||
---

services:
  # This service runs the postgres DB used by dagster for run storage, schedule storage,
  # and event log storage. Depending on the hardware you run this Compose on, you may be able
  # to reduce the interval and timeout in the healthcheck to speed up your `docker-compose up` times.
  dagster_postgresql:
    image: postgres:17
    container_name: dagster_postgresql
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    volumes:
      - ./postgres_data:/var/lib/postgresql/data
    networks:
      - dagster
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}']
      interval: 10s
      timeout: 8s
      retries: 5

  # This service runs the gRPC server that loads your user code and serves it to both
  # dagster-webserver and dagster-daemon. By setting DAGSTER_CURRENT_IMAGE to its own image,
  # we tell the run launcher to use this same image when launching runs in a new container
  # as well. Multiple containers like this can be deployed separately - each just needs to
  # run on its own port, and have its own entry in the workspace.yaml file that's loaded by
  # the webserver.
  dagster_user_code_gtfs:
    build:
      context: .
      dockerfile: ./Dockerfile_user_code_gtfs
    container_name: dagster_user_code_gtfs
    image: dagster_user_code_gtfs
    restart: always
    environment:
      DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
      DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
      DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs'
    networks:
      - dagster

  # This service runs dagster-webserver, which loads your user code from the user code container.
  # Since our instance uses the QueuedRunCoordinator, any runs submitted from the webserver will
  # be put on a queue and later dequeued and launched by dagster-daemon.
  dagster_webserver:
    build:
      context: .
      dockerfile: ./Dockerfile_dagster
    entrypoint:
      - dagster-webserver
      - -h
      - '0.0.0.0'
      - -p
      - '3000'
      - -w
      - workspace.yaml
    container_name: dagster_webserver
    # Host port 3001 -> container port 3000. Quoted so the mapping is always
    # read as a string, never as a number.
    ports:
      - '3001:3000'
    environment:
      DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
      DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
    # Make docker client accessible so we can terminate containers from the webserver
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - /tmp/io_manager_storage:/tmp/io_manager_storage
    networks:
      - dagster
    depends_on:
      dagster_postgresql:
        condition: service_healthy
      dagster_user_code_gtfs:
        condition: service_started

  # This service runs the dagster-daemon process, which is responsible for taking runs
  # off of the queue and launching them, as well as creating runs from schedules or sensors.
  dagster_daemon:
    build:
      context: .
      dockerfile: ./Dockerfile_dagster
    entrypoint:
      - dagster-daemon
      - run
    container_name: dagster_daemon
    restart: on-failure
    environment:
      DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
      DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
    # Make docker client accessible so we can launch containers using host docker
    volumes:
      - /var/run/docker.sock:/var/run/docker.sock
      - /tmp/io_manager_storage:/tmp/io_manager_storage
    networks:
      - dagster
    depends_on:
      dagster_postgresql:
        condition: service_healthy
      dagster_user_code_gtfs:
        condition: service_started

# The explicit name keeps the network called "dagster", which is the network
# the run launcher in dagster.yaml attaches launched containers to.
networks:
  dagster:
    driver: bridge
    name: dagster
|
||||
6
workspace.yaml
Normal file
6
workspace.yaml
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
---
# Code locations loaded by the Dagster webserver and daemon.
load_from:
  # Each entry here corresponds to a service in the docker-compose file that exposes user code.
  - grpc_server:
      # Service name and gRPC port of the user-code container.
      host: dagster_user_code_gtfs
      port: 4000
      location_name: "gtfs"
|
||||
Loading…
Add table
Add a link
Reference in a new issue