initial dagster setup
This commit is contained in:
parent
af2213f0ab
commit
7791d034ae
8 changed files with 232 additions and 0 deletions
7
.gitignore
vendored
Normal file
7
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
||||||
|
#Exclude the .env file
|
||||||
|
.env
|
||||||
|
|
||||||
|
# Exclude the postgres_data directory
|
||||||
|
postgres_data
|
||||||
|
postgres_data/*
|
||||||
|
|
||||||
20
Dockerfile_dagster
Normal file
20
Dockerfile_dagster
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
# Image for the Dagster system services: installs the libraries needed to run
# both dagster-webserver and the dagster-daemon. It does not need access to any
# pipeline code.

FROM python:3.10-slim

# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
    dagster \
    dagster-graphql \
    dagster-webserver \
    dagster-postgres \
    dagster-docker

# Set $DAGSTER_HOME and copy the dagster instance and workspace YAML there.
ENV DAGSTER_HOME=/opt/dagster/dagster_home/

RUN mkdir -p $DAGSTER_HOME

# The trailing slash on $DAGSTER_HOME matters here: COPY with more than one
# source requires the destination to be a directory ending in "/".
COPY dagster.yaml workspace.yaml $DAGSTER_HOME

WORKDIR $DAGSTER_HOME
|
||||||
20
Dockerfile_user_code_gtfs
Normal file
20
Dockerfile_user_code_gtfs
Normal file
|
|
@ -0,0 +1,20 @@
|
||||||
|
FROM python:3.10-slim

# Install the dagster libraries needed to run the gRPC server that exposes this
# code location to dagster-webserver and dagster-daemon, and to load the
# DagsterInstance.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
    dagster \
    dagster-postgres \
    dagster-docker

WORKDIR /opt/dagster/app

# NOTE(review): user_code/gtfs must exist in the build context for this COPY to
# succeed -- confirm the directory is committed.
COPY user_code/gtfs /opt/dagster/app

# NOTE(review): definitions.py is the file the gRPC server loads (see CMD);
# confirm it actually declares Dagster definitions before deploying.
COPY definitions.py /opt/dagster/app

# Run dagster gRPC server on port 4000

EXPOSE 4000

CMD ["dagster", "api", "grpc", "-h", "0.0.0.0", "-p", "4000", "-f", "definitions.py"]
|
||||||
70
dagster.yaml
Normal file
70
dagster.yaml
Normal file
|
|
@ -0,0 +1,70 @@
|
||||||
|
---
# Dagster instance configuration. All three storages below point at the same
# Postgres host and read their credentials from environment variables.

# Schedules are evaluated by the daemon process.
scheduler:
  module: dagster.core.scheduler
  class: DagsterDaemonScheduler

# Runs are queued and dequeued subject to the limits configured here.
run_coordinator:
  module: dagster.core.run_coordinator
  class: QueuedRunCoordinator
  config:
    max_concurrent_runs: 5
    tag_concurrency_limits:
      - key: 'operation'
        value: 'example'
        limit: 5

# Each run is launched through the Docker run launcher on the "dagster" network.
run_launcher:
  module: dagster_docker
  class: DockerRunLauncher
  config:
    # Names of the environment variables listed for the run launcher.
    env_vars:
      - DAGSTER_POSTGRES_USER
      - DAGSTER_POSTGRES_PASSWORD
      - DAGSTER_POSTGRES_DB
    network: dagster
    container_kwargs:
      # Make docker client accessible to any launched containers as well
      volumes:
        - /var/run/docker.sock:/var/run/docker.sock
        - /tmp/io_manager_storage:/tmp/io_manager_storage

# Run storage (Postgres).
run_storage:
  module: dagster_postgres.run_storage
  class: PostgresRunStorage
  config:
    postgres_db:
      hostname: dagster_postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

# Schedule storage (Postgres).
schedule_storage:
  module: dagster_postgres.schedule_storage
  class: PostgresScheduleStorage
  config:
    postgres_db:
      hostname: dagster_postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432

# Event log storage (Postgres).
event_log_storage:
  module: dagster_postgres.event_log
  class: PostgresEventLogStorage
  config:
    postgres_db:
      hostname: dagster_postgresql
      username:
        env: DAGSTER_POSTGRES_USER
      password:
        env: DAGSTER_POSTGRES_PASSWORD
      db_name:
        env: DAGSTER_POSTGRES_DB
      port: 5432
|
||||||
2
data/gtfs/agency_list.csv
Normal file
2
data/gtfs/agency_list.csv
Normal file
|
|
@ -0,0 +1,2 @@
|
||||||
|
Name,GTFS,GTFS-RT_vehicles,GTFS-RT_trips,GTFS-RT_alerts
|
||||||
|
Madison Metro,mdb-394,mdb-2097,mdb-2096,mdb-2095
|
||||||
|
0
definitions.py
Normal file
0
definitions.py
Normal file
107
docker-compose.yaml
Normal file
107
docker-compose.yaml
Normal file
|
|
@ -0,0 +1,107 @@
|
||||||
|
---

services:
  # This service runs the postgres DB used by dagster for run storage, schedule storage,
  # and event log storage. Depending on the hardware you run this Compose on, you may be able
  # to reduce the interval and timeout in the healthcheck to speed up your `docker-compose up` times.
  dagster_postgresql:
    image: postgres:17
    container_name: dagster_postgresql
    environment:
      POSTGRES_USER: ${POSTGRES_USER}
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      POSTGRES_DB: ${POSTGRES_DB}
    volumes:
      - ./postgres_data:/var/lib/postgresql/data
    networks:
      - dagster
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U ${POSTGRES_USER} -d ${POSTGRES_DB}']
      interval: 10s
      timeout: 8s
      retries: 5

  # This service runs the gRPC server that loads your user code; both dagster-webserver
  # and dagster-daemon talk to it. By setting DAGSTER_CURRENT_IMAGE to its own image, we
  # tell the run launcher to use this same image when launching runs in a new container
  # as well. Multiple containers like this can be deployed separately - each just needs
  # to run on its own port, and have its own entry in the workspace.yaml file that's
  # loaded by the webserver.
  dagster_user_code_gtfs:
    build:
      context: .
      dockerfile: ./Dockerfile_user_code_gtfs
    container_name: dagster_user_code_gtfs
    image: dagster_user_code_gtfs
    restart: always
    environment:
      DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
      DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
      DAGSTER_CURRENT_IMAGE: 'dagster_user_code_gtfs'
    networks:
      - dagster

  # This service runs dagster-webserver, which loads your user code from the user code
  # container. Since our instance uses the QueuedRunCoordinator, any runs submitted from
  # the webserver will be put on a queue and later dequeued and launched by dagster-daemon.
  dagster_webserver:
    build:
      context: .
      dockerfile: ./Dockerfile_dagster
    entrypoint:
      - dagster-webserver
      - -h
      - '0.0.0.0'
      - -p
      - '3000'
      - -w
      - workspace.yaml
    container_name: dagster_webserver
    # Quoted so the HOST:CONTAINER mapping is always read as a string
    # (unquoted digit:digit scalars can be parsed as base-60 numbers).
    ports:
      - '3001:3000'
    environment:
      DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
      DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
    volumes: # Make docker client accessible so we can terminate containers from the webserver
      - /var/run/docker.sock:/var/run/docker.sock
      - /tmp/io_manager_storage:/tmp/io_manager_storage
    networks:
      - dagster
    depends_on:
      dagster_postgresql:
        condition: service_healthy
      dagster_user_code_gtfs:
        condition: service_started

  # This service runs the dagster-daemon process, which is responsible for taking runs
  # off of the queue and launching them, as well as creating runs from schedules or sensors.
  dagster_daemon:
    build:
      context: .
      dockerfile: ./Dockerfile_dagster
    entrypoint:
      - dagster-daemon
      - run
    container_name: dagster_daemon
    restart: on-failure
    environment:
      DAGSTER_POSTGRES_USER: ${POSTGRES_USER}
      DAGSTER_POSTGRES_PASSWORD: ${POSTGRES_PASSWORD}
      DAGSTER_POSTGRES_DB: ${POSTGRES_DB}
    volumes: # Make docker client accessible so we can launch containers using host docker
      - /var/run/docker.sock:/var/run/docker.sock
      - /tmp/io_manager_storage:/tmp/io_manager_storage
    networks:
      - dagster
    depends_on:
      dagster_postgresql:
        condition: service_healthy
      dagster_user_code_gtfs:
        condition: service_started

networks:
  dagster:
    driver: bridge
    name: dagster
|
||||||
6
workspace.yaml
Normal file
6
workspace.yaml
Normal file
|
|
@ -0,0 +1,6 @@
|
||||||
|
---
# Code locations loaded by the webserver and daemon.
load_from:
  # One entry per docker-compose service that exposes user code.
  - grpc_server:
      host: dagster_user_code_gtfs
      port: 4000
      location_name: 'gtfs'
|
||||||
Loading…
Add table
Add a link
Reference in a new issue