Skip to content

Commit

Permalink
Dagster docker compose command now working again. Updated dagster lib…
Browse files Browse the repository at this point in the history
…raries to latest, tested, clarified some points in documentation
  • Loading branch information
cyramic committed Oct 7, 2024
1 parent 418998d commit 93f04d6
Show file tree
Hide file tree
Showing 9 changed files with 606 additions and 532 deletions.
14 changes: 14 additions & 0 deletions .env.sample
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
DAGSTER_POSTGRES_USER=postgres
DAGSTER_POSTGRES_PASSWORD=postgres
DAGSTER_POSTGRES_DB=postgres
DAGSTER_POSTGRES_PORT=5432
DAGSTER_POSTGRES_HOST=postgres
ALLOWED_DATASETS=cin,ssda903

INPUT_LOCATION=./Files/Input
WORKSPACE_LOCATION=./Files/Workspace
SHARED_LOCATION=./Files/Shared
EXTERNAL_DATA_LOCATION=./Files/External

CLEAN_SCHEDULE=*/2 * * * *
REPORTS_SCHEDULE=*/2 * * * *
4 changes: 4 additions & 0 deletions .github/workflows/build_dagster.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ jobs:
uses: docker/build-push-action@v5
with:
  # Build from the repository root; Dockerfile_dagster lives at the root,
  # so its path is given relative to the build context.
  context: .
  file: ./Dockerfile_dagster
  push: true
  tags: ${{ secrets.DOCKERHUB_DAGSTER_SERVER_URL }}:latest
  labels: ${{ steps.meta.outputs.labels }}
4 changes: 4 additions & 0 deletions .github/workflows/build_release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ jobs:
uses: docker/build-push-action@v5
with:
  # Build from the repository root using the root-level Dockerfile_dagster.
  # NOTE(review): merge conflict resolved in favour of the stashed side
  # (Dockerfile_dagster over Dockerfile_LA) — confirm this is the image the
  # release workflow is meant to publish.
  context: .
  file: ./Dockerfile_dagster
  push: true
  tags: ${{ secrets.DOCKERHUB_DAGSTER_SERVER_URL }}:latest
  labels: ${{ steps.meta.outputs.labels }}
4 changes: 2 additions & 2 deletions dagster/Dockerfile_dagster → Dockerfile_dagster
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Dagster libraries to run both dagit and the dagster-daemon. Does not
# need to have access to any pipeline code.

FROM python:3.10-slim
FROM python:3.11-slim

RUN pip install \
dagster \
Expand All @@ -17,7 +17,7 @@ ENV DAGSTER_HOME=/opt/dagster/dagster_home/

RUN mkdir -p $DAGSTER_HOME

COPY dagster.yaml workspace.yaml $DAGSTER_HOME
COPY ./dagster/dagster.yaml ./dagster/workspace.yaml $DAGSTER_HOME

WORKDIR $DAGSTER_HOME

File renamed without changes.
6 changes: 5 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,8 @@ doesn't rely on this.


## Infrastructure
Documentation on the infrastructure can be found [here](./docs/infrastructure-diagrams/README.md)
Documentation on the infrastructure can be found [here](./docs/infrastructure-diagrams/README.md)

## Dagit/Daemon
Code in the dagster folder [here](./dagster/README.md) is what is used to run the dagit and daemon interfaces
on the platform. Code servers can then be hooked in and are part of different repos as needed.
10 changes: 7 additions & 3 deletions dagster/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,13 +38,17 @@ end


### Dagit
This is the web interface for Dagster that allows for viewing and interacting with Dagster objects. More
This is the web interface for Dagster that allows for viewing and interacting with Dagster objects.
This is built and pushed to docker hub for use with the wider platform. More
[info here](https://docs.dagster.io/concepts/dagit/dagit)
### Dagster Daemon
Determines what needs to be run and when using schedulers, sensors, etc. More
Determines what needs to be run and when using schedulers, sensors, etc. This is built and pushed to
docker hub for use with the wider platform. More
[info here](https://docs.dagster.io/deployment/dagster-daemon)
### User Code Server
This contains the pipeline code for Dagster to run. This includes sensors, ops, assets, jobs,
schedule definitions, etc. More
schedule definitions, etc. The one used in this repo is for demo purposes only and isn't part of
any wider build or running of the platform. More
[info here](https://docs.dagster.io/concepts/code-locations/workspace-files#running-your-own-grpc-server)

1,047 changes: 537 additions & 510 deletions dagster/poetry.lock

Large diffs are not rendered by default.

49 changes: 33 additions & 16 deletions dagster/docker-compose.yaml → docker-compose.yaml
Original file line number Diff line number Diff line change
@@ -1,17 +1,23 @@
version: "3.8"

services:
# This service runs the postgres DB used by dagster for run storage, schedule storage,
# and event log storage.
postgres:
env_file:
- .env
image: postgres:11
container_name: postgres
expose:
- "5432"
ports:
- "5432:5432"
environment:
POSTGRES_USER: ${DAGSTER_POSTGRES_USER}
POSTGRES_PASSWORD: ${DAGSTER_POSTGRES_PASSWORD}
POSTGRES_DB: ${DAGSTER_POSTGRES_DB}
volumes:
- postgres_data:/var/lib/postgresql/data
networks:
- network
- dagster_network

# This service runs the gRPC server that loads your user code, in both dagit
# and dagster-daemon. By setting DAGSTER_CURRENT_IMAGE to its own image, we tell the
Expand All @@ -20,9 +26,13 @@ services:
# its own port, and have its own entry in the workspace.yaml file that's loaded by dagit.
user_code:
env_file:
- .env.staging
- .env
container_name: user_code
image: code-server #mathewpugh/fons-code-server:latest
expose:
- "4000"
ports:
- "4000:4000"
image: sfdl/fons-code-server-org:latest #sfdl/fons-code-server-client:latest
restart: always
environment:
DAGSTER_POSTGRES_HOST: ${DAGSTER_POSTGRES_HOST}
Expand All @@ -31,15 +41,19 @@ services:
DAGSTER_POSTGRES_DB: ${DAGSTER_POSTGRES_DB}
DAGSTER_POSTGRES_PORT: ${DAGSTER_POSTGRES_PORT}
DAGSTER_CURRENT_IMAGE: "user_code_image"
INPUT_LOCATION: ${INPUT_LOCATION}
WORKSPACE_LOCATION: ${WORKSPACE_LOCATION}
SHARED_LOCATION: ${SHARED_LOCATION}
EXTERNAL_DATA_LOCATION: ${EXTERNAL_DATA_LOCATION}
networks:
- network
- dagster_network

# This service runs dagit, which loads your user code from the user code container.
# Since our instance uses the QueuedRunCoordinator, any runs submitted from dagit will be put on
# a queue and later dequeued and launched by dagster-daemon.
dagit:
env_file:
- .env.staging
- .env
build:
context: .
dockerfile: ./Dockerfile_dagster
Expand All @@ -65,17 +79,17 @@ services:
volumes: # Make docker client accessible so we can terminate containers from dagit
- /var/run/docker.sock:/var/run/docker.sock
- /tmp/io_manager_storage:/tmp/io_manager_storage
networks:
- network
depends_on:
- postgres
- user_code
networks:
- dagster_network

# This service runs the dagster-daemon process, which is responsible for taking runs
# off of the queue and launching them, as well as creating runs from schedules or sensors.
daemon:
env_file:
- .env.staging
- .env
build:
context: .
dockerfile: ./Dockerfile_dagster
Expand All @@ -91,18 +105,21 @@ services:
DAGSTER_POSTGRES_DB: ${DAGSTER_POSTGRES_DB}
DAGSTER_POSTGRES_PORT: ${DAGSTER_POSTGRES_PORT}
PYTHONLEGACYWINDOWSSTDIO: utf8.env
INCOMING_LOCATION: ${INCOMING_LOCATION}
OUTPUT_LOCATION: ${OUTPUT_LOCATION}
volumes: # Make docker client accessible so we can launch containers using host docker
- /var/run/docker.sock:/var/run/docker.sock
- /tmp/io_manager_storage:/tmp/io_manager_storage
networks:
- network
depends_on:
- postgres
- user_code
networks:
- dagster_network

volumes:
dwh:
postgres_data:
name: dagster_postgres_volume

networks:
network:
dagster_network:
driver: bridge
name: network
name: dagster_network

0 comments on commit 93f04d6

Please sign in to comment.