Skip to content

Commit

Permalink
Update and fix NITTA release GH Actions workflow
Browse files Browse the repository at this point in the history
  • Loading branch information
iburakov committed Sep 23, 2023
1 parent e6d4dd6 commit 16c1a31
Show file tree
Hide file tree
Showing 3 changed files with 113 additions and 44 deletions.
117 changes: 88 additions & 29 deletions .github/workflows/ml-with-releases.yml
Original file line number Diff line number Diff line change
@@ -1,48 +1,105 @@
name: ML Test & Train + GitHub Release
# Maybe this should be split to several workflows triggered by different events.

# That's a very long and heavy workflow intended only to be run when a new NITTA release should be made.
# It's triggered by pushing a new version-like tag (v0.1.15, for example).
# Expected runtime: ~4-5 hours.

# The project's Docker environment is used (it's heavy, 10+ GB, layers don't fit in the CI cache)
# Actions summary:
# - Build released NITTA binaries and web frontend
# - Crawl fresh ML training data (with the latest synthesis node parameters and tree structure)
# - Train a fresh ML model on the crawled data
# - Evaluate and rate the overall NITTA synthesis performance in different modes
# - Prepare release assets and generate a release description based on the git tag message and evaluation results
# - Publish the release to GitHub

on:
push:
tags:
- "?.?.*"
- "v[0-9]+.[0-9]+.[0-9]+*"

# Filtering only to react to pushes to master branch may be needed here.
# Filtering only to react to pushes to the master branch may be needed here.

jobs:
ml-test-train-release:
runs-on: ubuntu-latest
# On ubuntu-latest, the job fails with exit code 143 and the GitHub runner is forcefully terminated. (?!)
# See https://github.com/actions/runner-images/discussions/7188.
runs-on: ubuntu-20.04

permissions:
# needed for runforesight/workflow-telemetry-action@v1
actions: read
# needed to push the removal of a manually trained model (?)
contents: write
# defaults
packages: read

steps:
- name: Maximize available disk space
# We don't have enough disk space without this.
# Based on https://github.com/easimon/maximize-build-space/blob/master/action.yml
# Not using the full action since a custom LVM breaks stuff for some reason.
# Just removing some unused tools is enough for now.
run: |
echo "Disk space before cleanup:"
df -h
echo "Removing unwanted built-in stuff..."
sudo rm -rf /usr/share/dotnet
sudo rm -rf /usr/local/lib/android
sudo rm -rf /opt/ghc
sudo rm -rf /opt/hostedtoolcache/CodeQL
sudo docker image prune --all --force
echo "Disk space after cleanup:"
df -h
- name: Increase swap space
# The job can fail with out-of-memory errors during synthesis evaluation without this.
run: |
sudo swapoff -a
sudo fallocate -l 16G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
sudo swapon --show
echo "Swap space:"
free -h
- uses: actions/checkout@v3
- uses: fregante/setup-git-user@v1

# To make NITTA Docker workflows faster^H^H^H^H^H^H^H succeed within finite time, this action:
# - saves docker build cache in its post run;
# - restores the cache if it exists in its pre run.
- uses: satackey/action-docker-layer-caching@v0.0.11
# Ignore the failure of a step and avoid terminating the job.
continue-on-error: true
- name: Enable workflow telemetry collection
uses: runforesight/workflow-telemetry-action@v1

- name: Build NITTA binaries and dependencies
run: |
docker build --target build -f ml/synthesis/Dockerfile --tag nitta-build .
docker create --name nitta-build-container nitta-build
mkdir -p build/nitta/web
docker cp nitta-build-container:/app/web/build build/nitta/web
docker cp nitta-build-container:/app/build/nitta/nitta build/nitta
mkdir -p build/nitta-dist/web
docker cp nitta-build-container:/app/web/build build/nitta-dist/web
docker cp nitta-build-container:/app/build/nitta/nitta build/nitta-dist
docker rm -v nitta-build-container
- name: Log available disk space
run: |
df -h
# 11 GB of free space on /dev/root is enough at the time of writing
- name: Smoke test ML for synthesis
run: |
docker build --target ml-script -f ml/synthesis/Dockerfile --tag nitta-ml-script:latest .
docker run nitta-ml-script -m pytest -c ml/synthesis/pytest.ini
docker build --target ci-ml -f ml/synthesis/Dockerfile --tag nitta-ci-ml:latest .
docker run nitta-ci-ml python -m pytest -c=ml/synthesis/pyproject.toml
- name: Train and/or evaluate ML model
run: |
docker run --name model-trainer nitta-ml-script ml/synthesis/src/scripts/train_evaluate_in_ci.py
docker run --name model-trainer nitta-ci-ml
mkdir -p build/model-synthesis
docker cp model-trainer:/app/ml/synthesis/models/production build/model-synthesis
docker cp model-trainer:/app/train_eval_description.txt build
docker cp model-trainer:/app/evaluation/ci/ build/evaluation
- name: Ensure model is deleted from repository
- name: Check the manual model is deleted if it was committed
run: |
git rm -r ml/synthesis/models/production --ignore-unmatch
if [[ $(git status --short -uno) ]]; then
Expand All @@ -59,23 +116,25 @@ jobs:
NITTA_VERSION="$(git describe)"
TAG_MESSAGE="$(git tag -l --format='%(contents)' ${GITHUB_REF#refs/*/})"
echo "Preparing release for NITTA v$NITTA_VERSION, contents:"
mkdir build/release
echo "$TAG_MESSAGE" | tee build/release/body.txt
printf "\n" | tee -a build/release/body.txt
cat build/model-synthesis/production/description.txt | tee -a build/release/body.txt
mkdir release_assets
cp build/evaluation/*.csv release_assets
echo "Preparing release for NITTA $NITTA_VERSION, release body text:"
echo "$TAG_MESSAGE" | tee release_text.txt
printf "\n---\n" | tee -a release_text.txt
cat build/train_eval_description.txt | tee -a release_text.txt
cp -r examples build/nitta
# more to include?
cp -r examples build/nitta-dist
cp -r hdl build/nitta-dist
cp -r templates build/nitta-dist
# FIXME: remove runtime NITTA dependency from ./web/src/services/gen/PORT, yarn dev's proxy can be used now
tar -czvf "build/release/nitta-$NITTA_VERSION-linux-amd64.tar.gz" -C build/nitta .
zip -j -r "build/release/model-synthesis-$NITTA_VERSION.zip" build/model-synthesis
tar -czvf "release_assets/nitta-$NITTA_VERSION-linux-amd64.tar.gz" -C build/nitta-dist .
zip -j -r "release_assets/model-synthesis.zip" build/model-synthesis
- name: Release
uses: softprops/action-gh-release@v1
with:
body_path: build/release/body.txt
body_path: release_text.txt
files: |
build/release/*.tar.gz
build/release/*.zip
release_assets/*
18 changes: 13 additions & 5 deletions ml/synthesis/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@
#
# The following build stages structure is chosen:
#
# ubuntu:22.04 <-- dependencies <-- build <-- ml-script
# ubuntu:22.04 <-- dependencies <-- build <-- ci-ml
# ^--- development <-- development-gpu
#
# - "build" builds just NITTA itself
# - "ml-script" adds and prepares ML-related stuff
# - "ci-ml" adds and prepares ML-related stuff
# - "development" is a base stage for development containers
#
# During development it's useful to add a bind mount at /app to the repo root.
Expand Down Expand Up @@ -372,9 +372,17 @@ WORKDIR /app
# -----------------------------


FROM build AS ml-script
FROM build AS ci-ml

ENV PYTHONPATH=/app/ml/synthesis/src

WORKDIR /app/ml/synthesis
RUN poetry install --no-root --compile
WORKDIR /app

COPY --chown=devuser:devuser examples examples/
COPY --chown=devuser:devuser ml ml/
ENV PYTHONPATH=/app/ml/synthesis/src
ENTRYPOINT ["python"]
COPY --chown=devuser:devuser hdl hdl/
COPY --chown=devuser:devuser templates templates/

CMD ["python", "-m", "scripts.train_evaluate_in_ci"]
22 changes: 12 additions & 10 deletions ml/synthesis/src/scripts/train_evaluate_in_ci.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from components.data_crawling.data_crawling import crawl_data_from_many_examples
from components.evaluation.rating import rate_evaluations
from components.model_generation.training import train_and_save_model
from consts import EVALUATION_CONFIGS_DIR, EVALUATIONS_DIR, MODELS_DIR
from consts import EVALUATION_CONFIGS_DIR, EVALUATIONS_DIR, MODELS_DIR, ROOT_DIR
from scripts.evaluate_nitta_synthesis import (
evaluate_nitta_synthesis,
read_evaluation_config_from_json,
Expand Down Expand Up @@ -57,24 +57,26 @@ def _parse_args() -> Namespace:

rating_df = None
if not args.skip_evaluation:
config = read_evaluation_config_from_json(EVALUATION_CONFIGS_DIR / "ci.json")
config = read_evaluation_config_from_json(EVALUATION_CONFIGS_DIR / "full.json")
config.output_dir = EVALUATIONS_DIR / "ci"
if config.output_dir.exists():
shutil.rmtree(config.output_dir)

evaluate_nitta_synthesis(config)
rating_df = rate_evaluations(list(config.output_dir.glob("evaluation_*.csv")))

with (model_dir / "description.txt").open("w") as f:
f.write(f"{'Manually' if is_manual else 'Automatically'} trained model for synthesis \n\n")
f.write(f"Training MAE: {meta.train_mae:.3f}\n")
f.write(f"Validation MAE: {meta.validation_mae:.3f}\n")
with (ROOT_DIR / "train_eval_description.txt").open("w") as f:
if rating_df is not None:
f.write("\n---\n")
# "tabulate" package is required for to_markdown()
# preformat df for a decent appearance in markdown
# "tabulate" package is required for this
rating_df = rating_df.reset_index().drop("evaluation", axis=1)
table = rating_df.to_markdown(tablefmt="github", index=False)
f.write(f"Synthesis evaluation rating: \n{table}")
f.write(f"Synthesis evaluation rating: \n{table}\n\n")

logger.info(f"Done! Model saved to {model_dir}. Training history (+PNG chart), description and metainfo included.")
training_kind = "manually" if is_manual else "automatically"
f.write(
f"ML model for synthesis is trained {training_kind} "
+ f"(train/val MAE: {meta.train_mae:.3f}/{meta.validation_mae:.3f}).\n",
)

logger.info(f"Done! Model saved to {model_dir}. Training history (+PNG chart) and metainfo included.")

0 comments on commit 16c1a31

Please sign in to comment.