Update and fix NITTA release GH Actions workflow

ryukzak · Sep 23, 2023 · 16c1a31 · 16c1a31
1 parent e6d4dd6
commit 16c1a31
Show file tree

Hide file tree

Showing 3 changed files with 113 additions and 44 deletions.
diff --git a/.github/workflows/ml-with-releases.yml b/.github/workflows/ml-with-releases.yml
@@ -1,48 +1,105 @@
 name: ML Test & Train + GitHub Release
-# Maybe this should be split to several workflows triggered by different events.
+
+# That's a very long and heavy workflow intended only to be run when a new NITTA release should be made.
+# It's triggered by pushing a new version-like tag (v0.1.15, for example).
+# Expected runtime: ~4-5 hours.
+
+# The project's Docker environment is used (it's heavy, 10+ GB, layers don't fit in the CI cache)
+# Actions summary:
+# - Build released NITTA binaries and web frontend
+# - Crawl fresh ML training data (with the latest synthesis node parameters and tree structure)
+# - Train a fresh ML model on the crawled data
+# - Evaluate and rate the overall NITTA synthesis performance in different modes
+# - Prepare release assets and generate a release description based on the git tag message and evaluation results
+# - Publish the release to GitHub
 
 on:
   push:
     tags:
-      - "?.?.*"
+      - "v[0-9]+.[0-9]+.[0-9]+*"
 
-# Filtering only to react to pushes to master branch may be needed here.
+# Filtering only to react to pushes to the master branch may be needed here.
 
 jobs:
   ml-test-train-release:
-    runs-on: ubuntu-latest
+    # On ubuntu-latest, the job fails with exit code 143 and the GitHub runner is forcefully terminated. (?!)
+    # See https://github.com/actions/runner-images/discussions/7188.
+    runs-on: ubuntu-20.04
+
+    permissions:
+      # needed for runforesight/workflow-telemetry-action@v1
+      actions: read
+      # needed to push the commit that removes a manually trained model (?)
+      contents: write
+      # defaults
+      packages: read
+
     steps:
+      - name: Maximize available disk space
+        # We don't have enough disk space without this.
+        # Based on https://github.com/easimon/maximize-build-space/blob/master/action.yml
+        # Not using the full action since a custom LVM breaks stuff for some reason.
+        # Just removing some unused tools is enough for now.
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+
+          echo "Removing unwanted built-in stuff..."
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+
+          echo "Disk space after cleanup:"
+          df -h
+
+      - name: Increase swap space
+        # The job can fail with out-of-memory errors during synthesis evaluation without this.
+        run: |
+          sudo swapoff -a
+          sudo fallocate -l 16G /swapfile
+          sudo chmod 600 /swapfile
+          sudo mkswap /swapfile
+          sudo swapon /swapfile
+          sudo swapon --show
+          echo "Swap space:"
+          free -h
+
       - uses: actions/checkout@v3
       - uses: fregante/setup-git-user@v1
 
-      # To make NITTA Docker workflows faster^H^H^H^H^H^H^H succeed within finite time, this action:
-      #   - saves docker build cache in its post run;
-      #   - restores the cache if it exists in its pre run.
-      - uses: satackey/action-docker-layer-caching@v0.0.11
-        # Ignore the failure of a step and avoid terminating the job.
-        continue-on-error: true
+      - name: Enable workflow telemetry collection
+        uses: runforesight/workflow-telemetry-action@v1
 
       - name: Build NITTA binaries and dependencies
         run: |
           docker build --target build -f ml/synthesis/Dockerfile --tag nitta-build .
           docker create --name nitta-build-container nitta-build
-          mkdir -p build/nitta/web
-          docker cp nitta-build-container:/app/web/build build/nitta/web
-          docker cp nitta-build-container:/app/build/nitta/nitta build/nitta
+          mkdir -p build/nitta-dist/web
+          docker cp nitta-build-container:/app/web/build build/nitta-dist/web
+          docker cp nitta-build-container:/app/build/nitta/nitta build/nitta-dist
           docker rm -v nitta-build-container
 
+      - name: Log available disk space
+        run: |
+          df -h
+          # 11 GB of free space on /dev/root is enough at the time of writing
+
       - name: Smoke test ML for synthesis
         run: |
-          docker build --target ml-script -f ml/synthesis/Dockerfile --tag nitta-ml-script:latest .
-          docker run nitta-ml-script -m pytest -c ml/synthesis/pytest.ini
+          docker build --target ci-ml -f ml/synthesis/Dockerfile --tag nitta-ci-ml:latest .
+          docker run nitta-ci-ml python -m pytest -c=ml/synthesis/pyproject.toml
 
       - name: Train and/or evaluate ML model
         run: |
-          docker run --name model-trainer nitta-ml-script ml/synthesis/src/scripts/train_evaluate_in_ci.py
+          docker run --name model-trainer nitta-ci-ml
           mkdir -p build/model-synthesis
           docker cp model-trainer:/app/ml/synthesis/models/production build/model-synthesis
+          docker cp model-trainer:/app/train_eval_description.txt build
+          docker cp model-trainer:/app/evaluation/ci/ build/evaluation
 
-      - name: Ensure model is deleted from repository
+      - name: Check the manual model is deleted if it was committed
         run: |
           git rm -r ml/synthesis/models/production --ignore-unmatch
           if [[ $(git status --short -uno) ]]; then
@@ -59,23 +116,25 @@ jobs:
           NITTA_VERSION="$(git describe)"
           TAG_MESSAGE="$(git tag -l --format='%(contents)' ${GITHUB_REF#refs/*/})"
 
-          echo "Preparing release for NITTA v$NITTA_VERSION, contents:"
-          mkdir build/release
-          echo "$TAG_MESSAGE" | tee build/release/body.txt
-          printf "\n" | tee -a build/release/body.txt
-          cat build/model-synthesis/production/description.txt | tee -a build/release/body.txt
+          mkdir release_assets
+          cp build/evaluation/*.csv release_assets
+
+          echo "Preparing release for NITTA $NITTA_VERSION, release body text:"
+          echo "$TAG_MESSAGE" | tee release_text.txt
+          printf "\n---\n" | tee -a release_text.txt
+          cat build/train_eval_description.txt | tee -a release_text.txt
 
-          cp -r examples build/nitta
-          # more to include?
+          cp -r examples build/nitta-dist
+          cp -r hdl build/nitta-dist
+          cp -r templates build/nitta-dist
           # FIXME: remove runtime NITTA dependency from ./web/src/services/gen/PORT, yarn dev's proxy can be used now
 
-          tar -czvf "build/release/nitta-$NITTA_VERSION-linux-amd64.tar.gz" -C build/nitta .
-          zip -j -r "build/release/model-synthesis-$NITTA_VERSION.zip" build/model-synthesis
+          tar -czvf "release_assets/nitta-$NITTA_VERSION-linux-amd64.tar.gz" -C build/nitta-dist .
+          zip -j -r "release_assets/model-synthesis.zip" build/model-synthesis
 
       - name: Release
         uses: softprops/action-gh-release@v1
         with:
-          body_path: build/release/body.txt
+          body_path: release_text.txt
           files: |
-            build/release/*.tar.gz
-            build/release/*.zip
+            release_assets/*
diff --git a/ml/synthesis/Dockerfile b/ml/synthesis/Dockerfile
@@ -4,11 +4,11 @@
 # 
 # The following build stages structure is chosen:
 #
-#     ubuntu:22.04 <-- dependencies <-- build <-- ml-script
+#     ubuntu:22.04 <-- dependencies <-- build <-- ci-ml
 #                           ^--- development <-- development-gpu
 #
 #     - "build" builds just NITTA itself
-#     - "ml-script" adds and prepares ML-related stuff
+#     - "ci-ml" adds and prepares ML-related stuff
 #     - "development" is a base stage for development containers
 #
 # During development it's useful to add a bind mount at /app to the repo root. 
@@ -372,9 +372,17 @@ WORKDIR /app
 # -----------------------------
 
 
-FROM build AS ml-script
+FROM build AS ci-ml
+
+ENV PYTHONPATH=/app/ml/synthesis/src
+
+WORKDIR /app/ml/synthesis
+RUN poetry install --no-root --compile
+WORKDIR /app
 
 COPY --chown=devuser:devuser examples examples/
 COPY --chown=devuser:devuser ml ml/
-ENV PYTHONPATH=/app/ml/synthesis/src
-ENTRYPOINT ["python"]
+COPY --chown=devuser:devuser hdl hdl/
+COPY --chown=devuser:devuser templates templates/
+
+CMD ["python", "-m", "scripts.train_evaluate_in_ci"]
diff --git a/ml/synthesis/src/scripts/train_evaluate_in_ci.py b/ml/synthesis/src/scripts/train_evaluate_in_ci.py
@@ -9,7 +9,7 @@
 from components.data_crawling.data_crawling import crawl_data_from_many_examples
 from components.evaluation.rating import rate_evaluations
 from components.model_generation.training import train_and_save_model
-from consts import EVALUATION_CONFIGS_DIR, EVALUATIONS_DIR, MODELS_DIR
+from consts import EVALUATION_CONFIGS_DIR, EVALUATIONS_DIR, MODELS_DIR, ROOT_DIR
 from scripts.evaluate_nitta_synthesis import (
     evaluate_nitta_synthesis,
     read_evaluation_config_from_json,
@@ -57,24 +57,26 @@ def _parse_args() -> Namespace:
 
     rating_df = None
     if not args.skip_evaluation:
-        config = read_evaluation_config_from_json(EVALUATION_CONFIGS_DIR / "ci.json")
+        config = read_evaluation_config_from_json(EVALUATION_CONFIGS_DIR / "full.json")
         config.output_dir = EVALUATIONS_DIR / "ci"
         if config.output_dir.exists():
             shutil.rmtree(config.output_dir)
 
         evaluate_nitta_synthesis(config)
         rating_df = rate_evaluations(list(config.output_dir.glob("evaluation_*.csv")))
 
-    with (model_dir / "description.txt").open("w") as f:
-        f.write(f"{'Manually' if is_manual else 'Automatically'} trained model for synthesis \n\n")
-        f.write(f"Training MAE: {meta.train_mae:.3f}\n")
-        f.write(f"Validation MAE: {meta.validation_mae:.3f}\n")
+    with (ROOT_DIR / "train_eval_description.txt").open("w") as f:
         if rating_df is not None:
-            f.write("\n---\n")
-            # "tabulate" package is required for to_markdown()
             # preformat df for a decent appearance in markdown
+            # "tabulate" package is required for to_markdown()
             rating_df = rating_df.reset_index().drop("evaluation", axis=1)
             table = rating_df.to_markdown(tablefmt="github", index=False)
-            f.write(f"Synthesis evaluation rating: \n{table}")
+            f.write(f"Synthesis evaluation rating: \n{table}\n\n")
 
-    logger.info(f"Done! Model saved to {model_dir}. Training history (+PNG chart), description and metainfo included.")
+        training_kind = "manually" if is_manual else "automatically"
+        f.write(
+            f"ML model for synthesis is trained {training_kind} "
+            + f"(train/val MAE: {meta.train_mae:.3f}/{meta.validation_mae:.3f}).\n",
+        )
+
+    logger.info(f"Done! Model saved to {model_dir}. Training history (+PNG chart) and metainfo included.")