From 16c1a31391c19cdd571bc564869d0fbfcf1fc085 Mon Sep 17 00:00:00 2001
From: Ilya Burakov
Date: Sat, 23 Sep 2023 15:55:48 +0000
Subject: [PATCH] Update and fix NITTA release GH Actions workflow

---
 .github/workflows/ml-with-releases.yml    | 117 +++++++++++++-----
 ml/synthesis/Dockerfile                   |  18 ++-
 .../src/scripts/train_evaluate_in_ci.py   |  22 ++--
 3 files changed, 113 insertions(+), 44 deletions(-)

diff --git a/.github/workflows/ml-with-releases.yml b/.github/workflows/ml-with-releases.yml
index 8462969b6..8de4d2f5c 100644
--- a/.github/workflows/ml-with-releases.yml
+++ b/.github/workflows/ml-with-releases.yml
@@ -1,48 +1,105 @@
 name: ML Test & Train + GitHub Release
-# Maybe this should be split to several workflows triggered by different events.
+
+# This is a very long and heavy workflow intended to run only when a new NITTA release is made.
+# It's triggered by pushing a new version-like tag (v0.1.15, for example).
+# Expected runtime: ~4-5 hours.
+
+# The project's Docker environment is used (it's heavy, 10+ GB, and its layers don't fit in the CI cache).
+# Actions summary:
+# - Build released NITTA binaries and web frontend
+# - Crawl fresh ML training data (with the latest synthesis node parameters and tree structure)
+# - Train a fresh ML model on the crawled data
+# - Evaluate and rate the overall NITTA synthesis performance in different modes
+# - Prepare release assets and generate a release description based on the git tag message and evaluation results
+# - Publish the release to GitHub

 on:
   push:
     tags:
-      - "?.?.*"
+      - "v[0-9]+.[0-9]+.[0-9]+*"

-# Filtering only to react to pushes to master branch may be needed here.
+# Filtering only to react to pushes to the master branch may be needed here.

 jobs:
   ml-test-train-release:
-    runs-on: ubuntu-latest
+    # On ubuntu-latest, the job fails with exit code 143 and forceful termination of the GitHub runner. (?!)
+    # See https://github.com/actions/runner-images/discussions/7188.
+    runs-on: ubuntu-20.04
+
+    permissions:
+      # needed for runforesight/workflow-telemetry-action@v1
+      actions: read
+      # needed to push the removal of a manually trained model (if one was committed)
+      contents: write
+      # defaults
+      packages: read
+
     steps:
+      - name: Maximize available disk space
+        # We don't have enough disk space without this.
+        # Based on https://github.com/easimon/maximize-build-space/blob/master/action.yml
+        # Not using the full action since its custom LVM setup breaks things for some reason.
+        # Just removing some unused tools is enough for now.
+        run: |
+          echo "Disk space before cleanup:"
+          df -h
+
+          echo "Removing unwanted built-in stuff..."
+          sudo rm -rf /usr/share/dotnet
+          sudo rm -rf /usr/local/lib/android
+          sudo rm -rf /opt/ghc
+          sudo rm -rf /opt/hostedtoolcache/CodeQL
+          sudo docker image prune --all --force
+
+          echo "Disk space after cleanup:"
+          df -h
+
+      - name: Increase swap space
+        # The job can fail with out-of-memory errors during synthesis evaluation without this.
+        run: |
+          sudo swapoff -a
+          sudo fallocate -l 16G /swapfile
+          sudo chmod 600 /swapfile
+          sudo mkswap /swapfile
+          sudo swapon /swapfile
+          sudo swapon --show
+          echo "Swap space:"
+          free -h
+
       - uses: actions/checkout@v3
       - uses: fregante/setup-git-user@v1

-      # To make NITTA Docker workflows faster^H^H^H^H^H^H^H succeed within finite time, this action:
-      # - saves docker build cache in its post run;
-      # - restores the cache if it exists in its pre run.
-      - uses: satackey/action-docker-layer-caching@v0.0.11
-        # Ignore the failure of a step and avoid terminating the job.
-        continue-on-error: true
+      - name: Enable workflow telemetry collection
+        uses: runforesight/workflow-telemetry-action@v1

       - name: Build NITTA binaries and dependencies
         run: |
           docker build --target build -f ml/synthesis/Dockerfile --tag nitta-build .
           docker create --name nitta-build-container nitta-build
-          mkdir -p build/nitta/web
-          docker cp nitta-build-container:/app/web/build build/nitta/web
-          docker cp nitta-build-container:/app/build/nitta/nitta build/nitta
+          mkdir -p build/nitta-dist/web
+          docker cp nitta-build-container:/app/web/build build/nitta-dist/web
+          docker cp nitta-build-container:/app/build/nitta/nitta build/nitta-dist
           docker rm -v nitta-build-container

+      - name: Log available disk space
+        run: |
+          df -h
+          # 11 GB of free space on /dev/root is enough at the time of writing
+
       - name: Smoke test ML for synthesis
         run: |
-          docker build --target ml-script -f ml/synthesis/Dockerfile --tag nitta-ml-script:latest .
-          docker run nitta-ml-script -m pytest -c ml/synthesis/pytest.ini
+          docker build --target ci-ml -f ml/synthesis/Dockerfile --tag nitta-ci-ml:latest .
+          docker run nitta-ci-ml python -m pytest -c=ml/synthesis/pyproject.toml

       - name: Train and/or evaluate ML model
         run: |
-          docker run --name model-trainer nitta-ml-script ml/synthesis/src/scripts/train_evaluate_in_ci.py
+          docker run --name model-trainer nitta-ci-ml
           mkdir -p build/model-synthesis
           docker cp model-trainer:/app/ml/synthesis/models/production build/model-synthesis
+          docker cp model-trainer:/app/train_eval_description.txt build
+          docker cp model-trainer:/app/evaluation/ci/ build/evaluation

-      - name: Ensure model is deleted from repository
+      - name: Check that the manually trained model is deleted if it was committed
         run: |
           git rm -r ml/synthesis/models/production --ignore-unmatch
           if [[ $(git status --short -uno) ]]; then
@@ -59,23 +116,25 @@
           NITTA_VERSION="$(git describe)"
           TAG_MESSAGE="$(git tag -l --format='%(contents)' ${GITHUB_REF#refs/*/})"

-          echo "Preparing release for NITTA v$NITTA_VERSION, contents:"
-          mkdir build/release
-          echo "$TAG_MESSAGE" | tee build/release/body.txt
-          printf "\n" | tee -a build/release/body.txt
-          cat build/model-synthesis/production/description.txt | tee -a build/release/body.txt
+          mkdir release_assets
+          cp build/evaluation/*.csv release_assets
+
+          echo "Preparing release for NITTA $NITTA_VERSION, release body text:"
+          echo "$TAG_MESSAGE" | tee release_text.txt
+          printf "\n---\n" | tee -a release_text.txt
+          cat build/train_eval_description.txt | tee -a release_text.txt

-          cp -r examples build/nitta
-          # more to include?
+          cp -r examples build/nitta-dist
+          cp -r hdl build/nitta-dist
+          cp -r templates build/nitta-dist

           # FIXME: remove runtime NITTA dependency from ./web/src/services/gen/PORT, yarn dev's proxy can be used now
-          tar -czvf "build/release/nitta-$NITTA_VERSION-linux-amd64.tar.gz" -C build/nitta .
-          zip -j -r "build/release/model-synthesis-$NITTA_VERSION.zip" build/model-synthesis
+          tar -czvf "release_assets/nitta-$NITTA_VERSION-linux-amd64.tar.gz" -C build/nitta-dist .
+          zip -j -r "release_assets/model-synthesis.zip" build/model-synthesis

       - name: Release
         uses: softprops/action-gh-release@v1
         with:
-          body_path: build/release/body.txt
+          body_path: release_text.txt
           files: |
-            build/release/*.tar.gz
-            build/release/*.zip
+            release_assets/*
diff --git a/ml/synthesis/Dockerfile b/ml/synthesis/Dockerfile
index 5d76524ab..fa8833991 100644
--- a/ml/synthesis/Dockerfile
+++ b/ml/synthesis/Dockerfile
@@ -4,11 +4,11 @@
 #
 # The following build stages structure is chosen:
 #
-# ubuntu:22.04 <-- dependencies <-- build <-- ml-script
+# ubuntu:22.04 <-- dependencies <-- build <-- ci-ml
 #                                       ^--- development <-- development-gpu
 #
 # - "build" builds just NITTA itself
-# - "ml-script" adds and prepares ML-related stuff
+# - "ci-ml" adds and prepares ML-related stuff
 # - "development" is a base stage for development containers
 #
 # During development it's useful to add a bind mount at /app to the repo root.
@@ -372,9 +372,17 @@ WORKDIR /app

 # -----------------------------

-FROM build AS ml-script
+FROM build AS ci-ml
+
+ENV PYTHONPATH=/app/ml/synthesis/src
+
+WORKDIR /app/ml/synthesis
+RUN poetry install --no-root --compile
+WORKDIR /app

 COPY --chown=devuser:devuser examples examples/
 COPY --chown=devuser:devuser ml ml/
-ENV PYTHONPATH=/app/ml/synthesis/src
-ENTRYPOINT ["python"]
+COPY --chown=devuser:devuser hdl hdl/
+COPY --chown=devuser:devuser templates templates/
+
+CMD ["python", "-m", "scripts.train_evaluate_in_ci"]
diff --git a/ml/synthesis/src/scripts/train_evaluate_in_ci.py b/ml/synthesis/src/scripts/train_evaluate_in_ci.py
index 0b8320649..5c253b8df 100644
--- a/ml/synthesis/src/scripts/train_evaluate_in_ci.py
+++ b/ml/synthesis/src/scripts/train_evaluate_in_ci.py
@@ -9,7 +9,7 @@
 from components.data_crawling.data_crawling import crawl_data_from_many_examples
 from components.evaluation.rating import rate_evaluations
 from components.model_generation.training import train_and_save_model
-from consts import EVALUATION_CONFIGS_DIR, EVALUATIONS_DIR, MODELS_DIR
+from consts import EVALUATION_CONFIGS_DIR, EVALUATIONS_DIR, MODELS_DIR, ROOT_DIR
 from scripts.evaluate_nitta_synthesis import (
     evaluate_nitta_synthesis,
     read_evaluation_config_from_json,
@@ -57,7 +57,7 @@ def _parse_args() -> Namespace:

     rating_df = None
     if not args.skip_evaluation:
-        config = read_evaluation_config_from_json(EVALUATION_CONFIGS_DIR / "ci.json")
+        config = read_evaluation_config_from_json(EVALUATION_CONFIGS_DIR / "full.json")
         config.output_dir = EVALUATIONS_DIR / "ci"
         if config.output_dir.exists():
             shutil.rmtree(config.output_dir)
@@ -65,16 +65,18 @@ def _parse_args() -> Namespace:
         evaluate_nitta_synthesis(config)
         rating_df = rate_evaluations(list(config.output_dir.glob("evaluation_*.csv")))

-    with (model_dir / "description.txt").open("w") as f:
-        f.write(f"{'Manually' if is_manual else 'Automatically'} trained model for synthesis \n\n")
-        f.write(f"Training MAE: {meta.train_mae:.3f}\n")
-        f.write(f"Validation MAE: {meta.validation_mae:.3f}\n")
+    with (ROOT_DIR / "train_eval_description.txt").open("w") as f:
         if rating_df is not None:
-            f.write("\n---\n")
-            # "tabulate" package is required for to_markdown()
             # preformat df for a decent appearance in markdown
+            # "tabulate" package is required for this
             rating_df = rating_df.reset_index().drop("evaluation", axis=1)
             table = rating_df.to_markdown(tablefmt="github", index=False)
-            f.write(f"Synthesis evaluation rating: \n{table}")
+            f.write(f"Synthesis evaluation rating: \n{table}\n\n")

-    logger.info(f"Done! Model saved to {model_dir}. Training history (+PNG chart), description and metainfo included.")
+        training_kind = "manually" if is_manual else "automatically"
+        f.write(
+            f"ML model for synthesis is trained {training_kind} "
+            + f"(train/val MAE: {meta.train_mae:.3f}/{meta.validation_mae:.3f}).\n",
+        )
+
+    logger.info(f"Done! Model saved to {model_dir}. Training history (+PNG chart) and metainfo included.")
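
For reference, a minimal usage sketch of how the updated workflow is triggered, assuming push access to the repository; the version number below is only illustrative (reused from the workflow comment). Pushing an annotated tag that matches "v[0-9]+.[0-9]+.[0-9]+*" starts the job, and the tag message becomes the GitHub release body:

    # Create an annotated tag; its message is picked up by the "Prepare release" step.
    git tag -a v0.1.15 -m "Release notes that will appear in the GitHub release body"
    # Pushing the tag starts the "ML Test & Train + GitHub Release" workflow.
    git push origin v0.1.15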