ryukzak · ryukzak · Jan 8, 2024 · Dec 28, 2023 · Dec 28, 2023 · Dec 28, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -73,14 +73,6 @@ jobs:
           restore-keys: |
             ${{ runner.os }}-stack-global-
 
-      - uses: actions/cache@v3
-        name: Cache .stack-work
-        with:
-          path: .stack-work
-          key: ${{ runner.os }}-stack-work-${{ hashFiles('stack.yaml') }}-${{ hashFiles('package.yaml') }}-${{ hashFiles('**/*.hs') }}
-          restore-keys: |
-            ${{ runner.os }}-stack-work-
-
       - uses: actions/cache@v3
         name: Cache nitta exe
         with:
@@ -252,7 +244,7 @@ jobs:
       - name: Run Python tests
         working-directory: ./ml/synthesis
         env:
-          NITTA_RUN_COMMAND: nitta
+          NITTA_RUN_COMMAND_OVERRIDE: nitta
         run: |
           export PYTHONPATH=$(pwd)/src:$PYTHONPATH
 

diff --git a/Makefile b/Makefile
@@ -5,8 +5,12 @@ POETRYPATH = ml/synthesis
 PYTHONPATH = ml/synthesis/src
 POETRY = poetry -C $(POETRYPATH)
 PYTHON = PYTHONPATH=$(PYTHONPATH) $(POETRY) run python3
+
+ML_CRAWL_DATA_PATH = ml/synthesis/data
 ML_MODEL_PATH = ml/synthesis/models
-ML_MODEL = $(shell ls -t ml/synthesis/models | grep model | head -n 1)
+ML_MODEL = $(shell ls -t $(ML_MODEL_PATH) | grep model | head -n 1)
+
+PLATFORM := $(shell uname -s)
 
 .PHONY: all build run test format clean
 
@@ -73,10 +77,10 @@ ui-format-check:
 ############################################################
 
 ml-crawl-data:
-	$(PYTHON) $(PYTHONPATH)/scripts/crawl_data_by_tree_sampling_many.py
+	$(PYTHON) -m scripts.crawl_data_by_tree_sampling_many
 
 ml-train-model:
-	$(PYTHON) $(PYTHONPATH)/scripts/train_model.py
+	$(PYTHON) -m scripts.train_model
 
 ml-format:
 	$(POETRY) run black $(PYTHONPATH)
@@ -90,21 +94,60 @@ ml-lint:
 	cd $(POETRYPATH) && poetry run vulture
 
 ml-nitta:
-	echo 'Model:' $(ML_MODEL)
+	echo 'Model for Synthesis: ' $(ML_MODEL)
 	$(POETRY) shell
 	MODELS_DIR=$(ML_MODEL_PATH) PYTHONPATH=$(PYTHONPATH) stack exec nitta -- examples/teacup.lua -s ml_$(ML_MODEL) -p=8080
 
+ml-clean: # deletes the directories named by ML_CRAWL_DATA_PATH and ML_MODEL_PATH
+	rm -rfv $(ML_CRAWL_DATA_PATH) $(ML_MODEL_PATH)
+
 ############################################################
 ## docker development image
 ############################################################
 
-docker-dev-build:
+# Dev image for Linux/Windows hosts; $$(...) lets the shell run `id` (Make would expand a single-$ form to empty).
+docker-dev-build-for-linux-win:
+	docker build \
+		--target development \
+		-f ml/synthesis/Dockerfile \
+		--build-arg HOST_UID=$$(id -u) \
+		--build-arg HOST_GID=$$(id -g) \
+		-t nitta-dev \
+		.
+# Dev image for macOS hosts: same build as above targets' base, but no HOST_UID/HOST_GID build args are passed.
+docker-dev-build-for-mac:
 	docker build \
 		--target development \
 		-f ml/synthesis/Dockerfile \
 		-t nitta-dev \
 		.
 
+docker-dev-build: # dispatches to the mac or linux/win build target based on PLATFORM (`uname -s`)
+	echo Platform: $(PLATFORM)
+ifeq ($(PLATFORM),Darwin)
+	$(MAKE) docker-dev-build-for-mac
+else
+	$(MAKE) docker-dev-build-for-linux-win
+endif
+
+# GPU dev image (development-gpu stage) for Linux/Windows hosts; $$(...) lets the shell, not Make, run `id`.
+docker-dev-build-with-gpu-for-linux-win:
+	docker build \
+		--target development-gpu \
+		-f ml/synthesis/Dockerfile \
+		--build-arg HOST_UID=$$(id -u) \
+		--build-arg HOST_GID=$$(id -g) \
+		-t nitta-dev \
+		.
+# Builds the GPU dev image; on Darwin it only prints a hint (single quotes keep the backticks literal).
+docker-dev-build-with-gpu:
+	echo Platform: $(PLATFORM)
+ifeq ($(PLATFORM),Darwin)
+	echo 'GPU is not supported on Mac, use `docker-dev-build`'
+else
+	$(MAKE) docker-dev-build-with-gpu-for-linux-win
+endif
+
 docker-dev-run:
 	docker run \
 		--name=nitta-dev-container \
@@ -113,3 +156,13 @@ docker-dev-run:
 		-v="nitta-devuser-home:/home/devuser" \
 		-it \
 		nitta-dev
+# Runs nitta-dev as nitta-dev-container with --gpus=all; mounts $(PWD) at /app, maps host port 31032 to container port 22.
+docker-dev-run-with-gpu:
+	docker run \
+		--name=nitta-dev-container \
+		--gpus=all \
+		-p 31032:22 \
+		-v="$(PWD):/app" \
+		-v="nitta-devuser-home:/home/devuser" \
+		-it \
+		nitta-dev
diff --git a/README.md b/README.md
@@ -118,6 +118,7 @@ see [Makefile](Makefile) as a source of up-to-date command examples. Most genera
 - `PYTHONPATH`: Specifies the Python path for Poetry.
 - `POETRY`: Command to run Poetry within the specified configuration.
 - `PYTHON`: Command to run Python using Poetry.
+- `ML_CRAWL_DATA_PATH`: Path to store data sets for ML training.
 - `ML_MODEL_PATH`: Path to the machine learning model directory.
 - `ML_MODEL`: Identifies the latest machine learning model in the `ML_MODEL_PATH`.
 
@@ -149,6 +150,15 @@ see [Makefile](Makefile) as a source of up-to-date command examples. Most genera
 - `ml-lint`: Lints machine learning code using Ruff, MyPy, and Vulture.
 - `ml-nitta`: Runs the machine learning model with Nitta.
 
+### Docker image for development
+
+See details in [ml/synthesis/README.md](ml/synthesis/README.md).
+
+- `docker-dev-build`: Builds the dev image for your platform (~15 GB).
+- `docker-dev-build-with-gpu`: Builds the dev image for your platform with GPU support (~25 GB).
+- `docker-dev-run`: Runs the dev image without GPU support.
+- `docker-dev-run-with-gpu`: Runs the dev image with GPU support.
+
 ## CLI Hints
 
 ``` console

diff --git a/app/Main.hs b/app/Main.hs
@@ -23,7 +23,6 @@ import Data.ByteString.Lazy.Char8 qualified as BS
 import Data.Default (def)
 import Data.Maybe
 import Data.Proxy
-
 import Data.String.Utils qualified as S
 import Data.Text qualified as T
 import Data.Text.IO qualified as T

diff --git a/ml/synthesis/pyproject.toml b/ml/synthesis/pyproject.toml
@@ -19,7 +19,11 @@ uvicorn = { extras = ["standard"], version = "^0.23.2" }
 pandas = { version = "2.0.2", extras = ["performance"] }
 tabulate = "^0.9.0"
 
-tensorflow = {version = "^2.13.1" }
+# source: https://github.com/python-poetry/poetry/issues/8271#issuecomment-1696053565
+#
+# Don't forget to update TENSORFLOW_VER in ml/synthesis/Dockerfile!
+#
+tensorflow = "^2.13.1"
 tensorflow-macos = { version = "^2.13.1", platform = "darwin", markers = "platform_machine=='arm64'" }
 tensorflow-intel = { version = "^2.13.1", platform = "windows" }
 tensorflow-cpu = [
@@ -28,12 +32,6 @@ tensorflow-cpu = [
 ]
 tensorflow-cpu-aws = { version = "^2.13.1", platform = "linux", markers = "platform_machine=='arm64' or platform_machine=='aarch64'" }
 
-# tensorflow = "2.13.1"
-# tensorflow-cpu-aws = { version = "^2.13.1", platform = "linux" }
-# tensorflow-intel = { version = "^2.13.1", platform = "win32" }
-# tensorflow-macos = { platform = "darwin", version = "^2.13.1" }
-# tensorflow-metal = { platform = "darwin", version = "^1.1.0" }
-
 [tool.poetry.group.dev.dependencies]
 pytest = "^7.4.0"
 pytest-asyncio = "^0.21.1"

diff --git a/ml/synthesis/src/components/data_crawling/nitta/nitta_running.py b/ml/synthesis/src/components/data_crawling/nitta/nitta_running.py
@@ -17,7 +17,7 @@
 from typing import AsyncGenerator
 
 from components.common.logging import get_logger
-from consts import ROOT_DIR
+from consts import NITTA_RUN_COMMAND_OVERRIDE, ROOT_DIR
 
 logger = get_logger(__name__)
 
@@ -144,8 +144,8 @@ async def run_nitta_server(
 ) -> AsyncGenerator[NittaRunResult, None]:
     port = 0  # NITTA will choose a random free port and print it to stdout, we'll parse
 
-    if new_nitta_cmd := os.environ.get("NITTA_RUN_COMMAND"):
-        logger.warn(f"Using NITTA_RUN_COMMAND env var: {new_nitta_cmd}")
+    if new_nitta_cmd := NITTA_RUN_COMMAND_OVERRIDE:
+        logger.warning(f"Using NITTA_RUN_COMMAND_OVERRIDE env var: {new_nitta_cmd}")
         nitta_run_command = new_nitta_cmd
 
     final_kwargs: dict = dict(

diff --git a/ml/synthesis/src/consts.py b/ml/synthesis/src/consts.py
@@ -49,3 +49,6 @@ def _find_root_dir():
 MODELS_DIR = Path(_models_dir_env) if _models_dir_env else ML_SYNTHESIS_DIR / "models"
 
 ML_BACKEND_BASE_URL_FILEPATH = ".ml_backend_base_url"
+
+# High-priority env var (same name as the constant) that overrides the command provided in the config file.
+NITTA_RUN_COMMAND_OVERRIDE = os.environ.get("NITTA_RUN_COMMAND_OVERRIDE", None)
 env: 
   NITTA_RUN_COMMAND_OVERRIDE: nitta 
 async with run_nitta_server( 
     EXAMPLES_DIR / "fibonacci.lua", 
     nitta_args=f'--score="ml_{model_name}"  --method=NoSynthesis -e', 
     env={EnvVarNames.MODELS_DIR: tmp_models_dir.resolve()}, 
 ) as nitta: 
     ml_scores = await _get_scores(await nitta.get_base_url()) 
     assert non_ml_scores != ml_scores 
 env: 
   NITTA_RUN_COMMAND_OVERRIDE: nitta 
 async with run_nitta_server( 
     EXAMPLES_DIR / "fibonacci.lua", 
     nitta_args=f'--score="ml_{model_name}"  --method=NoSynthesis -e', 
     env={EnvVarNames.MODELS_DIR: tmp_models_dir.resolve()}, 
 ) as nitta: 
     ml_scores = await _get_scores(await nitta.get_base_url()) 
     assert non_ml_scores != ml_scores 
diff --git a/nitta.cabal b/nitta.cabal
@@ -1,6 +1,6 @@
 cabal-version: 1.18
 
--- This file has been generated from package.yaml by hpack version 0.35.1.
+-- This file has been generated from package.yaml by hpack version 0.36.0.
 --
 -- see: https://github.com/sol/hpack
 
@@ -90,7 +90,7 @@ library
       NITTA.Synthesis.Analysis
       NITTA.Synthesis.Explore
       NITTA.Synthesis.Method
-      NITTA.Synthesis.MlBackend.Api
+      NITTA.Synthesis.MlBackend.Client
       NITTA.Synthesis.MlBackend.FixedCache
       NITTA.Synthesis.MlBackend.ServerInstance
       NITTA.Synthesis.Steps

diff --git a/src/NITTA/Synthesis/Explore.hs b/src/NITTA/Synthesis/Explore.hs
@@ -37,7 +37,7 @@ import NITTA.Model.Problems.Bind
 import NITTA.Model.Problems.Dataflow
 import NITTA.Model.Problems.Refactor
 import NITTA.Model.TargetSystem
-import NITTA.Synthesis.MlBackend.Api
+import NITTA.Synthesis.MlBackend.Client
 import NITTA.Synthesis.MlBackend.ServerInstance
 import NITTA.Synthesis.Types
 import NITTA.UIBackend.Types

diff --git a/src/NITTA/Synthesis/Method.hs b/src/NITTA/Synthesis/Method.hs
@@ -216,26 +216,6 @@ topDownByScoreSynthesisIO' heap step depthCoeffBase limit scoreKey ctx currentNo
 
                             topDownByScoreSynthesisIO' (H.drop dropCount heapWithSubforest) (step + 1) depthCoeffBase limit scoreKey ctx nextBestScoreNode
 
--- FIXME: Validate the type above, its usages and meaning in the context of changes described below.
---
---      Ilya Burakov is not sure why signatures of synthesis method functions were explicitly defined
---      (not inferred) and why they are what they are, but introduction of JSON body formatting
---      for ML backend node scoring requests in NITTA.Synthesis.Explore module forced to add JSON-related
---      constraints to them.
---
---      Also, it has spilled to Default interface in NITTA.Synthesis. See usages of
---      SynthesisMethodConstraints for all related changes.
---
---      Effectvely, those constraints were added:
---          - ToJSONKey v, ToJSON v, ToJSON x, ToJSON t (via ValValTime -> ValValTimeJSON)
---          - ToJSON tag (explicitly)
---
---      Related chain of dependencies:
---      stateOfTheArtSynthesisIO -> bestThreadIO (or others) -> positiveSubForestIO -> subForestIO ->
---      predictScoresIO -> ScoringInput -> NodeView
---
---      Not sure if it's the right way to do it, but it works for now. Please, validate and fix if needed.
-
 -- * Helpers
 
 selectSubForestIO ::

diff --git a/src/NITTA/Synthesis/MlBackend/Api.hs → src/NITTA/Synthesis/MlBackend/Client.hs b/src/NITTA/Synthesis/MlBackend/Api.hs → src/NITTA/Synthesis/MlBackend/Client.hs
@@ -9,7 +9,7 @@ License     : BSD3
 Maintainer  : aleksandr.penskoi@gmail.com
 Stability   : experimental
 -}
-module NITTA.Synthesis.MlBackend.Api (
+module NITTA.Synthesis.MlBackend.Client (
     ScoringInput (..),
     ScoringTarget (..),
     predictScoresIO,

diff --git a/src/NITTA/Utils/Base.hs b/src/NITTA/Utils/Base.hs
@@ -26,6 +26,7 @@ import Data.Set (elems, unions)
 import Data.String
 import Data.String.ToString
 import Data.Text qualified as T
+import System.Log.Logger (warningM)
 
 unionsMap f lst = unions $ map f lst
 
@@ -45,4 +46,10 @@ showText v = T.pack $ show v
 
 vsToStringList vs = map toString $ elems vs
 
-catchToMaybeIO action = catch (action <&> Just) (\(_ :: IOException) -> return Nothing)
+catchToMaybeIO action =
+    catch
+        (action <&> Just)
+        ( \(e :: IOException) -> do
+            warningM "NITTA" ("IO Exception: " <> show e)
+            return Nothing
+        )