From 7ba8f351862e00a72dd015fa66d4490e7d36a5fc Mon Sep 17 00:00:00 2001 From: Kevin Su Date: Wed, 24 Apr 2024 11:14:13 -0700 Subject: [PATCH] Add .gitignore and .dockerignore behavior to ImageSpec (#2369) * Add .gitignore and .dockerignore behavior to ImageSpec Signed-off-by: Kevin Su * Add a test Signed-off-by: Kevin Su * Fix CI Signed-off-by: Kevin Su * fix test Signed-off-by: Kevin Su * fix tests Signed-off-by: Kevin Su * fix test Signed-off-by: Kevin Su * nit Signed-off-by: Kevin Su * test Signed-off-by: Kevin Su * nit Signed-off-by: Kevin Su --------- Signed-off-by: Kevin Su --- flytekit/image_spec/image_spec.py | 10 +++++++++- flytekit/remote/executions.py | 2 +- flytekit/tools/fast_registration.py | 2 +- flytekit/tools/ignore.py | 5 +++-- .../flytekitplugins/envd/image_builder.py | 13 ++++++++++--- plugins/flytekit-envd/tests/.dockerignore | 1 + plugins/flytekit-envd/tests/test_image_spec.py | 3 +++ tests/flytekit/unit/cli/pyflyte/test_run.py | 16 +++++++++++----- .../unit/tools/test_fast_registration.py | 2 ++ tests/flytekit/unit/tools/test_ignore.py | 4 ++-- 10 files changed, 43 insertions(+), 15 deletions(-) create mode 100644 plugins/flytekit-envd/tests/.dockerignore diff --git a/flytekit/image_spec/image_spec.py b/flytekit/image_spec/image_spec.py index c7c9235a4e..027b5e0b47 100644 --- a/flytekit/image_spec/image_spec.py +++ b/flytekit/image_spec/image_spec.py @@ -275,7 +275,15 @@ def calculate_hash_from_image_spec(image_spec: ImageSpec): spec = copy.deepcopy(image_spec) if isinstance(spec.base_image, ImageSpec): spec.base_image = spec.base_image.image_name() - spec.source_root = hash_directory(image_spec.source_root) if image_spec.source_root else b"" + + if image_spec.source_root: + from flytekit.tools.fast_registration import compute_digest + from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore + + ignore = IgnoreGroup(image_spec.source_root, [GitIgnore, DockerIgnore, StandardIgnore]) + digest = compute_digest(image_spec.source_root, ignore.is_ignored) + spec.source_root = digest + if spec.requirements: spec.requirements = hashlib.sha1(pathlib.Path(spec.requirements).read_bytes()).hexdigest() # won't rebuild the image if we change the registry_config path diff --git a/flytekit/remote/executions.py b/flytekit/remote/executions.py index 292b6f0218..bd5e182952 100644 --- a/flytekit/remote/executions.py +++ b/flytekit/remote/executions.py @@ -103,7 +103,7 @@ def flyte_workflow(self) -> Optional[FlyteWorkflow]: return self._flyte_workflow @property - def node_executions(self) -> Dict[str, "FlyteNodeExecution"]: + def node_executions(self) -> Dict[str, FlyteNodeExecution]: """Get a dictionary of node executions that are a part of this workflow execution.""" return self._node_executions or {} diff --git a/flytekit/tools/fast_registration.py b/flytekit/tools/fast_registration.py index 71ff3968f6..e596e62f38 100644 --- a/flytekit/tools/fast_registration.py +++ b/flytekit/tools/fast_registration.py @@ -61,7 +61,7 @@ def compute_digest(source: os.PathLike, filter: Optional[callable] = None) -> st """ Walks the entirety of the source dir to compute a deterministic md5 hex digest of the dir contents. :param os.PathLike source: - :param Ignore ignore: + :param callable filter: :return Text: """ hasher = hashlib.md5() diff --git a/flytekit/tools/ignore.py b/flytekit/tools/ignore.py index 4760fa2d48..4a427c2734 100644 --- a/flytekit/tools/ignore.py +++ b/flytekit/tools/ignore.py @@ -11,7 +11,7 @@ from flytekit.loggers import logger -STANDARD_IGNORE_PATTERNS = ["*.pyc", ".cache", ".cache/*", "__pycache__", "**/__pycache__"] +STANDARD_IGNORE_PATTERNS = ["*.pyc", ".cache", ".cache/*", "__pycache__/*", "**/__pycache__/*"] class Ignore(ABC): @@ -79,7 +79,8 @@ def _parse(self) -> PatternMatcher: if os.path.isfile(dockerignore): with open(dockerignore, "r") as f: patterns = [l.strip() for l in f.readlines() if l and not l.startswith("#")] - logger.info(f"No .dockerignore found in {self.root}, not applying any filters") + else: + logger.info(f"No .dockerignore found in {self.root}, not applying any filters") return PatternMatcher(patterns) def _is_ignored(self, path: str) -> bool: diff --git a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py index 2409f0a25d..f72ddde2a7 100644 --- a/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py +++ b/plugins/flytekit-envd/flytekitplugins/envd/image_builder.py @@ -11,6 +11,7 @@ from flytekit.core import context_manager from flytekit.core.constants import REQUIREMENTS_FILE_NAME from flytekit.image_spec.image_spec import _F_IMG_ID, ImageBuildEngine, ImageSpec, ImageSpecBuilder +from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore FLYTE_LOCAL_REGISTRY = "localhost:30000" @@ -131,14 +132,20 @@ def build(): envd_config += f' install.cuda(version="{image_spec.cuda}", cudnn="{cudnn}")\n' if image_spec.source_root: - shutil.copytree(image_spec.source_root, pathlib.Path(cfg_path).parent, dirs_exist_ok=True) + ignore = IgnoreGroup(image_spec.source_root, [GitIgnore, DockerIgnore, StandardIgnore]) + shutil.copytree( + src=image_spec.source_root, + dst=pathlib.Path(cfg_path).parent, + ignore=shutil.ignore_patterns(*ignore.list_ignored()), + dirs_exist_ok=True, + ) envd_version = metadata.version("envd") # Indentation is required by envd if Version(envd_version) <= Version("0.3.37"): - envd_config += ' io.copy(host_path="./", envd_path="/root")' + envd_config += ' io.copy(host_path="./", envd_path="/root")\n' else: - envd_config += ' io.copy(source="./", target="/root")' + envd_config += ' io.copy(source="./", target="/root")\n' with open(cfg_path, "w+") as f: f.write(envd_config) diff --git a/plugins/flytekit-envd/tests/.dockerignore b/plugins/flytekit-envd/tests/.dockerignore new file mode 100644 index 0000000000..b43bf86b50 --- /dev/null +++ b/plugins/flytekit-envd/tests/.dockerignore @@ -0,0 +1 @@ +README.md diff --git a/plugins/flytekit-envd/tests/test_image_spec.py b/plugins/flytekit-envd/tests/test_image_spec.py index f7c8e3f370..7fd3cd1be0 100644 --- a/plugins/flytekit-envd/tests/test_image_spec.py +++ b/plugins/flytekit-envd/tests/test_image_spec.py @@ -1,3 +1,4 @@ +import os from pathlib import Path from textwrap import dedent @@ -37,6 +38,7 @@ def test_image_spec(): python_version="3.8", base_image=base_image, pip_index="https://private-pip-index/simple", + source_root=os.path.dirname(os.path.realpath(__file__)), ) image_spec = image_spec.with_commands("echo hello") @@ -58,6 +60,7 @@ def build(): runtime.environ(env={{'PYTHONPATH': '/root', '_F_IMG_ID': '{image_name}'}}, extra_path=['/root']) config.pip_index(url="https://private-pip-index/simple") install.python(version="3.8") + io.copy(source="./", target="/root") """ ) diff --git a/tests/flytekit/unit/cli/pyflyte/test_run.py b/tests/flytekit/unit/cli/pyflyte/test_run.py index 59fca87f06..28bee1dea7 100644 --- a/tests/flytekit/unit/cli/pyflyte/test_run.py +++ b/tests/flytekit/unit/cli/pyflyte/test_run.py @@ -7,13 +7,14 @@ import mock import pytest +import yaml from click.testing import CliRunner from flytekit.clis.sdk_in_container import pyflyte from flytekit.clis.sdk_in_container.run import RunLevelParams, get_entities_in_file, run_command from flytekit.configuration import Config, Image, ImageConfig from flytekit.core.task import task -from flytekit.image_spec.image_spec import ImageBuildEngine +from flytekit.image_spec.image_spec import ImageBuildEngine, ImageSpec, calculate_hash_from_image_spec from flytekit.interaction.click_types import DirParamType, FileParamType from flytekit.remote import FlyteRemote @@ -347,10 +348,17 @@ def test_list_default_arguments(wf_path): ], ) +IMAGE_SPEC = os.path.join(os.path.dirname(os.path.realpath(__file__)), "imageSpec.yaml") + +with open(IMAGE_SPEC, "r") as f: + image_spec_dict = yaml.safe_load(f) + image_spec = ImageSpec(**image_spec_dict) + tag = calculate_hash_from_image_spec(image_spec) + ic_result_4 = ImageConfig( - default_image=Image(name="default", fqn="flytekit", tag="urw7fglw5pBrIQ9JTW1vQA"), + default_image=Image(name="default", fqn="flytekit", tag=tag), images=[ - Image(name="default", fqn="flytekit", tag="urw7fglw5pBrIQ9JTW1vQA"), + Image(name="default", fqn="flytekit", tag=tag), Image(name="xyz", fqn="docker.io/xyz", tag="latest"), Image(name="abc", fqn="docker.io/abc", tag=None), Image( @@ -361,8 +369,6 @@ def test_list_default_arguments(wf_path): ], ) -IMAGE_SPEC = os.path.join(os.path.dirname(os.path.realpath(__file__)), "imageSpec.yaml") - @mock.patch("flytekit.configuration.default_images.DefaultImages.default_image") @pytest.mark.parametrize( diff --git a/tests/flytekit/unit/tools/test_fast_registration.py b/tests/flytekit/unit/tools/test_fast_registration.py index 8d1c565a9d..dd68e22aa8 100644 --- a/tests/flytekit/unit/tools/test_fast_registration.py +++ b/tests/flytekit/unit/tools/test_fast_registration.py @@ -53,6 +53,7 @@ def test_package(flyte_project, tmp_path): "src", "src/util", "src/workflows", + "src/workflows/__pycache__", "src/workflows/hello_world.py", "utils", "utils/util.py", @@ -69,6 +70,7 @@ def test_package_with_symlink(flyte_project, tmp_path): assert sorted(tar.getnames()) == [ "util", "workflows", + "workflows/__pycache__", "workflows/hello_world.py", ] util = tar.getmember("util") diff --git a/tests/flytekit/unit/tools/test_ignore.py b/tests/flytekit/unit/tools/test_ignore.py index 1f93cd2bee..3b24eeb8a0 100644 --- a/tests/flytekit/unit/tools/test_ignore.py +++ b/tests/flytekit/unit/tools/test_ignore.py @@ -204,7 +204,7 @@ def test_all_ignore(all_ignore): ignore = IgnoreGroup(all_ignore, [GitIgnore, DockerIgnore, StandardIgnore]) assert not ignore.is_ignored("sub") assert not ignore.is_ignored("sub/some.bar") - assert ignore.is_ignored("sub/__pycache__") + assert ignore.is_ignored("sub/__pycache__/") assert ignore.is_ignored("sub/__pycache__/some.pyc") assert ignore.is_ignored("data") assert ignore.is_ignored("data/reallybigfile.bar") @@ -222,7 +222,7 @@ def test_all_ignore_tar_filter(all_ignore): ignore = IgnoreGroup(all_ignore, [GitIgnore, DockerIgnore, StandardIgnore]) assert ignore.tar_filter(TarInfo(name="sub")).name == "sub" assert ignore.tar_filter(TarInfo(name="sub/some.bar")).name == "sub/some.bar" - assert not ignore.tar_filter(TarInfo(name="sub/__pycache__")) + assert not ignore.tar_filter(TarInfo(name="sub/__pycache__/")) assert not ignore.tar_filter(TarInfo(name="sub/__pycache__/some.pyc")) assert not ignore.tar_filter(TarInfo(name="data")) assert not ignore.tar_filter(TarInfo(name="data/reallybigfile.bar"))