Skip to content

Commit

Permalink
Add .gitignore and .dockerignore behavior to ImageSpec (#2369)
Browse files Browse the repository at this point in the history
* Add .gitignore and .dockerignore behavior to ImageSpec

Signed-off-by: Kevin Su <pingsutw@apache.org>

* Add a test

Signed-off-by: Kevin Su <pingsutw@apache.org>

* Fix CI

Signed-off-by: Kevin Su <pingsutw@apache.org>

* fix test

Signed-off-by: Kevin Su <pingsutw@apache.org>

* fix tests

Signed-off-by: Kevin Su <pingsutw@apache.org>

* fix test

Signed-off-by: Kevin Su <pingsutw@apache.org>

* nit

Signed-off-by: Kevin Su <pingsutw@apache.org>

* test

Signed-off-by: Kevin Su <pingsutw@apache.org>

* nit

Signed-off-by: Kevin Su <pingsutw@apache.org>

---------

Signed-off-by: Kevin Su <pingsutw@apache.org>
  • Loading branch information
pingsutw authored Apr 24, 2024
1 parent 9703959 commit 7ba8f35
Show file tree
Hide file tree
Showing 10 changed files with 43 additions and 15 deletions.
10 changes: 9 additions & 1 deletion flytekit/image_spec/image_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,7 +275,15 @@ def calculate_hash_from_image_spec(image_spec: ImageSpec):
spec = copy.deepcopy(image_spec)
if isinstance(spec.base_image, ImageSpec):
spec.base_image = spec.base_image.image_name()
spec.source_root = hash_directory(image_spec.source_root) if image_spec.source_root else b""

if image_spec.source_root:
from flytekit.tools.fast_registration import compute_digest
from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore

ignore = IgnoreGroup(image_spec.source_root, [GitIgnore, DockerIgnore, StandardIgnore])
digest = compute_digest(image_spec.source_root, ignore.is_ignored)
spec.source_root = digest

if spec.requirements:
spec.requirements = hashlib.sha1(pathlib.Path(spec.requirements).read_bytes()).hexdigest()
# won't rebuild the image if we change the registry_config path
Expand Down
2 changes: 1 addition & 1 deletion flytekit/remote/executions.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def flyte_workflow(self) -> Optional[FlyteWorkflow]:
return self._flyte_workflow

@property
def node_executions(self) -> Dict[str, "FlyteNodeExecution"]:
def node_executions(self) -> Dict[str, FlyteNodeExecution]:
"""Get a dictionary of node executions that are a part of this workflow execution."""
return self._node_executions or {}

Expand Down
2 changes: 1 addition & 1 deletion flytekit/tools/fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ def compute_digest(source: os.PathLike, filter: Optional[callable] = None) -> st
"""
Walks the entirety of the source dir to compute a deterministic md5 hex digest of the dir contents.
:param os.PathLike source:
:param Ignore ignore:
:param callable filter:
:return Text:
"""
hasher = hashlib.md5()
Expand Down
5 changes: 3 additions & 2 deletions flytekit/tools/ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

from flytekit.loggers import logger

STANDARD_IGNORE_PATTERNS = ["*.pyc", ".cache", ".cache/*", "__pycache__", "**/__pycache__"]
STANDARD_IGNORE_PATTERNS = ["*.pyc", ".cache", ".cache/*", "__pycache__/*", "**/__pycache__/*"]


class Ignore(ABC):
Expand Down Expand Up @@ -79,7 +79,8 @@ def _parse(self) -> PatternMatcher:
if os.path.isfile(dockerignore):
with open(dockerignore, "r") as f:
patterns = [l.strip() for l in f.readlines() if l and not l.startswith("#")]
logger.info(f"No .dockerignore found in {self.root}, not applying any filters")
else:
logger.info(f"No .dockerignore found in {self.root}, not applying any filters")
return PatternMatcher(patterns)

def _is_ignored(self, path: str) -> bool:
Expand Down
13 changes: 10 additions & 3 deletions plugins/flytekit-envd/flytekitplugins/envd/image_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from flytekit.core import context_manager
from flytekit.core.constants import REQUIREMENTS_FILE_NAME
from flytekit.image_spec.image_spec import _F_IMG_ID, ImageBuildEngine, ImageSpec, ImageSpecBuilder
from flytekit.tools.ignore import DockerIgnore, GitIgnore, IgnoreGroup, StandardIgnore

FLYTE_LOCAL_REGISTRY = "localhost:30000"

Expand Down Expand Up @@ -131,14 +132,20 @@ def build():
envd_config += f' install.cuda(version="{image_spec.cuda}", cudnn="{cudnn}")\n'

if image_spec.source_root:
shutil.copytree(image_spec.source_root, pathlib.Path(cfg_path).parent, dirs_exist_ok=True)
ignore = IgnoreGroup(image_spec.source_root, [GitIgnore, DockerIgnore, StandardIgnore])
shutil.copytree(
src=image_spec.source_root,
dst=pathlib.Path(cfg_path).parent,
ignore=shutil.ignore_patterns(*ignore.list_ignored()),
dirs_exist_ok=True,
)

envd_version = metadata.version("envd")
# Indentation is required by envd
if Version(envd_version) <= Version("0.3.37"):
envd_config += ' io.copy(host_path="./", envd_path="/root")'
envd_config += ' io.copy(host_path="./", envd_path="/root")\n'
else:
envd_config += ' io.copy(source="./", target="/root")'
envd_config += ' io.copy(source="./", target="/root")\n'

with open(cfg_path, "w+") as f:
f.write(envd_config)
Expand Down
1 change: 1 addition & 0 deletions plugins/flytekit-envd/tests/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
README.md
3 changes: 3 additions & 0 deletions plugins/flytekit-envd/tests/test_image_spec.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
from pathlib import Path
from textwrap import dedent

Expand Down Expand Up @@ -37,6 +38,7 @@ def test_image_spec():
python_version="3.8",
base_image=base_image,
pip_index="https://private-pip-index/simple",
source_root=os.path.dirname(os.path.realpath(__file__)),
)

image_spec = image_spec.with_commands("echo hello")
Expand All @@ -58,6 +60,7 @@ def build():
runtime.environ(env={{'PYTHONPATH': '/root', '_F_IMG_ID': '{image_name}'}}, extra_path=['/root'])
config.pip_index(url="https://private-pip-index/simple")
install.python(version="3.8")
io.copy(source="./", target="/root")
"""
)

Expand Down
16 changes: 11 additions & 5 deletions tests/flytekit/unit/cli/pyflyte/test_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,14 @@

import mock
import pytest
import yaml
from click.testing import CliRunner

from flytekit.clis.sdk_in_container import pyflyte
from flytekit.clis.sdk_in_container.run import RunLevelParams, get_entities_in_file, run_command
from flytekit.configuration import Config, Image, ImageConfig
from flytekit.core.task import task
from flytekit.image_spec.image_spec import ImageBuildEngine
from flytekit.image_spec.image_spec import ImageBuildEngine, ImageSpec, calculate_hash_from_image_spec
from flytekit.interaction.click_types import DirParamType, FileParamType
from flytekit.remote import FlyteRemote

Expand Down Expand Up @@ -347,10 +348,17 @@ def test_list_default_arguments(wf_path):
],
)

IMAGE_SPEC = os.path.join(os.path.dirname(os.path.realpath(__file__)), "imageSpec.yaml")

with open(IMAGE_SPEC, "r") as f:
image_spec_dict = yaml.safe_load(f)
image_spec = ImageSpec(**image_spec_dict)
tag = calculate_hash_from_image_spec(image_spec)

ic_result_4 = ImageConfig(
default_image=Image(name="default", fqn="flytekit", tag="urw7fglw5pBrIQ9JTW1vQA"),
default_image=Image(name="default", fqn="flytekit", tag=tag),
images=[
Image(name="default", fqn="flytekit", tag="urw7fglw5pBrIQ9JTW1vQA"),
Image(name="default", fqn="flytekit", tag=tag),
Image(name="xyz", fqn="docker.io/xyz", tag="latest"),
Image(name="abc", fqn="docker.io/abc", tag=None),
Image(
Expand All @@ -361,8 +369,6 @@ def test_list_default_arguments(wf_path):
],
)

IMAGE_SPEC = os.path.join(os.path.dirname(os.path.realpath(__file__)), "imageSpec.yaml")


@mock.patch("flytekit.configuration.default_images.DefaultImages.default_image")
@pytest.mark.parametrize(
Expand Down
2 changes: 2 additions & 0 deletions tests/flytekit/unit/tools/test_fast_registration.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def test_package(flyte_project, tmp_path):
"src",
"src/util",
"src/workflows",
"src/workflows/__pycache__",
"src/workflows/hello_world.py",
"utils",
"utils/util.py",
Expand All @@ -69,6 +70,7 @@ def test_package_with_symlink(flyte_project, tmp_path):
assert sorted(tar.getnames()) == [
"util",
"workflows",
"workflows/__pycache__",
"workflows/hello_world.py",
]
util = tar.getmember("util")
Expand Down
4 changes: 2 additions & 2 deletions tests/flytekit/unit/tools/test_ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,7 +204,7 @@ def test_all_ignore(all_ignore):
ignore = IgnoreGroup(all_ignore, [GitIgnore, DockerIgnore, StandardIgnore])
assert not ignore.is_ignored("sub")
assert not ignore.is_ignored("sub/some.bar")
assert ignore.is_ignored("sub/__pycache__")
assert ignore.is_ignored("sub/__pycache__/")
assert ignore.is_ignored("sub/__pycache__/some.pyc")
assert ignore.is_ignored("data")
assert ignore.is_ignored("data/reallybigfile.bar")
Expand All @@ -222,7 +222,7 @@ def test_all_ignore_tar_filter(all_ignore):
ignore = IgnoreGroup(all_ignore, [GitIgnore, DockerIgnore, StandardIgnore])
assert ignore.tar_filter(TarInfo(name="sub")).name == "sub"
assert ignore.tar_filter(TarInfo(name="sub/some.bar")).name == "sub/some.bar"
assert not ignore.tar_filter(TarInfo(name="sub/__pycache__"))
assert not ignore.tar_filter(TarInfo(name="sub/__pycache__/"))
assert not ignore.tar_filter(TarInfo(name="sub/__pycache__/some.pyc"))
assert not ignore.tar_filter(TarInfo(name="data"))
assert not ignore.tar_filter(TarInfo(name="data/reallybigfile.bar"))
Expand Down

0 comments on commit 7ba8f35

Please sign in to comment.