-
Notifications
You must be signed in to change notification settings - Fork 1.7k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat: Introduce cache_key to sdk #11466
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -686,6 +686,7 @@ def run_pipeline( | |
version_id: Optional[str] = None, | ||
pipeline_root: Optional[str] = None, | ||
enable_caching: Optional[bool] = None, | ||
cache_key: Optional[str] = '', | ||
service_account: Optional[str] = None, | ||
) -> kfp_server_api.V2beta1Run: | ||
"""Runs a specified pipeline. | ||
|
@@ -709,6 +710,8 @@ def run_pipeline( | |
is ``True`` for all tasks by default. If set, the | ||
setting applies to all tasks in the pipeline (overrides the | ||
compile time settings). | ||
cache_key (optional): Customized cache key for this task. | ||
If set, the cache_key will be used as the key for the task's cache. | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you elaborate on why we would need a cache key? This docstring isn't providing a clear enough idea. I would suggest adding some description to the PR, and also linking the related GitHub issue. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Thanks for reviewing! This is for Issue #11328 (also updated this information in the description). |
||
service_account: Specifies which Kubernetes service | ||
account to use for this run. | ||
|
||
|
@@ -721,6 +724,7 @@ def run_pipeline( | |
pipeline_id=pipeline_id, | ||
version_id=version_id, | ||
enable_caching=enable_caching, | ||
cache_key=cache_key, | ||
pipeline_root=pipeline_root, | ||
) | ||
|
||
|
@@ -806,6 +810,7 @@ def create_recurring_run( | |
enabled: bool = True, | ||
pipeline_root: Optional[str] = None, | ||
enable_caching: Optional[bool] = None, | ||
cache_key: Optional[str] = '', | ||
service_account: Optional[str] = None, | ||
) -> kfp_server_api.V2beta1RecurringRun: | ||
"""Creates a recurring run. | ||
|
@@ -850,6 +855,8 @@ def create_recurring_run( | |
different caching options for individual tasks. If set, the | ||
setting applies to all tasks in the pipeline (overrides the | ||
compile time settings). | ||
cache_key (optional): Customized cache key for this task. | ||
If set, the cache_key will be used as the key for the task's cache. | ||
service_account: Specifies which Kubernetes service | ||
account this recurring run uses. | ||
Returns: | ||
|
@@ -862,6 +869,7 @@ def create_recurring_run( | |
pipeline_id=pipeline_id, | ||
version_id=version_id, | ||
enable_caching=enable_caching, | ||
cache_key=cache_key, | ||
pipeline_root=pipeline_root, | ||
) | ||
|
||
|
@@ -908,6 +916,7 @@ def _create_job_config( | |
pipeline_id: Optional[str], | ||
version_id: Optional[str], | ||
enable_caching: Optional[bool], | ||
cache_key: Optional[str], | ||
pipeline_root: Optional[str], | ||
) -> _JobConfig: | ||
"""Creates a JobConfig with spec and resource_references. | ||
|
@@ -928,6 +937,8 @@ def _create_job_config( | |
different caching options for individual tasks. If set, the | ||
setting applies to all tasks in the pipeline (overrides the | ||
compile time settings). | ||
cache_key (optional): Customized cache key for this task. | ||
If set, the cache_key will be used as the key for the task's cache. | ||
pipeline_root: Root path of the pipeline outputs. | ||
|
||
Returns: | ||
|
@@ -956,7 +967,7 @@ def _create_job_config( | |
# settings. | ||
if enable_caching is not None: | ||
_override_caching_options(pipeline_doc.pipeline_spec, | ||
enable_caching) | ||
enable_caching, cache_key) | ||
pipeline_spec = pipeline_doc.to_dict() | ||
|
||
pipeline_version_reference = None | ||
|
@@ -983,6 +994,7 @@ def create_run_from_pipeline_func( | |
namespace: Optional[str] = None, | ||
pipeline_root: Optional[str] = None, | ||
enable_caching: Optional[bool] = None, | ||
cache_key: Optional[str] = '', | ||
service_account: Optional[str] = None, | ||
experiment_id: Optional[str] = None, | ||
) -> RunPipelineResult: | ||
|
@@ -1004,6 +1016,8 @@ def create_run_from_pipeline_func( | |
different caching options for individual tasks. If set, the | ||
setting applies to all tasks in the pipeline (overrides the | ||
compile time settings). | ||
cache_key (optional): Customized cache key for this task. | ||
If set, the cache_key will be used as the key for the task's cache. | ||
service_account: Specifies which Kubernetes service | ||
account to use for this run. | ||
experiment_id: ID of the experiment to add the run to. You cannot specify both experiment_id and experiment_name. | ||
|
@@ -1032,6 +1046,7 @@ def create_run_from_pipeline_func( | |
namespace=namespace, | ||
pipeline_root=pipeline_root, | ||
enable_caching=enable_caching, | ||
cache_key=cache_key, | ||
service_account=service_account, | ||
) | ||
|
||
|
@@ -1044,6 +1059,7 @@ def create_run_from_pipeline_package( | |
namespace: Optional[str] = None, | ||
pipeline_root: Optional[str] = None, | ||
enable_caching: Optional[bool] = None, | ||
cache_key: Optional[str] = '', | ||
service_account: Optional[str] = None, | ||
experiment_id: Optional[str] = None, | ||
) -> RunPipelineResult: | ||
|
@@ -1065,6 +1081,8 @@ def create_run_from_pipeline_package( | |
different caching options for individual tasks. If set, the | ||
setting applies to all tasks in the pipeline (overrides the | ||
compile time settings). | ||
cache_key (optional): Customized cache key for this task. | ||
If set, the cache_key will be used as the key for the task's cache. | ||
service_account: Specifies which Kubernetes service | ||
account to use for this run. | ||
experiment_id: ID of the experiment to add the run to. You cannot specify both experiment_id and experiment_name. | ||
|
@@ -1105,6 +1123,7 @@ def create_run_from_pipeline_package( | |
params=arguments, | ||
pipeline_root=pipeline_root, | ||
enable_caching=enable_caching, | ||
cache_key=cache_key, | ||
service_account=service_account, | ||
) | ||
return RunPipelineResult(self, run_info) | ||
|
@@ -1681,6 +1700,7 @@ def _safe_load_yaml(stream: TextIO) -> _PipelineDoc: | |
def _override_caching_options( | ||
pipeline_spec: pipeline_spec_pb2.PipelineSpec, | ||
enable_caching: bool, | ||
cache_key: str = '', | ||
) -> None: | ||
"""Overrides caching options. | ||
|
||
|
@@ -1690,3 +1710,4 @@ def _override_caching_options( | |
""" | ||
for _, task_spec in pipeline_spec.root.dag.tasks.items(): | ||
task_spec.caching_options.enable_cache = enable_caching | ||
task_spec.caching_options.cache_key = cache_key |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: default to `None`, and set the proto field only when it's not `None`.