From 7c4db3712080b50950740dc2ef1ce39a7b5c74aa Mon Sep 17 00:00:00 2001 From: aliciaaevans Date: Wed, 27 Dec 2023 18:09:42 -0500 Subject: [PATCH 1/2] feat: choose worker recipes by depth level --- bioconda_utils/build.py | 33 ++++++++++++++++++++++++++------- bioconda_utils/cli.py | 7 +++++-- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/bioconda_utils/build.py b/bioconda_utils/build.py index 26d8bcdb02..bdf77e002f 100644 --- a/bioconda_utils/build.py +++ b/bioconda_utils/build.py @@ -7,14 +7,12 @@ import os import logging import itertools -import time from typing import List, Optional from bioconda_utils.skiplist import Skiplist from bioconda_utils.build_failure import BuildFailureRecord from bioconda_utils.githandler import GitHandler -import conda from conda.exports import UnsatisfiableError from conda_build.exceptions import DependencyNeedsBuildingError import networkx as nx @@ -230,20 +228,38 @@ def remove_cycles(dag, name2recipes, failed, skip_dependent): return dag.subgraph(name for name in dag if name not in nodes_in_cycles) -def get_subdags(dag, n_workers, worker_offset): +def get_subdags(dag, n_workers, worker_offset, subdag_depth = 0): if n_workers > 1 and worker_offset >= n_workers: raise ValueError( "n-workers is less than the worker-offset given! " "Either decrease --n-workers or decrease --worker-offset!") # Get connected subdags and sort by nodes + # If subdag_depth is None, each root node and all children (not previously assigned) are assigned to the same worker. + # This may fail when attempting to build child nodes with parents assigned to other workers. + # If subdag_depth is set, only nodes of a certain depth will be built (i.e., 0: only root nodes, + # 1: only nodes with parents that are root nodes, etc.). They are assigned evenly across workers. 
if n_workers > 1: root_nodes = sorted([k for (k, v) in dag.in_degree() if v == 0]) nodes = set() found = set() + children = [] + + if subdag_depth is not None: + working_dag = nx.DiGraph(dag) + # Only build the current "root" nodes after removing the root nodes of all shallower levels + for i in range(0, subdag_depth + 1): + print("{} recipes at depth {}".format(len(root_nodes), i)) + if len(root_nodes) == 0: + break + if i < subdag_depth: + working_dag.remove_nodes_from(root_nodes) + root_nodes = sorted([k for (k, v) in working_dag.in_degree() if v == 0]) + for idx, root_node in enumerate(root_nodes): - # Flatten the nested list - children = itertools.chain(*nx.dfs_successors(dag, root_node).values()) + if subdag_depth is None: + # Flatten the nested list + children = itertools.chain(*nx.dfs_successors(dag, root_node).values()) # This is the only obvious way of ensuring that all nodes are included # in exactly 1 subgraph found.add(root_node) @@ -256,6 +272,7 @@ def get_subdags(dag, n_workers, worker_offset): else: for child in children: found.add(child) + subdags = dag.subgraph(list(nodes)) logger.info("Building and testing sub-DAGs %i in each group of %i, which is %i packages", worker_offset, n_workers, len(subdags.nodes())) else: @@ -300,7 +317,8 @@ def build_recipes(recipe_folder: str, config_path: str, recipes: List[str], mulled_conda_image: str = pkg_test.MULLED_CONDA_IMAGE, record_build_failures: bool = False, skiplist_leafs: bool = False, - live_logs: bool = True): + live_logs: bool = True, + subdag_depth: int = None): """ Build one or many bioconda packages. @@ -329,6 +347,7 @@ def build_recipes(recipe_folder: str, config_path: str, recipes: List[str], keep_old_work: Do not remove anything from environment, even after successful build and test. skiplist_leafs: If True, blacklist leaf packages that fail to build live_logs: If True, enable live logging during the build process + subdag_depth: Number of levels of nodes to skip. 
(Optional, only if using n_workers) """ if not recipes: logger.info("Nothing to be done.") @@ -364,7 +383,7 @@ def build_recipes(recipe_folder: str, config_path: str, recipes: List[str], skip_dependent = defaultdict(list) dag = remove_cycles(dag, name2recipes, failed, skip_dependent) - subdag = get_subdags(dag, n_workers, worker_offset) + subdag = get_subdags(dag, n_workers, worker_offset, subdag_depth) if not subdag: logger.info("Nothing to be done.") return True diff --git a/bioconda_utils/cli.py b/bioconda_utils/cli.py index 1b89e5f6ad..84f69a4ad1 100644 --- a/bioconda_utils/cli.py +++ b/bioconda_utils/cli.py @@ -435,6 +435,7 @@ def do_lint(recipe_folder, config, packages="*", cache=None, list_checks=False, @arg("--record-build-failures", action="store_true", help="Record build failures in build_failure.yaml next to the recipe.") @arg("--skiplist-leafs", action="store_true", help="Skiplist leaf recipes (i.e. ones that are not depended on by any other recipes) that fail to build.") @arg('--disable-live-logs', action='store_true', help="Disable live logging during the build process") +@arg('--subdag-depth', type=int, help="Number of levels of root nodes to skip. 
(Optional, and only if using n_workers)") @enable_logging() def build(recipe_folder, config, packages="*", git_range=None, testonly=False, force=False, docker=None, mulled_test=False, build_script_template=None, @@ -445,7 +446,8 @@ def build(recipe_folder, config, packages="*", git_range=None, testonly=False, docker_base_image=None, record_build_failures=False, skiplist_leafs=False, - disable_live_logs=False): + disable_live_logs=False, + subdag_depth=None): cfg = utils.load_config(config) setup = cfg.get('setup', None) if setup: @@ -506,7 +508,8 @@ def build(recipe_folder, config, packages="*", git_range=None, testonly=False, mulled_conda_image=mulled_conda_image, record_build_failures=record_build_failures, skiplist_leafs=skiplist_leafs, - live_logs=(not disable_live_logs)) + live_logs=(not disable_live_logs), + subdag_depth=subdag_depth) exit(0 if success else 1) From 45c6b375db869812be5952c9b8443ab4c8320cee Mon Sep 17 00:00:00 2001 From: aliciaaevans Date: Fri, 5 Apr 2024 15:03:35 -0400 Subject: [PATCH 2/2] remove confusing default value --- bioconda_utils/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bioconda_utils/build.py b/bioconda_utils/build.py index f58bb6d891..f1d3aecfbf 100644 --- a/bioconda_utils/build.py +++ b/bioconda_utils/build.py @@ -228,7 +228,7 @@ def remove_cycles(dag, name2recipes, failed, skip_dependent): return dag.subgraph(name for name in dag if name not in nodes_in_cycles) -def get_subdags(dag, n_workers, worker_offset, subdag_depth = 0): +def get_subdags(dag, n_workers, worker_offset, subdag_depth): if n_workers > 1 and worker_offset >= n_workers: raise ValueError( "n-workers is less than the worker-offset given! "