From 6e74f8dcd15be84715d8d1b245ab8fbdf68a5f38 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:21:41 -0700 Subject: [PATCH 01/33] Update schema to support new exclusive syntax for separate launcher/allocation control --- .../schemas/yamlspecification.json | 22 +++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/maestrowf/specification/schemas/yamlspecification.json b/maestrowf/specification/schemas/yamlspecification.json index 6007bba6..20cc8cbf 100644 --- a/maestrowf/specification/schemas/yamlspecification.json +++ b/maestrowf/specification/schemas/yamlspecification.json @@ -93,8 +93,26 @@ "exclusive": { "anyOf": [ {"type": "boolean"}, - {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} - ]}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"}, + { + "type": "object", + "properties": { + "allocation": { + "anyOf": [ + {"type": "boolean"}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ] + }, + "launcher": { + "anyOf": [ + {"type": "boolean"}, + {"type": "string", "pattern": "^\\$\\(\\w+\\)$"} + ] + } + } + } + ] + }, "nested": {"type": "boolean"}, "waitable": {"type": "boolean"}, "priority": { From 4bee81e204b7703949798be6c63d5e450904d547 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:22:25 -0700 Subject: [PATCH 02/33] Initial prototype of optional launcher/allocation args pass through handling in flux interfaces --- maestrowf/abstracts/interfaces/flux.py | 35 ++++ .../interfaces/script/_flux/flux0_49_0.py | 165 +++++++++++++-- .../interfaces/script/fluxscriptadapter.py | 195 ++++++++++++++++-- 3 files changed, 371 insertions(+), 24 deletions(-) diff --git a/maestrowf/abstracts/interfaces/flux.py b/maestrowf/abstracts/interfaces/flux.py index 03303e61..3e6d3997 100644 --- a/maestrowf/abstracts/interfaces/flux.py +++ b/maestrowf/abstracts/interfaces/flux.py @@ -248,3 +248,38 @@ def key(self): :return: A string of the name of a FluxInterface class. """ ... + + @classmethod + def addtl_alloc_arg_types(cls): + """ + Return set of additional allocation args that this adapter knows how + to wire up to the jobspec python apis, e.g. 'attributes', + 'shell_options', ... This is aimed specifically at the repeated types, + which collect many flags/key=value pairs which go through a specific + jobspec call. Everything not here gets dumped into a 'misc' group + for individual handling. + + :return: List of string + + .. note:: + + Should we have an enum for these or something vs random strings? + """ + return [] + + @classmethod + def render_additional_args(cls, args_dict): + """ + Helper to render additional argument sets to flux cli format for + use in constructing $(LAUNCHER) line and flux batch directives. + This default implementation yields a single empty string. + + :param args_dict: Dictionary of flux arg keys and name: value pairs + :yield: formatted strings of cli options/values + + .. 
note:: + + Promote this to the general/base adapters to handle non-normalizable + scheduler/machine specific options + """ + yield "" diff --git a/maestrowf/interfaces/script/_flux/flux0_49_0.py b/maestrowf/interfaces/script/_flux/flux0_49_0.py index 76322da6..36d53e06 100644 --- a/maestrowf/interfaces/script/_flux/flux0_49_0.py +++ b/maestrowf/interfaces/script/_flux/flux0_49_0.py @@ -33,6 +33,45 @@ class FluxInterface_0490(FluxInterface): StepPriority.EXPEDITE: 31, } + # Config for the groups of alloc args with their own jobspec methods + known_alloc_arg_types = ["attributes", "shell_options", "conf"] + addtl_alloc_arg_type_map = { + "setopt": "shell_options", + "o": "shell_options", + "setattr": "attributes", + "S": "attributes", + "conf": "conf", + } + + @classmethod + def addtl_alloc_arg_types(cls): + """ + Return set of additional allocation args that this adapter knows how + to wire up to the jobspec python apis, e.g. 'attributes', + 'shell_options', ... This is aimed specifically at the repeated types, + which collect many flags/key=value pairs which go through a specific + jobspec call. Everything not here gets dumped into a 'misc' group + for individual handling. + + :return: List of string + + .. note:: + + Should we have an enum for these or something vs random strings? + """ + return cls.known_alloc_arg_types + + @classmethod + def addtl_alloc_arg_type_map(cls, option): + """ + Map verbose/brief cli arg option name (o from -o, setopt from --setopt) + onto known alloc arg types this interface implements + + :param option: option string corresponding to flux cli input + :return: string, one of known_alloc_arg_types + """ + return cls.addtl_alloc_arg_type_map.get(option, None) + @classmethod def get_flux_urgency(cls, urgency) -> int: if isinstance(urgency, str): @@ -46,6 +85,45 @@ def get_flux_urgency(cls, urgency) -> int: LOGGER.debug("Float urgency of '%s' given..", urgency) return ceil(float(urgency) * 31) + @classmethod + def render_additional_args(cls, args_dict): + """ + Helper to render additional argument sets to flux cli format for + use in constructing $(LAUNCHER) line and flux batch directives. + + :param args_dict: Dictionary of flux arg keys and name: value pairs + :yield: formatted strings of cli options/values + + .. note:: + + Promote this to the general/base adapters to handle non-normalizable + scheduler/machine specific options + """ + def arg_info_type(arg_key): + if len(arg_key) == 1: + return {"prefix": "-", "sep": " "} + else: + return {"prefix": "--", "sep": "="} + + def render_arg_value(arg_name, arg_value): + # Handling for flag type values, e.g. -o fastload + if arg_value: + return f"{arg_name}={arg_value}" + else: + return f"{arg_name}" + + for arg_key, arg_value in args_dict.items(): + arg_info = arg_info_type(arg_key) + + for av_name, av_value in arg_value.items(): + value_str = render_arg_value(av_name, av_value) + yield "{prefix}{key}{sep}{value}".format( + prefix=arg_info['prefix'], + key=arg_key, + sep=arg_info['sep'], + value=value_str + ) + @classmethod def submit( cls, @@ -60,11 +138,19 @@ def submit( force_broker=True, urgency=StepPriority.MEDIUM, waitable=False, - batch_attrs=None, # TODO: expose opts/conf/shell_opts + addtl_batch_args=None, + exclusive=False, **kwargs, ): - if batch_attrs is None: - batch_attrs = {} + # Sanitize/initialize the extra batch args + if addtl_batch_args is None: + addtl_batch_args = {} + + # May want to also support setattr_shell_option at some point? 
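+        # Guarantee each supported arg group key exists, defaulting to an
+        # empty dict (e.g. {} becomes
+        # {'attributes': {}, 'shell_options': {}, 'conf': {}}).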
+ for batch_arg_type in ["attributes", "shell_options", "conf"]: + + if batch_arg_type not in addtl_batch_args: + addtl_batch_args[batch_arg_type] = {} try: # TODO: add better error handling/throwing in the class func @@ -79,6 +165,13 @@ def submit( # for a single node, don't use a broker -- but introduce a flag # that can force a single node to run in a broker. + # Attach any conf inputs to the jobspec + if "conf" in addtl_batch_args: + conf_dict = addtl_batch_args['conf'] + + else: + conf_dict = None + if force_broker: LOGGER.debug( "Launch under Flux sub-broker. [force_broker=%s, " @@ -86,6 +179,8 @@ def submit( force_broker, nodes, ) + # Need to attach broker opts to the constructor? + # TODO: Add in extra broker options if not null ngpus_per_slot = int(ceil(ngpus / nodes)) jobspec = flux.job.JobspecV1.from_nest_command( [path], @@ -93,8 +188,13 @@ def submit( cores_per_slot=cores_per_task, num_slots=procs, gpus_per_slot=ngpus_per_slot, + conf=conf_dict, + exclusive=exclusive, ) else: + if conf_dict: + LOGGER.warn("'conf' options not currently supported with " + " nested=False. Ignoring.") LOGGER.debug( "Launch under root Flux broker. [force_broker=%s, " "nodes=%d]", @@ -107,6 +207,7 @@ def submit( num_nodes=nodes, cores_per_task=cores_per_task, gpus_per_task=ngpus, + exclusive=exclusive ) LOGGER.debug("Handle address -- %s", hex(id(cls.flux_handle))) @@ -118,10 +219,26 @@ def submit( jobspec.environment = dict(os.environ) # Slurp in extra attributes if not null (queue, bank, ..) - for batch_attr_name, batch_attr_value in batch_attrs.items(): + # (-S/--setattr) + for batch_attr_name, batch_attr_value in addtl_batch_args["setattr"].items(): if batch_attr_value: jobspec.setattr(batch_attr_name, batch_attr_value) - + else: + LOGGER.warn("Flux adapter v0.49 received null value of '%s'" + " for attribute '%s'. Omitting from jobspec.", + batch_opt_name, + batch_opt_value) + + # Add in job shell options if not null (-o/--setopt) + for batch_opt_name, batch_opt_value in addtl_batch_args["setopt"].items(): + if batch_opt_value: + jobspec.setattr_shell_option(batch_opt_name, batch_opt_value) + else: + LOGGER.warn("Flux adapter v0.49 received null value of '%s'" + " for shell option '%s'. Omitting from jobspec.", + batch_opt_name, + batch_opt_value) + if walltime > 0: jobspec.duration = walltime @@ -164,7 +281,8 @@ def submit( return jobid, retcode, submit_status @classmethod - def parallelize(cls, procs, nodes=None, **kwargs): + def parallelize(cls, procs, nodes=None, launcher_args=None, **kwargs): + args = ["flux", "run", "-n", str(procs)] # if we've specified nodes, add that to wreckrun @@ -192,15 +310,38 @@ def parallelize(cls, procs, nodes=None, **kwargs): args.append("-g") args.append(gpus) - # flux has additional arguments that can be passed via the '-o' flag. + # flux has additional arguments that can be passed via flags such as + # '-o', '-S', ... + if launcher_args is None: + launcher_args = {} + + # Look for optional exclusive flag + exclusive = kwargs.pop('exclusive', False) + addtl = [] + LOGGER.info("Processing 'exclusive': %s", exclusive) + if exclusive: + addtl.append("--exclusive") + addtl_args = kwargs.get("addtl_args", {}) - for key, value in addtl_args.items(): - addtl.append(f"{key}={value}") + if addtl_args and launcher_args: + # TODO: better way to mark things deprecated that's not buried? + LOGGER.warn("'args' input is deprecated in v1.1.12. Use the more " + "flexible 'launcher_args' going forward. 
Combining.") + if 'o' in launcher_args: + launcher_args['o'].update(**addtl_args) + else: + launcher_args['o'] = addtl_args + + addtl += [arg for arg in cls.render_additional_args(launcher_args)] + args.extend(addtl) + # for key, value in addtl_args.items(): + + # addtl.append(f"{key}={value}") - if addtl: - args.append("-o") - args.append(",".join(addtl)) + # if addtl: + # args.append("-o") + # args.append(",".join(addtl)) return " ".join(args) diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index 794760ba..865de765 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -95,11 +95,42 @@ def __init__(self, **kwargs): # NOTE: Host doesn"t seem to matter for FLUX. sbatch assumes that the # current host is where submission occurs. self.add_batch_parameter("nodes", kwargs.pop("nodes", "1")) + self._allocation_args = kwargs.get("allocation_args", {}) + LOGGER.info(f"Allocation args: {self._allocation_args}") + self._launcher_args = kwargs.get("launcher_args", {}) self._addl_args = kwargs.get("args", {}) + + # Setup prefixes associated with each kind of option for routing to + # appropriate jobspec api's + # Additional args can pass through on launcher only, using rule of + # verbose has '--' prefix, '=' flag/value separator and + # brief has '-' prefix, ' ' flag/value seaprator: e.g. + # --setattr=foo=bar, or -S foo=bar + self._brief_arg_info = {"prefix": "-", "sep": " "} + self._verbose_arg_info = {"prefix": "--", "sep": "="} + + # Setup known arg types + self._known_alloc_arg_types = ["attributes", "shell_options", "conf"] + self._allocation_args_map = { + "setopt": "shell_options", + "o": "shell_options", + "setattr": "attributes", + "S": "attributes", + "conf": "conf", + } + + # setup template string that all flux cli args/batch directives use for rendering + self._flux_arg_str = "{prefix}{key}{sep}{value}" + + self._attr_prefixes = ['S', 'setattr'] + self._opts_prefixes = ['o', 'setopt'] + self._conf_prefixes = ['conf'] # No abbreviated form for this in flux docs + # Add --setattr fields to batch job/broker; default to "" such # that 'truthiness' can exclude them from the jobspec if not provided queue = kwargs.pop("queue", "") + bank = kwargs.pop("bank", "") available_queues = self._interface.get_broker_queues() # Ignore queue if specified and we detect broker only has anonymous queue if not available_queues and queue: @@ -113,19 +144,62 @@ def __init__(self, **kwargs): self._batch_attrs = { "system.queue": queue, - "system.bank": kwargs.pop("bank", ""), + "system.bank": bank, } self.add_batch_parameter("queue", queue) + self.add_batch_parameter("bank", bank) + + # Pop off the exclusive flags + step_exclusive = 'exclusive' in kwargs + exclusive = kwargs.pop("exclusive", False) # Default to old < 1.1.12 behavior + if not isinstance(exclusive, dict): + self._exclusive = { + "allocation": exclusive, + "launcher": False, + } + else: + self._exclusive = exclusive + + # Check for the flag in additional args and pop it off, letting step key win + # NOTE: only need presence of key as this mimics cli like flag behavior + # TODO: promote this to formally supported behavior for all adapters + exclusive_keys = ['x', 'exclusive'] + if all(ekey in self._allocation_args for ekey in exclusive_keys): + LOGGER.warn("Repeated addition of exclusive flags 'x' and 'exclusive' in allocation_args.") + + alloc_eflags = [self._allocation_args.pop(ekey, None) for ekey in exclusive_keys] + 
if alloc_eflags: + if step_exclusive: + LOGGER.warn("Overriding batch block allocation_args with steps exclusive setting '%s'", + exclusive) + else: + self._exclusive['allocation'] = True + + if all(ekey in self._launcher_args for ekey in exclusive_keys): + LOGGER.warn("Repeated addition of exclusive flags 'x' and 'exclusive' in launcher_args.") + + launcher_eflags = [self._launcher_args.pop(ekey, None) for ekey in exclusive_keys] + if launcher_eflags: + if step_exclusive and self._exclusive['launcher']: + LOGGER.warn("Overriding batch block launcher_args with steps exclusive setting '%s'", + exclusive) + else: + self._exclusive['launcher'] = True + + self.add_batch_parameter("exclusive", self._exclusive['allocation']) - # Header is only for informational purposes. - # TODO: export these as flux directives for manual step rerunning + # Populate formally supported flux directives in the header + self._flux_directive = "#flux: " self._header = { - "nodes": "#INFO (nodes) {nodes}", - "walltime": "#INFO (walltime) {walltime}", - "version": "#INFO (flux adapter version) {version}", - "flux_version": "#INFO (flux version) {flux_version}", - "queue": "#INFO (queue) {queue}", + "nodes": f"{self._flux_directive}" + "-N {nodes}", + # NOTE: always use seconds to guard against upstream default behavior changes + "walltime": f"{self._flux_directive}" + "-t {walltime}s", + "queue": f"{self._flux_directive}" + "-q {queue}", + "bank": f"{self._flux_directive}" + "--bank {bank}", + } + if self._exclusive['allocation']: + self._header["exclusive"]= f"{self._flux_directive}" + "--exclusive" self._cmd_flags = { "ntasks": "-n", @@ -134,9 +208,17 @@ def __init__(self, **kwargs): self._extension = "flux.sh" self.h = None + # Addition info flags to add to the header: MAESTRO only! flux ignores + # anything after first non-flux-directive line so this must go last + self._info_directive = "#INFO " + self._header_info = { + "version": f"{self._info_directive}" + "(flux adapter version) {version}", + "flux_version": f"{self._info_directive}" + "(flux version) {flux_version}" + } + if uri: self.add_batch_parameter("flux_uri", uri) - self._header['flux_uri'] = "#INFO (flux_uri) {flux_uri}" + self._header_info['flux_uri'] = f"{self._info_directive}" + "(flux_uri) {flux_uri}" @property def extension(self): @@ -165,6 +247,74 @@ def _convert_walltime_to_seconds(self, walltime): LOGGER.error(msg) raise ValueError(msg) + def render_additional_args(self, args_dict): + """ + Helper to render additional argument sets to flux cli format for + use in constructing $(LAUNCHER) line and flux batch directives. + + :param args_dict: Dictionary of flux arg keys and name: value pairs + """ + def arg_info_type(arg_key): + if len(arg_key) == 1: + return {"prefix": "-", "sep": " "} + else: + return {"prefix": "--", "sep": "="} + + def render_arg_value(arg_name, arg_value): + # Handling for flag type values, e.g. -o fastload + if arg_value: + return f"{arg_name}={arg_value}" + else: + return f"{arg_name}" + + for arg_key, arg_value in args_dict.items(): + arg_info = arg_info_type(arg_key) + + for av_name, av_value in arg_value.items(): + value_str = render_arg_value(av_name, av_value) + yield "{prefix}{key}{sep}{value}".format( + prefix=arg_info['prefix'], + key=arg_key, + sep=arg_info['sep'], + value=value_str + ) + + def pack_addtl_batch_args(self): + """ + Normalize the allocation args and pack up into the interface specific + groups that have assocated jobspec methods, e.g. conf, setattr, setopt. 
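+
+        Illustrative mapping (hypothetical values), assuming the flux 0.49
+        interface's option type map:
+
+            {'setopt': {'foo': 'bar'}, 'S': {'gpumode': 'CPX'}}
+            -> {'shell_options': {'foo': 'bar'},
+                'attributes': {'gpumode': 'CPX'},
+                'conf': {}}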
+ + :return: dictionary of allocation arg groups to attach to jobspecs + """ + addtl_batch_args = { + arg_type: {} + for arg_type in self._interface.addtl_alloc_arg_types + } + for arg_key, arg_values in self._allocation_args.items(): + # TODO: move this into a validation function for pre launch + # batch args validation + arg_type = self._interface.addtl_alloc_arg_type_map(arg_key) + if arg_type is None: + # NO WARNINGS HERE: args in 'misc' type handled elsewhere + # TODO: add better mechanism for tracking whicn args + # actually get used; dicts can't do this.. + continue + + new_arg_values = arg_values + # Match default of flag types in flux cli. + # see https://github.com/flux-framework/flux-core/blob/a3860d4dea5b5a17c473cff4385276e882275252/src/bindings/python/flux/cli/base.py#L734 + # NOTE: only doing this in alloc; let LAUNCHER cli pass through + # to flux cli (None values are omittied, e.g. + # {o: fastload: None} renders to -o fastload + # Python api doesn't appear to have default value handling? + for key, value in new_arg_values.items(): + if value is None: + value = 1 + + addtl_batch_args[arg_type].update(new_arg_values) + + return addtl_batch_args + def get_header(self, step): """ Generate the header present at the top of Flux execution scripts. @@ -189,6 +339,21 @@ def get_header(self, step): for key, value in self._header.items(): if key not in batch_header: continue + + modified_header.append(value.format(**batch_header)) + + # Process any optional allocation args + # for alloc_arg_key, alloc_args in self._allocation_args.items(): + # LOGGER.info(f"{alloc_arg_key=}: {alloc_args=}") + for rendered_arg in self._interface.render_additional_args(self._allocation_args): + if rendered_arg: + # Silent pass through for old versions which don't implement any + # interface for batch/allocation args + modified_header.append(f"{self._flux_directive}" + rendered_arg) + + # Process INFO lines at the end: flux stops parsing directives after any + # lines starting tag+prefix (e.g. "#flux:" ) that doesn't match the flux directives + for key, value in self._header_info.items(): modified_header.append(value.format(**batch_header)) return "\n".join(modified_header) @@ -204,8 +369,11 @@ def get_parallelize_command(self, procs, nodes=None, **kwargs): and procs. """ ntasks = nodes if nodes else self._batch.get("nodes", 1) + # TODO: fix this temp hack when standardizing the exclusive key handling + kwargs['exclusive'] = self._exclusive['launcher'] return self._interface.parallelize( - procs, nodes=ntasks, addtl_args=self._addl_args, **kwargs) + procs, nodes=ntasks, addtl_args=self._addl_args, + launcher_args=self._launcher_args, **kwargs) def submit(self, step, path, cwd, job_map=None, env=None): """ @@ -234,7 +402,7 @@ def submit(self, step, path, cwd, job_map=None, env=None): processors = 1 else: processors = int(processors) - + force_broker = step.run.get("nested", True) walltime = \ self._convert_walltime_to_seconds(step.run.get("walltime", 0)) @@ -275,11 +443,14 @@ def submit(self, step, path, cwd, job_map=None, env=None): # Unpack waitable flag and pass it along if there: only pass it along if # it's in the step maybe, leaving each adapter to retain their defaults? 
waitable = step.run.get("waitable", False) + jobid, retcode, submit_status = \ self._interface.submit( nodes, processors, cores_per_task, path, cwd, walltime, ngpus, job_name=step.name, force_broker=force_broker, urgency=urgency, - waitable=waitable, batch_attrs=self._batch_attrs + waitable=waitable, + addtl_batch_args=self.pack_addtl_batch_args(), + exclusive=self._exclusive['allocation'] ) return SubmissionRecord(submit_status, retcode, jobid) From 4ccc47014bf9255d8b7f915c046ce2ec0ab130c3 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 14 Aug 2025 12:05:12 -0700 Subject: [PATCH 03/33] Properly apply step exclusive key as override to batch/adapter settings. Sync up slurm with new syntax --- .../interfaces/schedulerscriptadapter.py | 30 +++++++++++ .../interfaces/script/fluxscriptadapter.py | 52 +++++++++++++------ .../interfaces/script/slurmscriptadapter.py | 9 ++-- 3 files changed, 70 insertions(+), 21 deletions(-) diff --git a/maestrowf/abstracts/interfaces/schedulerscriptadapter.py b/maestrowf/abstracts/interfaces/schedulerscriptadapter.py index 128799ec..5dfcace1 100644 --- a/maestrowf/abstracts/interfaces/schedulerscriptadapter.py +++ b/maestrowf/abstracts/interfaces/schedulerscriptadapter.py @@ -92,6 +92,36 @@ def add_batch_parameter(self, name, value): """ self._batch[name] = value + @classmethod + def get_exclusive(cls, step_exclusive): + """ + Helper for normalizing new/legacy exclusive syntax in the step keys. + + :param step_exclusive: value of 'exclusive' key in StudyStep.run + :return dict: normalized dict with 'allocation' and 'launcher' keys + and bool values for each + + .. note:: + + Move this upstream into a future studystep validator? also add + hooks for per scheduler normalizing of the extra args from batch + blocks + """ + # Handle old scalar syntax which applied to allocatios only + if not isinstance(step_exclusive, dict): + return { + "allocation": step_exclusive, + "launcher": False, + } + else: + # Yaml schema limits keys already + exclusive_dict = step_exclusive + if 'allocation' not in step_exclusive: + exclusive_dict['allocation'] = False + if 'launcher' not in step_exclusive: + exclusive_dict['launcher'] = False + return exclusive_dict + @abstractmethod def get_header(self, step): """ diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index 865de765..59591e67 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -149,18 +149,11 @@ def __init__(self, **kwargs): self.add_batch_parameter("queue", queue) self.add_batch_parameter("bank", bank) - # Pop off the exclusive flags - step_exclusive = 'exclusive' in kwargs - exclusive = kwargs.pop("exclusive", False) # Default to old < 1.1.12 behavior - if not isinstance(exclusive, dict): - self._exclusive = { - "allocation": exclusive, - "launcher": False, - } - else: - self._exclusive = exclusive + # Pop off the exclusive flags (NOTE: usually comes from steps, not through constructor?) 
+ step_exclusive = 'exclusive' in kwargs # Track whether it was in there at all + self._exclusive = self.get_exclusive(kwargs.pop("exclusive", False)) # Default to old < 1.1.12 behavior - # Check for the flag in additional args and pop it off, letting step key win + # Check for the flag in additional args and pop it off, letting step key win later # NOTE: only need presence of key as this mimics cli like flag behavior # TODO: promote this to formally supported behavior for all adapters exclusive_keys = ['x', 'exclusive'] @@ -186,6 +179,7 @@ def __init__(self, **kwargs): else: self._exclusive['launcher'] = True + # NOTE: self.add_batch_parameter("exclusive", self._exclusive['allocation']) # Populate formally supported flux directives in the header @@ -198,8 +192,6 @@ def __init__(self, **kwargs): "bank": f"{self._flux_directive}" + "--bank {bank}", } - if self._exclusive['allocation']: - self._header["exclusive"]= f"{self._flux_directive}" + "--exclusive" self._cmd_flags = { "ntasks": "-n", @@ -342,9 +334,17 @@ def get_header(self, step): modified_header.append(value.format(**batch_header)) + # Handle exclusive flag + step_exclusive_given = "exclusive" in step.run + step_exclusive = self._exclusive + if step_exclusive_given: + # Override the default with this step's setting + step_exclusive.update(self.get_exclusive(step.run.get("exclusive", False))) + + if step_exclusive['allocation']: + modified_header.append(f"{self._flux_directive}" + "--exclusive") + # Process any optional allocation args - # for alloc_arg_key, alloc_args in self._allocation_args.items(): - # LOGGER.info(f"{alloc_arg_key=}: {alloc_args=}") for rendered_arg in self._interface.render_additional_args(self._allocation_args): if rendered_arg: # Silent pass through for old versions which don't implement any @@ -369,8 +369,18 @@ def get_parallelize_command(self, procs, nodes=None, **kwargs): and procs. """ ntasks = nodes if nodes else self._batch.get("nodes", 1) + + # Handle the exclusive flags, updating batch block settings (default) + # with what's set in the step + step_exclusive_given = "exclusive" in kwargs + step_exclusive = self._exclusive + if step_exclusive_given: + # Override the default with this step's setting + step_exclusive.update(self.get_exclusive(kwargs.get("exclusive", False))) + # TODO: fix this temp hack when standardizing the exclusive key handling - kwargs['exclusive'] = self._exclusive['launcher'] + kwargs['exclusive'] = step_exclusive['launcher'] + return self._interface.parallelize( procs, nodes=ntasks, addtl_args=self._addl_args, launcher_args=self._launcher_args, **kwargs) @@ -440,6 +450,14 @@ def submit(self, step, path, cwd, job_map=None, env=None): LOGGER.error(msg) raise ValueError(msg) + # Handle the exclusive flags, updating batch block settings (default) + # with what's set in the step + step_exclusive_given = "exclusive" in step.run + step_exclusive = self._exclusive + if step_exclusive_given: + # Override the default with this step's setting + step_exclusive.update(self.get_exclusive(step.run.get("exclusive", False))) + # Unpack waitable flag and pass it along if there: only pass it along if # it's in the step maybe, leaving each adapter to retain their defaults? 
waitable = step.run.get("waitable", False) @@ -450,7 +468,7 @@ def submit(self, step, path, cwd, job_map=None, env=None): job_name=step.name, force_broker=force_broker, urgency=urgency, waitable=waitable, addtl_batch_args=self.pack_addtl_batch_args(), - exclusive=self._exclusive['allocation'] + exclusive=step_exclusive['allocation'] ) return SubmissionRecord(submit_status, retcode, jobid) diff --git a/maestrowf/interfaces/script/slurmscriptadapter.py b/maestrowf/interfaces/script/slurmscriptadapter.py index 3fe1ea83..f0751235 100644 --- a/maestrowf/interfaces/script/slurmscriptadapter.py +++ b/maestrowf/interfaces/script/slurmscriptadapter.py @@ -96,7 +96,7 @@ def __init__(self, **kwargs): } self._ntask_header = "#SBATCH --ntasks={procs}" - self._exclusive = "#SBATCH --exclusive" + self._exclusive_header = "#SBATCH --exclusive" self._qos = "#SBATCH --qos={qos}" self._cmd_flags = { @@ -154,9 +154,10 @@ def get_header(self, step): if procs_in_batch or not nodes: modified_header.append(self._ntask_header.format(**resources)) - exclusive = resources.get("exclusive", False) - if exclusive: - modified_header.append(self._exclusive) + exclusive = self.get_exclusive(resources.get("exclusive", False)) + + if exclusive['allocation']: + modified_header.append(self._exclusive_header) qos = resources.get("qos") if qos: From ea2d3b44911e28ecab2beff98b85efacda750a4d Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 14 Aug 2025 12:07:51 -0700 Subject: [PATCH 04/33] update method type on exclusive sanitizer --- maestrowf/abstracts/interfaces/schedulerscriptadapter.py | 4 ++-- maestrowf/interfaces/script/fluxscriptadapter.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/maestrowf/abstracts/interfaces/schedulerscriptadapter.py b/maestrowf/abstracts/interfaces/schedulerscriptadapter.py index 5dfcace1..4f3b979e 100644 --- a/maestrowf/abstracts/interfaces/schedulerscriptadapter.py +++ b/maestrowf/abstracts/interfaces/schedulerscriptadapter.py @@ -92,8 +92,8 @@ def add_batch_parameter(self, name, value): """ self._batch[name] = value - @classmethod - def get_exclusive(cls, step_exclusive): + @staticmethod + def get_exclusive(step_exclusive): """ Helper for normalizing new/legacy exclusive syntax in the step keys. 
diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py
index 59591e67..f9dbbd4b 100644
--- a/maestrowf/interfaces/script/fluxscriptadapter.py
+++ b/maestrowf/interfaces/script/fluxscriptadapter.py
@@ -380,7 +380,7 @@ def get_parallelize_command(self, procs, nodes=None, **kwargs):
 
         # TODO: fix this temp hack when standardizing the exclusive key handling
         kwargs['exclusive'] = step_exclusive['launcher']
-        
+
         return self._interface.parallelize(
             procs, nodes=ntasks, addtl_args=self._addl_args,
             launcher_args=self._launcher_args, **kwargs)

From cd632b9cc51e246449372b61bb1191eef967a5e9 Mon Sep 17 00:00:00 2001
From: Jeremy White <44277022+jwhite242@users.noreply.github.com>
Date: Thu, 14 Aug 2025 17:10:21 -0700
Subject: [PATCH 05/33] Initial documentation for new optional argument
 handling in batch block

---
 docs/Maestro/scheduling.md | 84 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/docs/Maestro/scheduling.md b/docs/Maestro/scheduling.md
index 7efec08e..11d650a4 100644
--- a/docs/Maestro/scheduling.md
+++ b/docs/Maestro/scheduling.md
@@ -23,9 +23,13 @@ steps:
 | `procs` | No | int | Optional number of tasks in batch allocations: note this is also a per step key |
 | `flux_uri` | Yes* | str | URI of the Flux instance to schedule jobs to. * Only used with `type`=`flux`. NOTE: It is recommended to rely on environment variables instead, as URIs are very ephemeral and may change frequently.|
 | `version` | No | str | Optional version of flux scheduler; for accommodating api changes |
-| `args` | No | dict | Optional additional args to pass to scheduler; keys are arg names, values are arg values |
+| :warning: `args` | No | dict | Optional additional args to pass to scheduler; keys are arg names, values are arg values |
+| `allocation_args` | No | dict | Optional scheduler specific options/flags to add to allocations :material-information-slab-circle: flux only in :material-tag:`1.1.12` |
+| `launcher_args` | No | dict | Optional scheduler specific options/flags to add to launcher commands :material-information-slab-circle: flux only in :material-tag:`1.1.12` |
 
+!!! warning "`args` deprecated"
+    `args` has been marked deprecated in :material-tag:`1.1.12` in favor of the more flexible `allocation_args` and `launcher_args`.
 
 The information in this block is used to populate the step specific batch
 scripts with the appropriate header comment blocks (e.g. '#SBATCH --partition'
 for slurm). Additional keys such as step specific
 run locally unless at least the ``nodes`` or ``procs`` key in the step is populated.
 
 See [queues and banks](how_to_guides/running_with_flux.md#queues-and-banks) section in the how-to guide on running with flux for more discussion.
 
+### Extra Arguments
+---
+
+There are new groups in the batch block in :material-tag:`1.1.12` that facilitate adding custom options to both allocations and the `$(LAUNCHER)` invocations independently. These are grouped into two dictionaries in the batch block which are meant to enable passing in options that Maestro cannot abstract across schedulers more generally:
+
+* `allocation_args` for the allocation target (batch directives such as `#flux: --setopt=foo=bar`)
+* `launcher_args` for the `$(LAUNCHER)` target (`flux run --setopt=foo=bar`)
+
+These are loosely structured mappings which are designed to mimic CLI usage, giving an intuitive mapping from raw scheduler usage to Maestro.
+Each of these dictionaries' keys corresponds to a scheduler specific CLI option or flag. The serialization rules are as follows, with specific examples shown for the initial implementation in the flux adapter (other schedulers will apply prefix/separator rules specific to their implementation):
+
+| **Key Type** | **CLI Prefix** | **Separator** | **Example YAML** | **Example CLI Output** |
+| :- | :-: | :-: | :-: | :- |
+| Single letter | `-` | space (`" "`) | `o: {bar: 42}` | `-o bar=42` |
+| Multi-letter | `--` | `=` | `setopt: {foo: bar}` | `--setopt=foo=bar` |
+| Boolean flag w/key | as above | as above | `setopt: {foobar: }` | `--setopt=foobar` |
+| Boolean flag w/o key | as above | as above | `exclusive: ` | `--exclusive` |
+
+Note that for the boolean flag forms a space is required after the `:` (as in `foobar: ` and `exclusive: `); otherwise YAML will fail to parse the null value used to tag a key as a boolean flag. See [flux](#flux) for special considerations for the `allocation_args`.
+
+
 ## LAUNCHER Token
 ---
@@ -187,6 +211,64 @@ See the [flux framework](https://flux-framework.readthedocs.io/en/latest/index.h
 The Flux scheduler itself and Maestro's flux adapter are still in a state of flux and may go through breaking changes more frequently than the Slurm and LSF scheduler adapters.
 
+### Extra Flux Args
+---
+
+As of :material-tag:`1.1.12`, the flux adapter takes advantage of new argument pass-through for scheduler options that Maestro cannot abstract away. This is done via `allocation_args` and `launcher_args` in the batch block, which expand upon the previous `args` input that applied only to `$(LAUNCHER)`. There are some caveats here due to the way Maestro talks to flux. The current flux adapters all use Flux's Python APIs to build the batch jobs, with the batch script serialized separately rather than submitted directly as with the other schedulers. A consequence of this is that `allocation_args` map onto specific call points in that Python API, and thus the option pass-through is not quite arbitrary. There are four currently supported option groups for allocations which cover a majority of use cases (open an issue and let us know if one you need isn't covered!):
+
+* shell options: `-o/--setopt` prefixed arguments
+* attributes: `-S/--setattr` prefixed arguments
+* conf: `--conf` prefixed arguments
+* exclusive flags: `-x, --exclusive` are used to set defaults, with step exclusive keys overriding
+
+!!! warning
+
+    All other flags will be allowed in `allocation_args`, but they will essentially be ignored when serializing the step scripts and submitting jobs.
+
+The `launcher_args` (`$(LAUNCHER)`) will pass through anything, as it is a string generator just like the other script adapters. :warning: These are not validated! Passing arguments that flux doesn't know what to do with may result in errors.
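+
+For orientation, the `allocation_args` in the example batch block below are roughly the batch analog of flags you could hand to `flux batch` yourself. A hypothetical equivalent command line (a sketch, not Maestro output; `batch_script.sh` is a placeholder) would be:
+
+``` console
+flux batch -N1 --setopt=foo=bar -o bar=42 --setattr=foobar=whoops --conf=resource.rediscover=true batch_script.sh
+```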
+
+#### Example Batch Block
+---
+
+``` yaml
+batch:
+  type: flux
+  host: machineA
+  bank: guests
+  queue: debug
+  allocation_args:
+    setopt:
+      foo: bar
+    o:
+      bar: 42
+    setattr:
+      foobar: "whoops"
+    conf:
+      resource.rediscover: "true" # Use string "true" for Flux compatibility, not "True" or bool True
+  launcher_args:
+    setopt:
+      optiona:  # Boolean flag, no value needed
+```
+
+#### Example Batch Script
+---
+Assuming the step has keys `{procs: 1, nodes: 1, cores per task: 1, walltime: "5:00"}`:
+
+``` console
+#flux: -N 1
+#flux: -t 300s
+#flux: -q debug
+#flux: --bank guests
+#flux: --setopt=foo=bar
+#flux: -o bar=42
+#flux: --setattr=foobar=whoops
+#flux: --conf=resource.rediscover=true
+
+flux run -n 1 -N 1 -c 1 --setopt=optiona myapplication
+```
+
+!!! note
+
+    Flux directives are shown here for illustration even though the Python API is used for submission. The directives are still written into the step scripts, retaining a record of what was submitted and viewable with the dry run feature.
 
 ## LSF: a Tale of Two Launchers
 ----

From 1e824e18a970729289f4d86b9bd520e768751fb4 Mon Sep 17 00:00:00 2001
From: Jeremy White <44277022+jwhite242@users.noreply.github.com>
Date: Tue, 26 Aug 2025 10:39:53 -0700
Subject: [PATCH 06/33] Fix stray refactoring debris

---
 maestrowf/interfaces/script/_flux/flux0_49_0.py  | 8 ++++----
 maestrowf/interfaces/script/fluxscriptadapter.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/maestrowf/interfaces/script/_flux/flux0_49_0.py b/maestrowf/interfaces/script/_flux/flux0_49_0.py
index 36d53e06..98bf1dc4 100644
--- a/maestrowf/interfaces/script/_flux/flux0_49_0.py
+++ b/maestrowf/interfaces/script/_flux/flux0_49_0.py
@@ -226,8 +226,8 @@ def submit(
             else:
                 LOGGER.warn("Flux adapter v0.49 received null value of '%s'"
                             " for attribute '%s'. Omitting from jobspec.",
-                            batch_opt_name,
-                            batch_opt_value)
+                            batch_attr_value,
+                            batch_attr_name)
 
             # Add in job shell options if not null (-o/--setopt)
             for batch_opt_name, batch_opt_value in addtl_batch_args["setopt"].items():
                 if batch_opt_value:
                     jobspec.setattr_shell_option(batch_opt_name, batch_opt_value)
                 else:
                     LOGGER.warn("Flux adapter v0.49 received null value of '%s'"
                                 " for shell option '%s'. 
Omitting from jobspec.", - batch_opt_name, - batch_opt_value) + batch_opt_value, + batch_opt_name) if walltime > 0: jobspec.duration = walltime diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index f9dbbd4b..dd71fd0a 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -164,7 +164,7 @@ def __init__(self, **kwargs): if alloc_eflags: if step_exclusive: LOGGER.warn("Overriding batch block allocation_args with steps exclusive setting '%s'", - exclusive) + step_exclusive) else: self._exclusive['allocation'] = True @@ -175,7 +175,7 @@ def __init__(self, **kwargs): if launcher_eflags: if step_exclusive and self._exclusive['launcher']: LOGGER.warn("Overriding batch block launcher_args with steps exclusive setting '%s'", - exclusive) + step_exclusive) else: self._exclusive['launcher'] = True From 0a79699a6da0cbac1697a7c86c13f96245cf3b05 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Tue, 9 Sep 2025 18:04:10 -0700 Subject: [PATCH 07/33] Fixtures and tests for verifying flux script writing correctly passes through allocation and and launcher args --- tests/conftest.py | 200 +++++++++++++++++- ...ye_world_GREETING.Hello.NAME.Pam.flux.sh.1 | 15 ++ ...lo_world_GREETING.Hello.NAME.Pam.flux.sh.1 | 15 ++ .../specs/hello_bye_parameterized_flux_1.yaml | 67 ++++++ .../script/test_fluxscriptadapter.py | 83 +++++++- 5 files changed, 375 insertions(+), 5 deletions(-) create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 create mode 100644 tests/data/specs/hello_bye_parameterized_flux_1.yaml diff --git a/tests/conftest.py b/tests/conftest.py index 3d1863d3..17de4523 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,10 +1,24 @@ from collections import defaultdict +import difflib +import logging import os +from pathlib import Path +import re +import shutil from subprocess import check_output import pytest -from maestrowf.utils import parse_version +from maestrowf.datastructures.core import Study +from maestrowf.datastructures.environment import Variable +from maestrowf.specification import YAMLSpecification + +from maestrowf.utils import ( + create_parentdir, + LoggerUtility, + make_safe_path, + parse_version +) from packaging.version import InvalidVersion SCHEDULERS = set(('sched_lsf', 'sched_slurm', 'sched_flux')) @@ -143,3 +157,187 @@ def load_status_csv(file_name): return os.path.join(dirpath, "status", "test_status_data", file_name) return load_status_csv + + +@pytest.fixture +def data_dir(): + """Base directory for top level shared test data.""" + return Path(__file__).parent / "data" + + +@pytest.fixture +def variant_expected_output(data_dir): + def _load_variant_expected_output(expected_output_name): + return data_dir / "expected_spec_outputs" / expected_output_name + + return _load_variant_expected_output + + +@pytest.fixture +def variant_spec_path(data_dir): + """Utility fixture to load yaml spec's from top level shared test data""" + def _load_variant_spec(spec_name): + # Not loading it here: defer to yamlspecification.. 
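+        # Returns a Path; tests hand it to
+        # YAMLSpecification.load_specification themselves.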
+ return data_dir / "specs" / spec_name + return _load_variant_spec + + +@pytest.fixture +def load_study(): + """Fixture to provide an unexecuted study object""" + def _load_study(spec_path, output_path, dry_run=False): + + # Setup some default args to use for testing + use_tmp_dir = True # NOTE: likely want to let pytest control this? + hash_ws = False + throttle = 0 + submission_attempts = 3 + restart_limit = 1 + + # Load the Specification + spec = YAMLSpecification.load_specification(spec_path) + + environment = spec.get_study_environment() + steps = spec.get_study_steps() + + # Set up the output directory. + out_dir = environment.remove("OUTPUT_PATH") + if output_path: + # If out is specified in the args, ignore OUTPUT_PATH. + output_path = os.path.abspath(output_path) + else: + if out_dir is None: + # If we don't find OUTPUT_PATH in the environment, assume pwd. + out_dir = os.path.abspath("./") + else: + # We just take the value from the environment. + out_dir = os.path.abspath(out_dir.value) + + out_name = spec.name.replace(" ", "_") + # NOTE: shouldn't need timestamp for testing; omitting for now + # out_name = "{}_{}".format( + # spec.name.replace(" ", "_"), + # time.strftime("%Y%m%d-%H%M%S") + # ) + output_path = make_safe_path(out_dir, *[out_name]) + environment.add(Variable("OUTPUT_PATH", output_path)) + + # Set up file logging + create_parentdir(os.path.join(output_path, "logs")) + output_path = Path(output_path) + log_path = output_path / "logs" / "{}.log".format(spec.name) + # log_path = os.path.join(output_path, "logs", "{}.log".format(spec.name)) + # TODO: revisit this logger/name -> don't use __name__ as in maestro.py? + LOGGER = logging.getLogger() + LOG_UTIL = LoggerUtility(LOGGER) + LFORMAT = "[%(asctime)s: %(levelname)s] %(message)s" + LOG_UTIL.add_file_handler(log_path, LFORMAT, 2) # INFO level + + # Addition of the $(SPECROOT) to the environment. + spec_root = os.path.split(spec_path)[0] + spec_root = Variable("SPECROOT", os.path.abspath(spec_root)) + environment.add(spec_root) + + # Don't wire up pgen for now. + parameters = spec.get_parameters() + + # Setup the study. + study = Study(spec.name, spec.description, studyenv=environment, + parameters=parameters, steps=steps, out_path=output_path) + + # Set up the study workspace and configure it for execution. + study.setup_workspace() + study.configure_study( + throttle=throttle, + submission_attempts=submission_attempts, + restart_limit=restart_limit, + use_tmp=use_tmp_dir, + hash_ws=hash_ws, + dry_run=dry_run) + study.setup_environment() + + batch = {"type": "local"} + if spec.batch: + batch = spec.batch + if "type" not in batch: + batch["type"] = "local" + + # Copy the spec to the output directory + shutil.copy(spec_path, study.output_path) + + # Use the Conductor's classmethod to store the study. + # Conductor.store_study(study) + # Conductor.store_batch(study.output_path, batch) + + return study + + return _load_study + + +@pytest.fixture +def text_diff(): + """ + Fixture to diff two text streams, ignoring lines that match any pattern in + optional ignore_patterns. Ignore patterns are a list of regexes. + """ + def _diff(actual, expected, ignore_patterns=None): + """ + Compare two text streams, ignoring lines matching any ignore_patterns. + Text streams are assumed to not be split on line endings yet. 
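+
+        Illustrative call (hypothetical inputs)::
+
+            text_diff(written_script, expected_script,
+                      ignore_patterns=[r'#INFO \(flux version\)'])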
+ + :param actual: The actual text output (str) + :param expected: The expected text (str) + :param ignore_patterns: List of regex patterns to ignore/whitelist (optional) + :raises AssertionError: If the filtered texts do not match + """ + if ignore_patterns is None: + ignore_patterns = [] + + def line_matches(line): + return [re.search(pattern, line) for pattern in ignore_patterns] + + def filter_lines(lines): + for line in lines: + if any(line_matches(line)): + continue + yield line + + actual_lines = actual.splitlines() + if actual_lines and actual_lines[-1].strip() == "": + actual_lines.pop() + expected_lines = expected.splitlines() + if expected_lines and expected_lines[-1].strip() == "": + expected_lines.pop() + + def annotate_ignored_lines(lines_to_annotate): + for line in lines_to_annotate: + if any(line_matches(line)): + yield f"IGNORED: {line}" + else: + yield line + + actual_filtered_lines = list(filter_lines(actual_lines)) + + expected_filtered_lines = list(filter_lines(expected_lines)) + + if actual_filtered_lines != expected_filtered_lines: + actual_annotated_lines = list(annotate_ignored_lines(actual_lines)) + + expected_annotated_lines = list(annotate_ignored_lines(expected_lines)) + + diff = list( + difflib.unified_diff( + expected_annotated_lines, + actual_annotated_lines, + fromfile="expected", + tofile="actual", + lineterm="" + ) + ) + + diff = "\n".join(diff) + pytest.fail(f"Text streams differ (ignoring marked lines):\n{diff}") + + return True + + return _diff diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 new file mode 100644 index 00000000..47a0100b --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -0,0 +1,15 @@ +#!/bin/bash +#flux: -N 1 +#flux: -t 60.0s +#flux: -q pdebug +#flux: --bank guests +#flux: --exclusive +#flux: --setopt=foo=bar +#flux: -o bar=42 +#flux: --setattr=gpumode=CPX +#flux: --conf=resource.rediscover=true +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.76.0 + +flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Bye, World!" > bye.txt +flux run -n 1 -N 1 -c 1 --setopt=fastload sleep 1 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 new file mode 100644 index 00000000..512933cb --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -0,0 +1,15 @@ +#!/bin/bash +#flux: -N 1 +#flux: -t 60.0s +#flux: -q pdebug +#flux: --bank guests +#flux: --exclusive +#flux: --setopt=foo=bar +#flux: -o bar=42 +#flux: --setattr=gpumode=CPX +#flux: --conf=resource.rediscover=true +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.76.0 + +flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Hello, Pam!" 
> Hello_Pam.txt +flux run -n 1 -N 1 -c 1 --setopt=fastload sleep 1 diff --git a/tests/data/specs/hello_bye_parameterized_flux_1.yaml b/tests/data/specs/hello_bye_parameterized_flux_1.yaml new file mode 100644 index 00000000..4dc0d924 --- /dev/null +++ b/tests/data/specs/hello_bye_parameterized_flux_1.yaml @@ -0,0 +1,67 @@ +description: + name: hello_bye_world_flux_1 + description: Test variant of hello_bye_parameterized that exercises extra flux args + +batch: + type : flux + host : rzadams + bank : guests + queue : pdebug + allocation_args: + setopt: # > 1 letter gets '--' prefix: --setopt foo=true + foo: bar + o: # single letter = '-' prefix; o == setopt for flux: -o bar=42 + bar: 42 + setattr: + gpumode: CPX + conf: + resource.rediscover: "true" # Flux wants 'true', not 'True' that python bool serializes to + + launcher_args: + setopt: + fastload: + + + +env: + variables: + OUTPUT_PATH: hello_bye_world + labels: + OUT_FORMAT: $(GREETING)_$(NAME).txt + +study: + - name: hello_world + description: Say hello to someone! + run: + cmd: | + $(LAUNCHER) echo "$(GREETING), $(NAME)!" > $(OUT_FORMAT) + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + exclusive: True + walltime: "00:60" + + - name: bye_world + description: Say bye to someone! + run: + cmd: | + $(LAUNCHER) echo "Bye, World!" > bye.txt + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + walltime: "00:60" + depends: [hello_world] + +global.parameters: + # NAME: + # values: [Pam, Jim, Michael, Dwight] + # label: NAME.%% + # GREETING: + # values: [Hello, Ciao, Hey, Hi] + # label: GREETING.%% + NAME: + values: [Pam] + label: NAME.%% + GREETING: + values: [Hello] + label: GREETING.%% diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py index b433e3b1..cd14a5fd 100644 --- a/tests/interfaces/script/test_fluxscriptadapter.py +++ b/tests/interfaces/script/test_fluxscriptadapter.py @@ -34,9 +34,17 @@ as it was converted to dynamically load all ScriptAdapters using a namespace plugin methodology. """ +import re + from maestrowf.interfaces.script.fluxscriptadapter import FluxScriptAdapter from maestrowf.interfaces import ScriptAdapterFactory +from maestrowf.datastructures.core import Study +from maestrowf.datastructures.core.executiongraph import ExecutionGraph, _StepRecord +from maestrowf.specification import YAMLSpecification + +from rich.pretty import pprint +import pytest def test_flux_adapter(): """ @@ -44,7 +52,7 @@ def test_flux_adapter(): this is validate that existing specifications do not break. 
:return: """ - assert(FluxScriptAdapter.key == 'flux') + assert FluxScriptAdapter.key == 'flux' def test_flux_adapter_in_factory(): @@ -55,10 +63,77 @@ def test_flux_adapter_in_factory(): """ saf = ScriptAdapterFactory # Make sure FluxScriptAdapter is in the facotries object - assert(saf.factories[FluxScriptAdapter.key] == FluxScriptAdapter) + assert saf.factories[FluxScriptAdapter.key] == FluxScriptAdapter # Make sure the FluxScriptAdapter key is in the valid adapters - assert(FluxScriptAdapter.key in ScriptAdapterFactory.get_valid_adapters()) + assert FluxScriptAdapter.key in ScriptAdapterFactory.get_valid_adapters() # Make sure that get_adapter returns the FluxScriptAdapter when asking # for it by key - assert(ScriptAdapterFactory.get_adapter(FluxScriptAdapter.key) == + assert (ScriptAdapterFactory.get_adapter(FluxScriptAdapter.key) == FluxScriptAdapter) + + +@pytest.mark.parametrize( + "spec_file, variant_id, expected_batch_files", + [ + ( + "hello_bye_parameterized_flux", + 1, + { # NOTE: should we contsruct this, and just use study + step_id + sched.sh.variant_id? + "hello_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1", + "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1" + } + ), + ] +) +def test_flux_script_serialization( + spec_file, + variant_id, + expected_batch_files, + variant_spec_path, + load_study, + variant_expected_output, + text_diff, + tmp_path # Pytest tmp dir fixture: Path()): +): + spec_path = variant_spec_path(spec_file + f"_{variant_id}.yaml") + pprint(spec_path) + yaml_spec = YAMLSpecification.load_specification(spec_path) # Load this up to get the batch info + + study: Study = load_study(spec_path, tmp_path, dry_run=True) + + pkl_path: str + ex_graph: ExecutionGraph + pkl_path, ex_graph = study.stage() + ex_graph.set_adapter(yaml_spec.batch) + + adapter = ScriptAdapterFactory.get_adapter(yaml_spec.batch["type"]) + adapter = adapter(**ex_graph._adapter) + + # Setup a diff ignore pattern to filter out the #INFO (flux version ... 
+ ignore_patterns = [ + re.compile(r'#INFO \(flux version\)') + ] + + # Loop over the steps and execute them + for step_name, step_record in ex_graph.values.items(): + if step_name == "_source": + continue + + ex_graph._execute_record(step_record, adapter) + # pprint("Step name:") + # pprint(step_name) + # pprint("Step record:") + # pprint(step_record) + # # pprint(_record) + # pprint("Written script:") + with open(step_record.script, "r") as written_script_file: + written_script = written_script_file.read() + # pprint(written_script.splitlines()) + + assert step_name in expected_batch_files + + with open(variant_expected_output(expected_batch_files[step_name]), 'r') as ebf: + expected_script = ebf.read() + + # assert written_script == expected_script + assert text_diff(written_script, expected_script, ignore_patterns=ignore_patterns) From dfb0aea218c3f980f64c1632dff0f3c99a023a3d Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 11 Sep 2025 11:09:08 -0700 Subject: [PATCH 08/33] Guard flux script writer to only run when flux is present --- tests/interfaces/script/test_fluxscriptadapter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py index cd14a5fd..a338b812 100644 --- a/tests/interfaces/script/test_fluxscriptadapter.py +++ b/tests/interfaces/script/test_fluxscriptadapter.py @@ -72,6 +72,7 @@ def test_flux_adapter_in_factory(): FluxScriptAdapter) +@pytest.mark.sched_flux # This one needs to at least import flux to work @pytest.mark.parametrize( "spec_file, variant_id, expected_batch_files", [ From 4e70971635a1425f1a09b3e36118c6d311f830c9 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:16:28 -0700 Subject: [PATCH 09/33] Add helper for flattening dictionaries into dot syntax strings --- maestrowf/utils.py | 40 +++++++++++++++++++++++++++++++++++ tests/utils/test_utils.py | 44 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 83 insertions(+), 1 deletion(-) diff --git a/maestrowf/utils.py b/maestrowf/utils.py index 6873f654..cbb44d51 100644 --- a/maestrowf/utils.py +++ b/maestrowf/utils.py @@ -385,3 +385,43 @@ def parse_version(version_string): LOGGER.info("Could not parse version '%s'", version_string) raise InvalidVersion + + +def dict_to_dot_strings(dictionary, prefix='', result=None, print_none=True): + """ + Helper to convert dictionaries into dot syntax strings, e.g. convertin + from maestro's yaml into flux's treedict style syntax for more concise + batch directives. 
+ + :param dictionary: Dictionary of args to flatten + :type dictionary: dict + :param prefix: Prefix to use at current level (used for recursion) + :type prefix: str + :param result: Accumulated result list (used for recursion) + :type result: list or None + :param print_none: Flag to print =None literal if value=None, or "" + :type print_none: bool + :returns: list of string views of dictionary in 'key.value' syntax + :rtype: list + + Example + >>> dict_to_dot_strings({"foo": {"bar": 2, "foo2": 42}}) + ['foo.bar=2', 'foo.foo2=42'] + """ + if result is None: + result = [] + + for key, value in dictionary.items(): + # Create the current key path + current_key = f"{prefix}.{key}" if prefix else key + + if isinstance(value, dict): + # Recursively process nested dictionaries + dict_to_dot_strings(value, current_key, result, print_none) + elif value is None and not print_none: + result.append(f"{current_key}") + else: + # Add the terminal value and complete the string + result.append(f"{current_key}={value}") + + return result diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index ed189261..889251af 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -7,7 +7,7 @@ from maestrowf.datastructures.core import ParameterGenerator from maestrowf.datastructures.core.parameters import Combination -from maestrowf.utils import make_safe_path, parse_version +from maestrowf.utils import make_safe_path, parse_version, dict_to_dot_strings from packaging.version import Version, InvalidVersion from hypothesis import given, HealthCheck, settings, strategies @@ -174,3 +174,45 @@ def test_path_sanitizer(tmpdir, test_path_str): # Handle case of strings that only contain characters make_safe_path says are invalid, # leading to empty strings. 
assert test_path_name == "" + + +@pytest.mark.parametrize( + "dict_to_flatten, print_none, expected_strings", + [ + ( + {"foo": {"bar": 2}}, + True, + ["foo.bar=2"], + ), + ( + {"foo": {"bar": 2, "foo2": 42}}, + True, + ["foo.bar=2", "foo.foo2=42"], + ), + ( + {"foo": {"bar": 2, "foo3": None}}, + False, + ["foo.bar=2", "foo.foo3"], + ), + ( + {"foo": {"bar": 2, "foo3": None}}, + True, + ["foo.bar=2", "foo.foo3=None"], + ), + ( + { + "foo": {"bar": 2, "foo2": 42}, + "foobar": {"too_many_foos": 9001}, + }, + True, + ["foo.bar=2", "foo.foo2=42", "foobar.too_many_foos=9001"], + ), + ], +) +def test_dict_to_strings(dict_to_flatten, print_none, expected_strings): + """ + Test flattening of dictionaries to lists of dot syntax strings + """ + dot_strings = dict_to_dot_strings(dict_to_flatten, print_none=print_none) + + assert dot_strings == expected_strings From d450bbd8f5783ec237a14a72598cf316d97b820d Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:18:34 -0700 Subject: [PATCH 10/33] Update flux script test and test spec to use new dot string util for flux directives --- .../interfaces/script/_flux/flux0_49_0.py | 25 +++++++++++++------ .../interfaces/script/fluxscriptadapter.py | 2 +- .../specs/hello_bye_parameterized_flux_1.yaml | 3 ++- 3 files changed, 20 insertions(+), 10 deletions(-) diff --git a/maestrowf/interfaces/script/_flux/flux0_49_0.py b/maestrowf/interfaces/script/_flux/flux0_49_0.py index 98bf1dc4..b90c7b35 100644 --- a/maestrowf/interfaces/script/_flux/flux0_49_0.py +++ b/maestrowf/interfaces/script/_flux/flux0_49_0.py @@ -10,6 +10,7 @@ SubmissionCode, ) from maestrowf.abstracts.interfaces.flux import FluxInterface +from maestrowf.utils import dict_to_dot_strings LOGGER = logging.getLogger(__name__) @@ -35,7 +36,7 @@ class FluxInterface_0490(FluxInterface): # Config for the groups of alloc args with their own jobspec methods known_alloc_arg_types = ["attributes", "shell_options", "conf"] - addtl_alloc_arg_type_map = { + _addtl_alloc_arg_type_map = { "setopt": "shell_options", "o": "shell_options", "setattr": "attributes", @@ -70,7 +71,7 @@ def addtl_alloc_arg_type_map(cls, option): :param option: option string corresponding to flux cli input :return: string, one of known_alloc_arg_types """ - return cls.addtl_alloc_arg_type_map.get(option, None) + return cls._addtl_alloc_arg_type_map.get(option, None) @classmethod def get_flux_urgency(cls, urgency) -> int: @@ -115,14 +116,22 @@ def render_arg_value(arg_name, arg_value): for arg_key, arg_value in args_dict.items(): arg_info = arg_info_type(arg_key) - for av_name, av_value in arg_value.items(): - value_str = render_arg_value(av_name, av_value) - yield "{prefix}{key}{sep}{value}".format( + dot_string_args = dict_to_dot_strings(arg_value, print_none=False) + for dot_string_arg in dot_string_args: + yield "{prefix}{key}{sep}{dotstringvalue}".format( prefix=arg_info['prefix'], key=arg_key, sep=arg_info['sep'], - value=value_str + dotstringvalue=dot_string_arg ) + # for av_name, av_value in arg_value.items(): + # value_str = render_arg_value(av_name, av_value) + # yield "{prefix}{key}{sep}{value}".format( + # prefix=arg_info['prefix'], + # key=arg_key, + # sep=arg_info['sep'], + # value=value_str + # ) @classmethod def submit( @@ -220,7 +229,7 @@ def submit( # Slurp in extra attributes if not null (queue, bank, ..) 
# (-S/--setattr) - for batch_attr_name, batch_attr_value in addtl_batch_args["setattr"].items(): + for batch_attr_name, batch_attr_value in addtl_batch_args["attributes"].items(): if batch_attr_value: jobspec.setattr(batch_attr_name, batch_attr_value) else: @@ -230,7 +239,7 @@ def submit( batch_attr_name) # Add in job shell options if not null (-o/--setopt) - for batch_opt_name, batch_opt_value in addtl_batch_args["setopt"].items(): + for batch_opt_name, batch_opt_value in addtl_batch_args["shell_options"].items(): if batch_opt_value: jobspec.setattr_shell_option(batch_opt_name, batch_opt_value) else: diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index dd71fd0a..93f9e24d 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -280,7 +280,7 @@ def pack_addtl_batch_args(self): """ addtl_batch_args = { arg_type: {} - for arg_type in self._interface.addtl_alloc_arg_types + for arg_type in self._interface.addtl_alloc_arg_types() } for arg_key, arg_values in self._allocation_args.items(): # TODO: move this into a validation function for pre launch diff --git a/tests/data/specs/hello_bye_parameterized_flux_1.yaml b/tests/data/specs/hello_bye_parameterized_flux_1.yaml index 4dc0d924..8bffcad0 100644 --- a/tests/data/specs/hello_bye_parameterized_flux_1.yaml +++ b/tests/data/specs/hello_bye_parameterized_flux_1.yaml @@ -15,7 +15,8 @@ batch: setattr: gpumode: CPX conf: - resource.rediscover: "true" # Flux wants 'true', not 'True' that python bool serializes to + resource: + rediscover: "true" # Flux wants 'true', not 'True' that python bool serializes to launcher_args: setopt: From af62de276600ba4d06fb06cec8aa1c57cb590ff5 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 18 Sep 2025 18:19:45 -0700 Subject: [PATCH 11/33] Add test for verifying live jobspecs get correct allocation and launcher args --- tests/conftest.py | 60 ++++++++++++++ .../script/test_fluxscriptadapter.py | 83 +++++++++++++++++++ 2 files changed, 143 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index 17de4523..6e3d4e63 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -341,3 +341,63 @@ def annotate_ignored_lines(lines_to_annotate): return True return _diff + + +@pytest.mark.sched_flux +@pytest.fixture +def flux_jobspec_check(): + import flux + + def _diff_jobspec_keys(jobid, expected, path=None): + """ + Helper to recursively check for values in flux jobspec's. + + :param jobid: flux jobid to look up jobspec for (int or f58) + :param expected: nested dicts of key/values to verify in jobspec (dict) + :param path: optional initial path in jobspec dict to search under + """ + # NOTE: may need some helpers here if needing to mess with uri to change broker + fh = flux.Flux() + + # Get the jobspec (do we care about original?) + # NOTE: job_kvs_lookup vs job_info_lookup? does it matter? 
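
The quoting of "true" in the spec change above matters because of how Python serializes booleans; a quick illustration of the gotcha the spec comment is warning about:

```python
import json

# str() on a Python bool yields 'True', which is not the lowercase 'true'
# flux expects; JSON serialization produces the expected form.
print(str(True))         # True
print(json.dumps(True))  # true

# Quoting the value in the YAML spec ("true") keeps it a plain string and
# sidesteps the bool round trip entirely.
```
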
+ + # Default returns id, jobspec keys + jobspec = flux.job.job_kvs_lookup(fh, flux.job.JobID(jobid), decode=True, original=False)['jobspec'] + + def assert_nested_dict_subset(actual, expected, path=None): + """Recursively assert key/values in actual/expected dictionaries""" + if path is None: + path = [] + + for key, expected_value in expected.items(): + current_path = path + [repr(key)] + assert key in actual, f"Missing key at {'.'.join(current_path)}" + actual_value = actual[key] + if isinstance(expected_value, dict): + assert isinstance(actual_value, dict), ( + f"Expected dict at {'.'.join(current_path)}, " + f"got {type(actual_value).__name__}" + ) + assert_nested_dict_subset(actual_value, + expected_value, + current_path) + elif isinstance(expected_value, list): + assert isinstance(actual_value, list), ( + f"Expected list at {'.'.join(current_path)}, " + f"got {type(actual_value).__name__}" + ) + for i, item in enumerate(expected_value): + if item is not ...: # Ellipsis means "skip this index" + assert_nested_dict_subset(actual_value[i], + item, + current_path + [str(i)]) + else: + assert actual_value == expected_value, ( + f"Value mismatch at {'.'.join(current_path)}: " + f"expected {expected_value!r}, got {actual_value!r}" + ) + + assert_nested_dict_subset(jobspec, expected, path=path) + + return _diff_jobspec_keys diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py index a338b812..f3092e22 100644 --- a/tests/interfaces/script/test_fluxscriptadapter.py +++ b/tests/interfaces/script/test_fluxscriptadapter.py @@ -138,3 +138,86 @@ def test_flux_script_serialization( # assert written_script == expected_script assert text_diff(written_script, expected_script, ignore_patterns=ignore_patterns) + + + +@pytest.mark.sched_flux # This one needs to at least import flux to work +@pytest.mark.parametrize( + "spec_file, variant_id, expected_jobspec_keys", + [ + ( + "hello_bye_parameterized_flux", + 1, + { # NOTE: should we contsruct this, and just use study + step_id + sched.sh.variant_id? + "hello_world_GREETING.Hello.NAME.Pam": { + 'resources': [{'exclusive': True}], + 'attributes': { + 'system': { + 'shell': { + 'options': {'bar': 42, 'foo': 'bar'}, + }, + 'files': { + 'conf.json': {'data': {'resource': {'rediscover': True}}} + }, + 'gpumode': 'SPC' + } + } + }, + # "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1" + } + ), + ] +) +def test_flux_job_opts( + spec_file, + variant_id, + expected_jobspec_keys, + variant_spec_path, + load_study, + flux_jobspec_check, + tmp_path # Pytest tmp dir fixture: Path()): +): + spec_path = variant_spec_path(spec_file + f"_{variant_id}.yaml") + pprint(spec_path) + yaml_spec = YAMLSpecification.load_specification(spec_path) # Load this up to get the batch info + + study: Study = load_study(spec_path, tmp_path, dry_run=False) + + pkl_path: str + ex_graph: ExecutionGraph + pkl_path, ex_graph = study.stage() + ex_graph.set_adapter(yaml_spec.batch) + + adapter = ScriptAdapterFactory.get_adapter(yaml_spec.batch["type"]) + adapter = adapter(**ex_graph._adapter) + + # Setup a diff ignore pattern to filter out the #INFO (flux version ... 
+ ignore_patterns = [ + re.compile(r'#INFO \(flux version\)') + ] + + # Loop over the steps and execute them + for step_name, step_record in ex_graph.values.items(): + if step_name == "_source": + continue + + ex_graph._execute_record(step_record, adapter) + pprint("Step name:") + pprint(step_name) + pprint("Step record:") + pprint(step_record) + + retcode, job_status = ex_graph.check_study_status() + + pprint(f"{retcode=}, {job_status=}") + for record_name, status in job_status.items(): + if step_name == record_name: + jobid = ex_graph.values[record_name].jobid[-1] + flux_jobspec_check(jobid, expected_jobspec_keys[step_name]) + + # pprint(_record) + # pprint("Written script:") + + # assert step_name in expected_batch_files + + assert False From 567e51c09055e1cd8fd12e84a88417c5fe1ef141 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Tue, 7 Oct 2025 10:50:32 -0700 Subject: [PATCH 12/33] Add funcs for working with dotpath dicts/tuples and tests --- maestrowf/utils.py | 100 ++++++++++++++++++++++++++++++++++++- tests/utils/test_utils.py | 101 +++++++++++++++++++++++++++++++++++++- 2 files changed, 199 insertions(+), 2 deletions(-) diff --git a/maestrowf/utils.py b/maestrowf/utils.py index cbb44d51..50b5aa30 100644 --- a/maestrowf/utils.py +++ b/maestrowf/utils.py @@ -30,6 +30,8 @@ """A collection of more general utility functions.""" from collections import OrderedDict +from collections.abc import Mapping + import coloredlogs import logging import os @@ -44,6 +46,8 @@ from packaging.version import parse as pkg_ver_parse from packaging.version import Version, InvalidVersion +from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple + LOGGER = logging.getLogger(__name__) _SEMVER_REGEX = re.compile(r"""^(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)\.(?P0|[1-9]\d*)(?:-(?P(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+(?P[0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$""") @@ -389,7 +393,7 @@ def parse_version(version_string): def dict_to_dot_strings(dictionary, prefix='', result=None, print_none=True): """ - Helper to convert dictionaries into dot syntax strings, e.g. convertin + Helper to convert dictionaries into dot syntax strings, e.g. converting from maestro's yaml into flux's treedict style syntax for more concise batch directives. @@ -425,3 +429,97 @@ def dict_to_dot_strings(dictionary, prefix='', result=None, print_none=True): result.append(f"{current_key}={value}") return result + + +def iter_dotpath_items(mapping: Mapping[str, Any], sep: str = ".", prefix: Optional[str] = None): + """ + Yield (dotpath, value) pairs for all leaves in a nested mapping/dictionary. + Leaves are any non-mapping objects. + + :param mapping: Mapping to flatten into list of dotpath strings + :type mapping: collections.abc.Mapping + :param sep: Optional separator used to join keys. Default = '.' 
+    :type sep: str
+    :param prefix: Optional prefix to prepend to every dotpath
+    :type prefix: str or None
+    :yields: Tuples of (dotpath, value)
+    :ytype: tuple[str, any]
+    """
+    if not isinstance(mapping, Mapping):
+        raise TypeError("iter_dotpath_items expects a mapping at the root")
+
+    def join(base: str, key: str) -> str:
+        """Helper to join two keys if non-terminal"""
+        return f"{base}{sep}{key}" if base else key
+
+    def walk(node: Any, base: str) -> Iterator[Tuple[str, Any]]:
+        """Recursively walk the mapping, depth first"""
+        if isinstance(node, Mapping):
+            for k, v in node.items():
+                ks = str(k)
+                yield from walk(v, join(base, ks))
+
+        else:
+            # Node is a leaf (anything but a mapping)
+            yield (base, node)
+
+    root = prefix or ""
+    yield from walk(mapping, root)
+
+
+def unflatten_dotpath_tuples(dotpaths: List[Tuple[str, Any]], sep: str = ".") -> Dict[str, Any]:
+    """
+    Unflatten a list of (dotpath_string, value) tuples back into a nested
+    dictionary.
+
+    Note this does not account for '=value' being in the strings, as the
+    source is expected to be dictionaries from the study spec in yaml, not
+    fully rendered cli directives.
+
+    :param dotpaths: List of (dotpath string, value) tuples
+    :type dotpaths: List[Tuple[str, Any]]
+    :param sep: Separator used to split dotpath strings into nested keys.
+               Default = "."
+    :type sep: str
+    :returns: nested dictionary view of the dotpath flattened input
+
+    """
+    root: Dict[str, Any] = {}
+    for path, value in dotpaths:
+        segments = str(path).split(sep) if path else [""]
+        cursor: Dict[str, Any] = root
+        for seg in segments[:-1]:
+            if seg not in cursor or not isinstance(cursor[seg], dict):
+                cursor[seg] = {}
+            cursor = cursor[seg]
+
+        cursor[segments[-1]] = value
+
+    return root
+
+
+def unflatten_dotpath_dict(dotpath_dict: Dict[str, Any], sep: str = ".") -> Dict[str, Any]:
+    """
+    Unflatten a dictionary that may contain dotpath encoded keys into a pure
+    nested dictionary.
+
+    Note this does not account for '=value' being in the strings, as the
+    source is expected to be dictionaries from the study spec in yaml, not
+    fully rendered cli directives.
+
+    :param dotpath_dict: Dictionary whose keys may be dotpath encoded strings
+    :type dotpath_dict: Dict[str, Any]
+    :param sep: Separator used to split dotpath strings into nested keys.
+               Default = "."
+ :type sep: str + :returns: nested dictionary view of the dotpath flattened input + """ + root: Dict[str, Any] = {} + for path, value in dotpath_dict.items(): + segments = str(path).split(sep) if path else [""] + cursor: Dict[str, Any] = root + for seg in segments[:-1]: + if seg not in cursor or not isinstance(cursor[seg], dict): + cursor[seg] = {} + cursor = cursor[seg] + + cursor[segments[-1]] = value + + return root diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 889251af..9f5ac606 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -7,7 +7,14 @@ from maestrowf.datastructures.core import ParameterGenerator from maestrowf.datastructures.core.parameters import Combination -from maestrowf.utils import make_safe_path, parse_version, dict_to_dot_strings +from maestrowf.utils import ( + make_safe_path, + parse_version, + dict_to_dot_strings, + iter_dotpath_items, + unflatten_dotpath_tuples, + unflatten_dotpath_dict, +) from packaging.version import Version, InvalidVersion from hypothesis import given, HealthCheck, settings, strategies @@ -216,3 +223,95 @@ def test_dict_to_strings(dict_to_flatten, print_none, expected_strings): dot_strings = dict_to_dot_strings(dict_to_flatten, print_none=print_none) assert dot_strings == expected_strings + +@pytest.mark.parametrize( + "dict_to_flatten, expected_tuples", + [ + ( + {"foo": {"bar": 2}}, + [("foo.bar", 2)], + ), + ( + {"foo": {"bar": 2, "foo2": 42}}, + [("foo.bar", 2), ("foo.foo2", 42)], + ), + ( + {"foo": {"bar": 2, "foo3": None}}, + [("foo.bar", 2), ("foo.foo3", None)], + ), + ( + { + "foo": {"bar": 2, "foo2": 42}, + "foobar": {"too_many_foos": 9001}, + "foo.foobar": 19 + }, + [("foo.bar", 2), ("foo.foo2", 42), ("foobar.too_many_foos", 9001), ("foo.foobar", 19)], + ), + ], +) +def test_dict_to_dotpath_tuples(dict_to_flatten, expected_tuples): + """ + Test flattening of dictionaries to lists of dot syntax strings + """ + dotpath_tuples = list(iter_dotpath_items(dict_to_flatten)) + + assert dotpath_tuples == expected_tuples + + +@pytest.mark.parametrize( + "dotpaths_to_unflatten, expected_dicts", + [ + ( + [("foo.bar", 2)], + {"foo": {"bar": 2}}, + ), + ( + [("foo.bar", 2), ("foo.foo2", 42)], + {"foo": {"bar": 2, "foo2": 42}}, + ), + ( + [("foo.bar", 2), ("foo.foo3", None)], + {"foo": {"bar": 2, "foo3": None}}, + ), + ( + [("foo.bar", 2), ("foo.foo2", 42), ("foobar.too_many_foos", 9001), ("foo.foobar", 19)], + { + "foo": {"bar": 2, "foo2": 42, "foobar": 19}, + "foobar": {"too_many_foos": 9001}, + }, + ), + ], +) +def test_unflatten_dotpath_tuples(dotpaths_to_unflatten, expected_dicts): + """ + Test unflattening of mixed dotpath tuples to nested dicts + """ + unflattened_dict = unflatten_dotpath_tuples(dotpaths_to_unflatten) + + assert unflattened_dict == expected_dicts + + +@pytest.mark.parametrize( + "dict_to_flatten, expected_dict", + [ + ( + { + "foo": {"bar": 2, "foo2": 42}, + "foo.foobar": 19, + "foobar": {"too_many_foos": 9001}, + }, + { + "foo": {"bar": 2, "foo2": 42, "foobar": 19}, + "foobar": {"too_many_foos": 9001}, + }, + ), + ], +) +def test_unflatten_dotpath_dict(dict_to_flatten, expected_dict): + """ + Test unflattening dicts that may have dotpath style keys to pure + dictionary + """ + flattened_dict = unflatten_dotpath_dict(dict_to_flatten) + + assert flattened_dict == expected_dict From cebd2175624f8aeebe3809c85c934f2c51f10996 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Tue, 7 Oct 2025 11:04:45 -0700 Subject: [PATCH 13/33] Add 
tests of non scalar leaf values in dotpath machinery --- tests/utils/test_utils.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 9f5ac606..88754fce 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -247,6 +247,14 @@ def test_dict_to_strings(dict_to_flatten, print_none, expected_strings): }, [("foo.bar", 2), ("foo.foo2", 42), ("foobar.too_many_foos", 9001), ("foo.foobar", 19)], ), + ( + { + "foo": {"bar": [2, 4], "foo2": 42}, + "foobar": {"too_many_foos": 9001}, + "foo.foobar": [19, 21] + }, + [("foo.bar", [2, 4]), ("foo.foo2", 42), ("foobar.too_many_foos", 9001), ("foo.foobar", [19, 21])], + ), ], ) def test_dict_to_dotpath_tuples(dict_to_flatten, expected_tuples): @@ -280,6 +288,13 @@ def test_dict_to_dotpath_tuples(dict_to_flatten, expected_tuples): "foobar": {"too_many_foos": 9001}, }, ), + ( + [("foo.bar", [2, 4]), ("foo.foo2", 42), ("foobar.too_many_foos", 9001), ("foo.foobar", [19, 21])], + { + "foo": {"bar": [2, 4], "foo2": 42, "foobar": [19, 21]}, + "foobar": {"too_many_foos": 9001}, + }, + ), ], ) def test_unflatten_dotpath_tuples(dotpaths_to_unflatten, expected_dicts): @@ -305,6 +320,17 @@ def test_unflatten_dotpath_tuples(dotpaths_to_unflatten, expected_dicts): "foobar": {"too_many_foos": 9001}, }, ), + ( + { + "foo": {"bar": [2, 4], "foo2": 42}, + "foo.foobar": [19, 21], + "foobar": {"too_many_foos": 9001}, + }, + { + "foo": {"bar": [2, 4], "foo2": 42, "foobar": [19, 21]}, + "foobar": {"too_many_foos": 9001}, + }, + ), ], ) def test_unflatten_dotpath_dict(dict_to_flatten, expected_dict): From 20d3409ead6177ab00cfe5d197809e27d946a589 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:13:49 -0700 Subject: [PATCH 14/33] Add value coercion for dotpath utils, recursive update --- maestrowf/utils.py | 20 +++++++++++++++++++- tests/utils/test_utils.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/maestrowf/utils.py b/maestrowf/utils.py index 50b5aa30..479ea8a5 100644 --- a/maestrowf/utils.py +++ b/maestrowf/utils.py @@ -46,7 +46,7 @@ from packaging.version import parse as pkg_ver_parse from packaging.version import Version, InvalidVersion -from typing import Any, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple +from typing import Any, Callable, Dict, Iterable, Iterator, List, Mapping, Optional, Tuple LOGGER = logging.getLogger(__name__) @@ -523,3 +523,21 @@ def unflatten_dotpath_dict(dotpath_dict: Dict[str, Any], sep: str = ".") -> Dict cursor[segments[-1]] = value return root + + +def coerce_dict_values(obj_to_transform: Any, transform: Callable)-> Dict: + if isinstance(obj_to_transform, Mapping): + return {k: coerce_dict_values(v, transform) + for k, v in obj_to_transform.items()} + + # Return transformed leaf + return transform(obj_to_transform) + + +def update_recursive(base_dict, update_dict): + for k, v in update_dict.items(): + if k in base_dict and isinstance(base_dict[k], dict) and isinstance(v, dict): + update_recursive(base_dict[k], v) + else: + base_dict[k] = v + return base_dict diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 88754fce..27830367 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -14,6 +14,7 @@ iter_dotpath_items, unflatten_dotpath_tuples, unflatten_dotpath_dict, + coerce_dict_values ) from packaging.version import Version, InvalidVersion @@ -341,3 +342,32 @@ def 
test_unflatten_dotpath_dict(dict_to_flatten, expected_dict): flattened_dict = unflatten_dotpath_dict(dict_to_flatten) assert flattened_dict == expected_dict + + +@pytest.mark.parametrize( + "dict_to_coerce, transform, expected_dict", + [ + ( + { + "foo": {"bar": 2, "foo2": None}, + "foobar": None, + "foobar2": 3 + }, + (lambda x: 1 if x is None else x), + { + "foo": {"bar": 2, "foo2": 1}, + "foobar": 1, + "foobar2": 3 + }, + ), + ], +) +def test_coerce_dict_values(dict_to_coerce, transform, expected_dict): + """ + Test unflattening dicts that may have dotpath style keys to pure + dictionary + """ + coerced_dict = coerce_dict_values(dict_to_coerce, transform) + + assert coerced_dict == expected_dict + assert coerced_dict != dict_to_coerce From c923e74e54ef29148ac43059df7415bdf16454d8 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:15:03 -0700 Subject: [PATCH 15/33] Refactor additional arg handling to deal with python api end points needing different formats --- maestrowf/abstracts/interfaces/flux.py | 52 +++- .../interfaces/script/_flux/flux0_49_0.py | 232 +++++++++++++----- .../interfaces/script/fluxscriptadapter.py | 112 ++++----- .../script/test_fluxscriptadapter.py | 8 +- 4 files changed, 283 insertions(+), 121 deletions(-) diff --git a/maestrowf/abstracts/interfaces/flux.py b/maestrowf/abstracts/interfaces/flux.py index 3e6d3997..5ea3d0a0 100644 --- a/maestrowf/abstracts/interfaces/flux.py +++ b/maestrowf/abstracts/interfaces/flux.py @@ -265,6 +265,7 @@ def addtl_alloc_arg_types(cls): Should we have an enum for these or something vs random strings? """ + # default nothing, overriding in implementation return [] @classmethod @@ -281,5 +282,54 @@ def render_additional_args(cls, args_dict): Promote this to the general/base adapters to handle non-normalizable scheduler/machine specific options - """ + """ yield "" + + @classmethod + def addtl_alloc_arg_type_map(cls, option): + """ + Map verbose/brief cli arg option name (o from -o, setopt from --setopt) + onto known alloc arg types this interface implements + + :param option: option string corresponding to flux cli input + :return: string, one of known_alloc_arg_types + """ + # Default to pass through, override in implementation + return option + + @classmethod + def get_addtl_arg_cli_key(cls, arg_type): + """ + Return expected cli key associated with each normalized arg type. + `arg_type` not in known_arg_types are assumed to be the key already + to facilitate flexible pass through to launcher + + :param arg_type: string noting arg group or cli key + :returns: cli key used for this arg + + .. note:: + + Can we find a reasonable default prefix (where are things put + by default in flux, attributes.system?) + """ + # Default to pass through, handling known types/mapping in implementation + return arg_type + + @staticmethod + def get_cli_arg_prefix_sep(cli_key): + """ + Helper for rendering extra options on cli/batch directives. Sets prefix + and value separator based on length of cli key. Flux has two conventions: + single letter cli_key has prefix of '-' and separator of ' ' while + multiletter cli_key has prefix of '--' and separator of '='. Examples + '-o foo=2' or '--setopt=foo=2' for single letter cli_key (o) and + multiletter (setopt) forms to set the same option. 
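
Concretely, the brief/verbose convention described here plays out as follows; a standalone sketch of the same rule, producing both spellings of the same option:

```python
def get_cli_arg_prefix_sep(cli_key):
    # Brief single-letter keys render as '-k v'; verbose keys as '--key=v'.
    if len(cli_key) == 1:
        return {"prefix": "-", "sep": " "}
    return {"prefix": "--", "sep": "="}

for cli_key in ("o", "setopt"):
    info = get_cli_arg_prefix_sep(cli_key)
    print(f"{info['prefix']}{cli_key}{info['sep']}foo=2")
# -o foo=2
# --setopt=foo=2
```
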
+ + :param cli_key: the key to use on the cli form of an argument + :type cli_key: str + :returns: dict containing 'prefix' and 'sep' for use in rendering + """ + if len(cli_key) == 1: + return {"prefix": "-", "sep": " "} + else: + return {"prefix": "--", "sep": "="} diff --git a/maestrowf/interfaces/script/_flux/flux0_49_0.py b/maestrowf/interfaces/script/_flux/flux0_49_0.py index b90c7b35..f17b78e2 100644 --- a/maestrowf/interfaces/script/_flux/flux0_49_0.py +++ b/maestrowf/interfaces/script/_flux/flux0_49_0.py @@ -10,8 +10,15 @@ SubmissionCode, ) from maestrowf.abstracts.interfaces.flux import FluxInterface -from maestrowf.utils import dict_to_dot_strings +from maestrowf.utils import ( + dict_to_dot_strings, + iter_dotpath_items, + unflatten_dotpath_dict, + coerce_dict_values, + update_recursive +) +from rich.pretty import pprint LOGGER = logging.getLogger(__name__) try: @@ -43,6 +50,11 @@ class FluxInterface_0490(FluxInterface): "S": "attributes", "conf": "conf", } + _addtl_arg_cli_key = { + "attributes": "setattr", + "shell_options": "setopt", + "conf": "conf" + } @classmethod def addtl_alloc_arg_types(cls): @@ -73,6 +85,30 @@ def addtl_alloc_arg_type_map(cls, option): """ return cls._addtl_alloc_arg_type_map.get(option, None) + @classmethod + def get_addtl_arg_cli_key(cls, arg_type): + """ + Return expected cli key associated with each normalized arg type. + `arg_type` not in known_arg_types are assumed to be the key already + to facilitate flexible pass through to launcher + + :param arg_type: string noting arg group or cli key + :returns: cli key used for this arg + + .. note:: + + Can we find a reasonable default prefix (where are things put + by default in flux, attributes.system?) + """ + if arg_type in cls.known_alloc_arg_types: + return cls._addtl_arg_cli_key.get(arg_type) + + elif arg_type in cls._addtl_alloc_arg_type_map: + return cls._addtl_arg_cli_key.get(cls._addtl_alloc_arg_type_map[arg_type]) + + else: + return arg_type + @classmethod def get_flux_urgency(cls, urgency) -> int: if isinstance(urgency, str): @@ -100,38 +136,110 @@ def render_additional_args(cls, args_dict): Promote this to the general/base adapters to handle non-normalizable scheduler/machine specific options """ - def arg_info_type(arg_key): - if len(arg_key) == 1: - return {"prefix": "-", "sep": " "} + + base_render_tmpl = "{prefix}{cli_key}{sep}{dotpath}" + value_render_tmpl = "={value}" + for arg_key, arg_value in args_dict.items(): + # Get the cli key and associated rendering info + cli_key = cls.get_addtl_arg_cli_key(arg_key) + cli_info = cls.get_cli_arg_prefix_sep(cli_key) + + # Note: dotpath encoding comes after the group/prefix (setattr, ...) + for dotpath, value in iter_dotpath_items(arg_value): + # print(f"{dotpath=} {value=}") + rendered_opt = base_render_tmpl.format( + prefix=cli_info['prefix'], + cli_key=cli_key, + sep=cli_info['sep'], + dotpath=dotpath + ) + + # Flag types have None, and we want to exclude it from launcher + # and headers to better match interactive use omitting '=None' + if value: + rendered_opt += value_render_tmpl.format(value=value) + + yield rendered_opt + + @classmethod + def normalize_additional_args(cls, args_dict): + """ + Helper to normalize additional arguments to known types and an + unflattened nested dictionary structure. This unflattens any + dotpath encoded nested dictionary keys. 
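
A worked sketch of what this normalization is aiming for, using the flux adapter's key map from above (setopt/o -> shell_options, setattr/S -> attributes, conf -> conf); the real implementation merges groups with update_recursive rather than the shallow update used here, and the inputs are illustrative:

```python
TYPE_MAP = {"setopt": "shell_options", "o": "shell_options",
            "setattr": "attributes", "S": "attributes", "conf": "conf"}

def unflatten(d, sep="."):
    """Expand dotpath-encoded keys into nested dicts (mini unflatten_dotpath_dict)."""
    out = {}
    for path, value in d.items():
        cursor = out
        *parents, leaf = str(path).split(sep)
        for seg in parents:
            cursor = cursor.setdefault(seg, {})
        cursor[leaf] = value
    return out

def normalize(args_dict):
    groups = {"attributes": {}, "shell_options": {}, "conf": {}}
    for key, values in args_dict.items():
        group = TYPE_MAP.get(key, key)  # unknown keys pass through untouched
        groups.setdefault(group, {}).update(unflatten(values))
    return groups

print(normalize({"setopt": {"foo": "bar"}, "o": {"bar": 42},
                 "S": {"gpumode": "CPX"}, "conf": {"resource.rediscover": "true"}}))
# {'attributes': {'gpumode': 'CPX'},
#  'shell_options': {'foo': 'bar', 'bar': 42},
#  'conf': {'resource': {'rediscover': 'true'}}}
```
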
+ + :param args_dict: Dictionary of flux arg keys and name: value pairs + :return: dict of packed args with top level keys being the adapter + version specific addtl_alloc_arg_types + """ + # First, normalize and unflatten everything into dicts + # print(f"original_additional_args: {args_dict=}") + unflattened_batch_args = { + arg_type: {} + for arg_type in cls.addtl_alloc_arg_types() + } + + for arg_key, arg_values in args_dict.items(): + arg_type = cls.addtl_alloc_arg_type_map(arg_key) + # print(f"normalize_additional_args: {arg_key=} -> {arg_type=}") + if arg_type is None: + # We only restrict for allocations, so defer logging/rejection + # of types not handled by jobspec builder + arg_type = arg_key + + if isinstance(arg_values, dict): + unflattened_batch_arg = {arg_type: unflatten_dotpath_dict(arg_values)} + # unflattened_batch_args[arg_key] = unflatten_dotpath_dict(arg_values) else: - return {"prefix": "--", "sep": "="} + unflattened_batch_arg = {arg_type: arg_values} + # unflattened_batch_args[arg_key] = arg_values + + # Update to ensure we don't clobber prior values + # print(f"pre-update unflattened_batch_args: {unflattened_batch_args=}") + unflattened_batch_args = update_recursive(unflattened_batch_args, + unflattened_batch_arg) + # print(f"post-update unflattened_batch_args: {unflattened_batch_args=}") + + # print(f"normalized_additional_args: {unflattened_batch_args=}") + return unflattened_batch_args - def render_arg_value(arg_name, arg_value): - # Handling for flag type values, e.g. -o fastload - if arg_value: - return f"{arg_name}={arg_value}" + @classmethod + def pack_addtl_batch_args(cls, args_dict): + """ + Normalize the allocation args and pack up into the interface specific + groups that have assocated jobspec methods, e.g. conf, setattr, setopt. + Ensure arg formats match their end point's types. For current flux + versions as of 0.78.0: setattr and setopt require dicts be flattened + into lists of dotpath encoded strings and values, while conf requires + a completely unflattened dict. + + :param args_dict: Dict with keys normalized to cls.addtl_alloc_arg_types + :return: dictionary of allocation arg groups to attach to jobspecs + """ + # Normalize None values common in flag inputs for use in python api + # see https://github.com/flux-framework/flux-core/blob/a3860d4dea5b5a17c473cff4385276e882275252/src/bindings/python/flux/cli/base.py#L734 + # NOTE: only doing this in alloc; LAUNCHER cli passes through + # to flux cli (None values are omittied, e.g. + # {o: fastload: None} renders to -o fastload + # Python api doesn't appear to have default value handling? 
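
Spelled out with plain data, the packing this method performs looks like the sketch below: the setattr/setopt jobspec endpoints take flattened (dotpath, value) pairs, conf stays fully nested, and flag-style None values are coerced to 1 per the note above. A minimal sketch, not the adapter code itself:

```python
def flatten(d, base=""):
    """Depth-first (dotpath, value) pairs, mirroring iter_dotpath_items."""
    for k, v in d.items():
        path = f"{base}.{k}" if base else str(k)
        if isinstance(v, dict):
            yield from flatten(v, path)
        else:
            yield (path, 1 if v is None else v)  # None -> 1, per the flux CLI note

normalized = {"attributes": {"gpumode": "CPX"},
              "shell_options": {"foo": "bar", "fastload": None},
              "conf": {"resource": {"rediscover": "true"}}}

packed = {group: (list(flatten(values)) if group in ("attributes", "shell_options")
                  else values)  # conf is handed to the jobspec as a nested dict
          for group, values in normalized.items()}

print(packed["shell_options"])  # [('foo', 'bar'), ('fastload', 1)]
```
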
+ packed_batch_args = { + arg_type: {} + for arg_type in cls.addtl_alloc_arg_types() + } + + dotpath_format = ["attributes", "shell_options"] + + for arg_key, arg_values in args_dict.items(): + coerced_vals = coerce_dict_values(arg_values, + lambda x: 1 if x is None else x) + if arg_key in dotpath_format: + group_values = list(iter_dotpath_items(coerced_vals)) else: - return f"{arg_name}" + group_values = coerced_vals - for arg_key, arg_value in args_dict.items(): - arg_info = arg_info_type(arg_key) - - dot_string_args = dict_to_dot_strings(arg_value, print_none=False) - for dot_string_arg in dot_string_args: - yield "{prefix}{key}{sep}{dotstringvalue}".format( - prefix=arg_info['prefix'], - key=arg_key, - sep=arg_info['sep'], - dotstringvalue=dot_string_arg - ) - # for av_name, av_value in arg_value.items(): - # value_str = render_arg_value(av_name, av_value) - # yield "{prefix}{key}{sep}{value}".format( - # prefix=arg_info['prefix'], - # key=arg_key, - # sep=arg_info['sep'], - # value=value_str - # ) + packed_batch_args[arg_key] = group_values + + return packed_batch_args @classmethod def submit( @@ -147,6 +255,8 @@ def submit( force_broker=True, urgency=StepPriority.MEDIUM, waitable=False, + queue=None, + bank=None, addtl_batch_args=None, exclusive=False, **kwargs, @@ -155,11 +265,11 @@ def submit( if addtl_batch_args is None: addtl_batch_args = {} - # May want to also support setattr_shell_option at some point? - for batch_arg_type in ["attributes", "shell_options", "conf"]: + # May want to also support setattr_shell_option at some point? + for batch_arg_type in cls.addtl_alloc_arg_types(): #["attributes", "shell_options", "conf"]: - if batch_arg_type not in addtl_batch_args: - addtl_batch_args[batch_arg_type] = {} + if batch_arg_type not in addtl_batch_args: + addtl_batch_args[batch_arg_type] = {} try: # TODO: add better error handling/throwing in the class func @@ -175,11 +285,12 @@ def submit( # that can force a single node to run in a broker. # Attach any conf inputs to the jobspec - if "conf" in addtl_batch_args: - conf_dict = addtl_batch_args['conf'] + conf_dict = addtl_batch_args.get('conf', None) + # if "conf" in addtl_batch_args: + # conf_dict = addtl_batch_args['conf'] - else: - conf_dict = None + # else: + # conf_dict = None if force_broker: LOGGER.debug( @@ -199,6 +310,8 @@ def submit( gpus_per_slot=ngpus_per_slot, conf=conf_dict, exclusive=exclusive, + queue=queue, + bank=bank ) else: if conf_dict: @@ -216,7 +329,9 @@ def submit( num_nodes=nodes, cores_per_task=cores_per_task, gpus_per_task=ngpus, - exclusive=exclusive + exclusive=exclusive, + queue=queue, + bank=bank ) LOGGER.debug("Handle address -- %s", hex(id(cls.flux_handle))) @@ -227,26 +342,33 @@ def submit( jobspec.cwd = cwd jobspec.environment = dict(os.environ) - # Slurp in extra attributes if not null (queue, bank, ..) + # Slurp in extra attributes if not null # (-S/--setattr) - for batch_attr_name, batch_attr_value in addtl_batch_args["attributes"].items(): - if batch_attr_value: - jobspec.setattr(batch_attr_name, batch_attr_value) - else: - LOGGER.warn("Flux adapter v0.49 received null value of '%s'" - " for attribute '%s'. 
Omitting from jobspec.", - batch_attr_value, - batch_attr_name) + # NOTE: these are sanitized upstream to be dotpath, value tuples + for batch_attr_dotpath, batch_attr_value in addtl_batch_args["attributes"]: + # pprint(f"Setting {batch_attr_dotpath}:") + # pprint(batch_attr_value) + jobspec.setattr(batch_attr_dotpath, batch_attr_value) + # pprint(jobspec.dumps()) + # else: + # pprint(f"Flux adapter v0.49 received null value of '{batch_attr_value}'" + # f" for attribute '{batch_attr_dotpath}'. Omitting from jobspec.") + + # LOGGER.warn("Flux adapter v0.49 received null value of '%s'" + # " for attribute '%s'. Omitting from jobspec.", + # batch_attr_value, + # batch_attr_dotpath) # Add in job shell options if not null (-o/--setopt) - for batch_opt_name, batch_opt_value in addtl_batch_args["shell_options"].items(): - if batch_opt_value: - jobspec.setattr_shell_option(batch_opt_name, batch_opt_value) - else: - LOGGER.warn("Flux adapter v0.49 received null value of '%s'" - " for shell option '%s'. Omitting from jobspec.", - batch_opt_value, - batch_opt_name) + # NOTE: these are sanitized upstream to be dotpath, value tuples + for batch_opt_dotpath, batch_opt_value in addtl_batch_args["shell_options"]: + # if batch_opt_value: + jobspec.setattr_shell_option(batch_opt_dotpath, batch_opt_value) + # else: + # LOGGER.warn("Flux adapter v0.49 received null value of '%s'" + # " for shell option '%s'. Omitting from jobspec.", + # batch_opt_value, + # batch_opt_dotpath) if walltime > 0: jobspec.duration = walltime diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index 93f9e24d..e802f411 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -39,6 +39,8 @@ FluxFactory from maestrowf.utils import make_safe_path +from rich.pretty import pprint + LOGGER = logging.getLogger(__name__) status_re = re.compile(r"Job \d+ status: (.*)$") # env_filter = re.compile(r"^(SSH_|LSF)") @@ -100,35 +102,19 @@ def __init__(self, **kwargs): self._launcher_args = kwargs.get("launcher_args", {}) self._addl_args = kwargs.get("args", {}) - - # Setup prefixes associated with each kind of option for routing to - # appropriate jobspec api's - # Additional args can pass through on launcher only, using rule of - # verbose has '--' prefix, '=' flag/value separator and - # brief has '-' prefix, ' ' flag/value seaprator: e.g. 
- # --setattr=foo=bar, or -S foo=bar - self._brief_arg_info = {"prefix": "-", "sep": " "} - self._verbose_arg_info = {"prefix": "--", "sep": "="} - - # Setup known arg types - self._known_alloc_arg_types = ["attributes", "shell_options", "conf"] - self._allocation_args_map = { - "setopt": "shell_options", - "o": "shell_options", - "setattr": "attributes", - "S": "attributes", - "conf": "conf", - } + # Normalize the allocation and launcher args to a flat dict, removing dotpath encoding + # TODO: workon a validation api that can be hooked up to yamlspec ingestion machinery for + # early error messaging + if self._allocation_args: + self._allocation_args = self._interface.normalize_additional_args(self._allocation_args) + # pprint(f"{self._allocation_args=}") - # setup template string that all flux cli args/batch directives use for rendering - self._flux_arg_str = "{prefix}{key}{sep}{value}" + if self._launcher_args: + self._launcher_args = self._interface.normalize_additional_args(self._launcher_args) + # pprint(f"{self._launcher_args=}") - self._attr_prefixes = ['S', 'setattr'] - self._opts_prefixes = ['o', 'setopt'] - self._conf_prefixes = ['conf'] # No abbreviated form for this in flux docs - - # Add --setattr fields to batch job/broker; default to "" such - # that 'truthiness' can exclude them from the jobspec if not provided + # Default queue/bank to "" such that 'truthiness' can exclude them + # from the jobspec/scripts if not provided queue = kwargs.pop("queue", "") bank = kwargs.pop("bank", "") available_queues = self._interface.get_broker_queues() @@ -142,6 +128,7 @@ def __init__(self, **kwargs): ) queue = "" + # NOTE: if system is omitted, flux defaults to adding that prefix self._batch_attrs = { "system.queue": queue, "system.bank": bank, @@ -239,38 +226,6 @@ def _convert_walltime_to_seconds(self, walltime): LOGGER.error(msg) raise ValueError(msg) - def render_additional_args(self, args_dict): - """ - Helper to render additional argument sets to flux cli format for - use in constructing $(LAUNCHER) line and flux batch directives. - - :param args_dict: Dictionary of flux arg keys and name: value pairs - """ - def arg_info_type(arg_key): - if len(arg_key) == 1: - return {"prefix": "-", "sep": " "} - else: - return {"prefix": "--", "sep": "="} - - def render_arg_value(arg_name, arg_value): - # Handling for flag type values, e.g. -o fastload - if arg_value: - return f"{arg_name}={arg_value}" - else: - return f"{arg_name}" - - for arg_key, arg_value in args_dict.items(): - arg_info = arg_info_type(arg_key) - - for av_name, av_value in arg_value.items(): - value_str = render_arg_value(av_name, av_value) - yield "{prefix}{key}{sep}{value}".format( - prefix=arg_info['prefix'], - key=arg_key, - sep=arg_info['sep'], - value=value_str - ) - def pack_addtl_batch_args(self): """ Normalize the allocation args and pack up into the interface specific @@ -289,7 +244,7 @@ def pack_addtl_batch_args(self): if arg_type is None: # NO WARNINGS HERE: args in 'misc' type handled elsewhere # TODO: add better mechanism for tracking whicn args - # actually get used; dicts can't do this.. + # actually get used; dicts can'ppt do this.. continue new_arg_values = arg_values @@ -327,6 +282,10 @@ def get_header(self, step): batch_header["comment"] = step.description.replace("\n", " ") batch_header["flux_version"] = self._broker_version + # Handle queue -> omit if anonymous queue was detected + if not batch_header["queue"]: + batch_header.pop("queue") # Should we also pop bank here? 
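
A small sketch of the intent behind the queue handling here: an 'anonymous' (unnamed) broker queue is recorded as an empty string, and the directive rendering then drops the `-q` line entirely rather than emitting an empty one. An illustrative stand-in, not the adapter code:

```python
header_format = {
    "nodes": "#flux: -N {nodes}",
    "queue": "#flux: -q {queue}",
    "bank": "#flux: --bank={bank}",
}
batch_header = {"nodes": 1, "queue": "", "bank": "guests"}

# Empty string marks an anonymous queue; drop the key so no directive renders.
if not batch_header["queue"]:
    batch_header.pop("queue")

print("\n".join(tmpl.format(**batch_header)
                for key, tmpl in header_format.items() if key in batch_header))
# #flux: -N 1
# #flux: --bank=guests
```
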
+ modified_header = ["#!{}".format(self._exec)] for key, value in self._header.items(): if key not in batch_header: @@ -462,13 +421,42 @@ def submit(self, step, path, cwd, job_map=None, env=None): # it's in the step maybe, leaving each adapter to retain their defaults? waitable = step.run.get("waitable", False) + # Normalize the allocation args to api flux.job.JobspecV1 expects + packed_alloc_args = self._interface.pack_addtl_batch_args(self._allocation_args) + # print(f"{normalized_alloc_args=}") + + # Setup placholder for the queue/bank attributes if no already added by user + # in the allocation_args + # NOTE: should we flatten these treedict style? conf looks like no treedict, but + # the others look like they support it even via python api + # if "system" not in normalized_alloc_args["attributes"]: + # normalized_alloc_args["attributes"]["system"] = {} + + # Add queue and bank + queue = self._batch["queue"] + if queue == "": + queue = None + bank = self._batch["bank"] + if bank == "": + bank = None + + # if self._batch["queue"]: + # normalized_alloc_args["attributes"]["system"]["queue"] = self._batch["queue"] + # if self._batch["bank"]: + # # TODO: revisit whether it makes sense to add bank if queue is empty -> + # # nested brokers usually have neither, and bank falls through silently.. + # normalized_alloc_args["attributes"]["system"]["bank"] = self._batch["bank"] + + # pprint(f"Packed alloc args for {step.name}:") + # pprint(packed_alloc_args) jobid, retcode, submit_status = \ self._interface.submit( nodes, processors, cores_per_task, path, cwd, walltime, ngpus, job_name=step.name, force_broker=force_broker, urgency=urgency, waitable=waitable, - addtl_batch_args=self.pack_addtl_batch_args(), - exclusive=step_exclusive['allocation'] + addtl_batch_args=packed_alloc_args, #self.pack_addtl_batch_args(), + exclusive=step_exclusive['allocation'], + ) return SubmissionRecord(submit_status, retcode, jobid) diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py index f3092e22..04ee20fe 100644 --- a/tests/interfaces/script/test_fluxscriptadapter.py +++ b/tests/interfaces/script/test_fluxscriptadapter.py @@ -97,7 +97,7 @@ def test_flux_script_serialization( tmp_path # Pytest tmp dir fixture: Path()): ): spec_path = variant_spec_path(spec_file + f"_{variant_id}.yaml") - pprint(spec_path) + # pprint(spec_path) yaml_spec = YAMLSpecification.load_specification(spec_path) # Load this up to get the batch info study: Study = load_study(spec_path, tmp_path, dry_run=True) @@ -156,10 +156,12 @@ def test_flux_script_serialization( 'shell': { 'options': {'bar': 42, 'foo': 'bar'}, }, + 'queue': 'pdebug', + 'bank': 'guests', 'files': { - 'conf.json': {'data': {'resource': {'rediscover': True}}} + 'conf.json': {'data': {'resource': {'rediscover': "true"}}} }, - 'gpumode': 'SPC' + 'gpumode': 'SPX' } } }, From 523660340a002fba5bec4e5195b07823945a874c Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:16:59 -0700 Subject: [PATCH 16/33] Replace fail with assertionerror to avoid stopping entire test session on failures --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 6e3d4e63..667d7252 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -336,7 +336,7 @@ def annotate_ignored_lines(lines_to_annotate): ) diff = "\n".join(diff) - pytest.fail(f"Text streams differ (ignoring marked lines):\n{diff}") + raise 
AssertionError(f"Text streams differ (ignoring marked lines):\n{diff}") return True From def5b6582667fdedc05e473dc1441cc1f8cc3eb1 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 9 Oct 2025 17:17:20 -0700 Subject: [PATCH 17/33] Add extra info to failed jobspec tests to show full spec in case things ended up in completely different subtrees --- tests/conftest.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 667d7252..3720e4a0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -7,6 +7,7 @@ import shutil from subprocess import check_output +import pprint import pytest from maestrowf.datastructures.core import Study @@ -345,7 +346,7 @@ def annotate_ignored_lines(lines_to_annotate): @pytest.mark.sched_flux @pytest.fixture -def flux_jobspec_check(): +def flux_jobspec_check(request): import flux def _diff_jobspec_keys(jobid, expected, path=None): @@ -398,6 +399,12 @@ def assert_nested_dict_subset(actual, expected, path=None): f"expected {expected_value!r}, got {actual_value!r}" ) - assert_nested_dict_subset(jobspec, expected, path=path) + try: + assert_nested_dict_subset(jobspec, expected, path=path) + except AssertionError: + jobspec_str = pprint.pformat(jobspec, width=80) + request.node.add_report_section( + "call", "jobspec", f"\n=== Jobspec dump on failure for job {jobid} ===\n{jobspec_str}\n=== End jobspec dump ===\n") + raise return _diff_jobspec_keys From 9bcc05924ebd47440b1ae201be8da72751912a8f Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Wed, 15 Oct 2025 16:38:07 -0700 Subject: [PATCH 18/33] Finish wiring up queue/bank and update test specs/expected outputs --- maestrowf/interfaces/script/fluxscriptadapter.py | 2 +- ...ed_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 | 4 +--- ..._flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 | 4 +--- tests/data/specs/hello_bye_parameterized_flux_1.yaml | 2 +- tests/interfaces/script/test_fluxscriptadapter.py | 10 ++++------ 5 files changed, 8 insertions(+), 14 deletions(-) diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index e802f411..9673d211 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -453,7 +453,7 @@ def submit(self, step, path, cwd, job_map=None, env=None): self._interface.submit( nodes, processors, cores_per_task, path, cwd, walltime, ngpus, job_name=step.name, force_broker=force_broker, urgency=urgency, - waitable=waitable, + waitable=waitable, queue=queue, bank=bank, addtl_batch_args=packed_alloc_args, #self.pack_addtl_batch_args(), exclusive=step_exclusive['allocation'], diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 index 47a0100b..2bf498d4 100644 --- a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -2,14 +2,12 @@ #flux: -N 1 #flux: -t 60.0s #flux: -q pdebug -#flux: --bank guests +#flux: --bank ecopper #flux: --exclusive #flux: --setopt=foo=bar #flux: -o bar=42 #flux: --setattr=gpumode=CPX #flux: --conf=resource.rediscover=true -#INFO (flux adapter version) 0.49.0 
-#INFO (flux version) 0.76.0 flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Bye, World!" > bye.txt flux run -n 1 -N 1 -c 1 --setopt=fastload sleep 1 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 index 512933cb..a5449d52 100644 --- a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -2,14 +2,12 @@ #flux: -N 1 #flux: -t 60.0s #flux: -q pdebug -#flux: --bank guests +#flux: --bank ecopper #flux: --exclusive #flux: --setopt=foo=bar #flux: -o bar=42 #flux: --setattr=gpumode=CPX #flux: --conf=resource.rediscover=true -#INFO (flux adapter version) 0.49.0 -#INFO (flux version) 0.76.0 flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Hello, Pam!" > Hello_Pam.txt flux run -n 1 -N 1 -c 1 --setopt=fastload sleep 1 diff --git a/tests/data/specs/hello_bye_parameterized_flux_1.yaml b/tests/data/specs/hello_bye_parameterized_flux_1.yaml index 8bffcad0..93a05e55 100644 --- a/tests/data/specs/hello_bye_parameterized_flux_1.yaml +++ b/tests/data/specs/hello_bye_parameterized_flux_1.yaml @@ -5,7 +5,7 @@ description: batch: type : flux host : rzadams - bank : guests + bank : ecopper queue : pdebug allocation_args: setopt: # > 1 letter gets '--' prefix: --setopt foo=true diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py index 04ee20fe..8d444d1f 100644 --- a/tests/interfaces/script/test_fluxscriptadapter.py +++ b/tests/interfaces/script/test_fluxscriptadapter.py @@ -147,7 +147,7 @@ def test_flux_script_serialization( [ ( "hello_bye_parameterized_flux", - 1, + 2, # Use the invalid queue one to avoid waiting for jobs to run { # NOTE: should we contsruct this, and just use study + step_id + sched.sh.variant_id? 
"hello_world_GREETING.Hello.NAME.Pam": { 'resources': [{'exclusive': True}], @@ -157,11 +157,11 @@ def test_flux_script_serialization( 'options': {'bar': 42, 'foo': 'bar'}, }, 'queue': 'pdebug', - 'bank': 'guests', + 'bank': 'invalid_bank', 'files': { - 'conf.json': {'data': {'resource': {'rediscover': "true"}}} + 'conf.json': {'data': {'resource': {'rediscover': "true"}, "noverify": "true"}} }, - 'gpumode': 'SPX' + 'gpumode': 'CPX' } } }, @@ -221,5 +221,3 @@ def test_flux_job_opts( # pprint("Written script:") # assert step_name in expected_batch_files - - assert False From 93c05f0c2eaa29ebdb9802c86a35bc053f8a208a Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 16 Oct 2025 12:26:58 -0700 Subject: [PATCH 19/33] Move INFO lines to allow in step directives, fix test data to reflect normalization of alloc args --- maestrowf/interfaces/script/fluxscriptadapter.py | 16 ++++++++++------ ...x.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 | 9 ++++++--- ...hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 | 9 ++++++--- .../specs/hello_bye_parameterized_flux_1.yaml | 2 +- 4 files changed, 23 insertions(+), 13 deletions(-) diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index 9673d211..053ebf5c 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -176,7 +176,7 @@ def __init__(self, **kwargs): # NOTE: always use seconds to guard against upstream default behavior changes "walltime": f"{self._flux_directive}" + "-t {walltime}s", "queue": f"{self._flux_directive}" + "-q {queue}", - "bank": f"{self._flux_directive}" + "--bank {bank}", + "bank": f"{self._flux_directive}" + "--bank={bank}", } @@ -287,6 +287,15 @@ def get_header(self, step): batch_header.pop("queue") # Should we also pop bank here? modified_header = ["#!{}".format(self._exec)] + + # Process INFO lines at the start to + # lines starting tag+prefix (e.g. "#flux:" ) that doesn't match the flux directives + for key, value in self._header_info.items(): + modified_header.append(value.format(**batch_header)) + + # Inject a blank # comment line between the flux directives for readability + modified_header.append("#") + for key, value in self._header.items(): if key not in batch_header: continue @@ -310,11 +319,6 @@ def get_header(self, step): # interface for batch/allocation args modified_header.append(f"{self._flux_directive}" + rendered_arg) - # Process INFO lines at the end: flux stops parsing directives after any - # lines starting tag+prefix (e.g. 
"#flux:" ) that doesn't match the flux directives - for key, value in self._header_info.items(): - modified_header.append(value.format(**batch_header)) - return "\n".join(modified_header) def get_parallelize_command(self, procs, nodes=None, **kwargs): diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 index 2bf498d4..56462a18 100644 --- a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -1,12 +1,15 @@ #!/bin/bash +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.78.0 +# #flux: -N 1 #flux: -t 60.0s #flux: -q pdebug -#flux: --bank ecopper +#flux: --bank=guests #flux: --exclusive -#flux: --setopt=foo=bar -#flux: -o bar=42 #flux: --setattr=gpumode=CPX +#flux: --setopt=foo=bar +#flux: --setopt=bar=42 #flux: --conf=resource.rediscover=true flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Bye, World!" > bye.txt diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 index a5449d52..384b8cc7 100644 --- a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -1,12 +1,15 @@ #!/bin/bash +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.78.0 +# #flux: -N 1 #flux: -t 60.0s #flux: -q pdebug -#flux: --bank ecopper +#flux: --bank=guests #flux: --exclusive -#flux: --setopt=foo=bar -#flux: -o bar=42 #flux: --setattr=gpumode=CPX +#flux: --setopt=foo=bar +#flux: --setopt=bar=42 #flux: --conf=resource.rediscover=true flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Hello, Pam!" > Hello_Pam.txt diff --git a/tests/data/specs/hello_bye_parameterized_flux_1.yaml b/tests/data/specs/hello_bye_parameterized_flux_1.yaml index 93a05e55..8bffcad0 100644 --- a/tests/data/specs/hello_bye_parameterized_flux_1.yaml +++ b/tests/data/specs/hello_bye_parameterized_flux_1.yaml @@ -5,7 +5,7 @@ description: batch: type : flux host : rzadams - bank : ecopper + bank : guests queue : pdebug allocation_args: setopt: # > 1 letter gets '--' prefix: --setopt foo=true From fb112288b0603667ec56724702760335b880c015 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 16 Oct 2025 12:44:35 -0700 Subject: [PATCH 20/33] Add docstrings/tests for dict utils --- maestrowf/utils.py | 37 +++++++++++++++++++++++++++++++ tests/utils/test_utils.py | 46 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/maestrowf/utils.py b/maestrowf/utils.py index 479ea8a5..89b3cd13 100644 --- a/maestrowf/utils.py +++ b/maestrowf/utils.py @@ -526,6 +526,25 @@ def unflatten_dotpath_dict(dotpath_dict: Dict[str, Any], sep: str = ".") -> Dict def coerce_dict_values(obj_to_transform: Any, transform: Callable)-> Dict: + """ + Recursively apply a transformation function to all values in a nested mapping. + + Traverses the input object, recursively applying the given transformation function + to each leaf value. 
If the input is a mapping (such as a dictionary), the function + will process each value in the mapping. Non-mapping values are transformed directly. + + This is useful for cases where you need to sanitize or coerce certain values + throughout a deeply nested dictionary structure. For example, the Flux CLI + converts ``None`` values to ``1`` for treedict inputs that represent flags. + This function can be used to replicate that behavior in the Python API. + + :param obj_to_transform: The object to transform. Can be a mapping or any other value. + :type obj_to_transform: Any + :param transform: A callable that takes a single value and returns the transformed value. + :type transform: Callable + :return: A new mapping with the same structure as the input, but with all leaf values transformed. + :rtype: dict + """ if isinstance(obj_to_transform, Mapping): return {k: coerce_dict_values(v, transform) for k, v in obj_to_transform.items()} @@ -535,6 +554,24 @@ def coerce_dict_values(obj_to_transform: Any, transform: Callable)-> Dict: def update_recursive(base_dict, update_dict): + """ + Recursively update a dictionary with values from another dictionary. + + For each key in ``update_dict``, updates the corresponding value in ``base_dict``. + If both values for a given key are dictionaries, the update is performed recursively. + Otherwise, the value from ``update_dict`` overwrites the value in ``base_dict``. + + :param base_dict: The dictionary to update in-place. + :type base_dict: dict + :param update_dict: The dictionary containing updates. + :type update_dict: dict + :return: The updated base dictionary. + :rtype: dict + + .. note:: + If a key exists in both dictionaries but the values are not both dicts, + the value from ``update_dict`` will overwrite the value in ``base_dict``. 
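
A usage sketch of the in-place mutation caveat this note describes: copy first when the base must survive, for example adapter-level defaults reused across steps:

```python
from copy import deepcopy
from maestrowf.utils import update_recursive

defaults = {"attributes": {"system": {"queue": "pdebug"}}}
step_overrides = {"attributes": {"system": {"bank": "guests"}}}

# update_recursive mutates base_dict in place; deepcopy the defaults first
# when they need to remain untouched.
merged = update_recursive(deepcopy(defaults), step_overrides)
assert merged == {"attributes": {"system": {"queue": "pdebug", "bank": "guests"}}}
assert defaults == {"attributes": {"system": {"queue": "pdebug"}}}  # unchanged
```
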
+ """ for k, v in update_dict.items(): if k in base_dict and isinstance(base_dict[k], dict) and isinstance(v, dict): update_recursive(base_dict[k], v) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 27830367..ca36aa05 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -14,7 +14,8 @@ iter_dotpath_items, unflatten_dotpath_tuples, unflatten_dotpath_dict, - coerce_dict_values + coerce_dict_values, + update_recursive ) from packaging.version import Version, InvalidVersion @@ -371,3 +372,46 @@ def test_coerce_dict_values(dict_to_coerce, transform, expected_dict): assert coerced_dict == expected_dict assert coerced_dict != dict_to_coerce + +@pytest.mark.parametrize( + "base_dict, update_dict, expected", + [ + # Simple overwrite + ( + {"a": 1, "b": 2}, + {"b": 3, "c": 4}, + {"a": 1, "b": 3, "c": 4} + ), + # Nested dictionary merge (recursive) + ( + {"a": {"b": 1, "c": 2}}, + {"a": {"c": 3, "d": 4}}, + {"a": {"b": 1, "c": 3, "d": 4}} + ), + # Overwrite with non-dict in update_dict + ( + {"a": {"b": 1}, "c": 2}, + {"a": 42}, + {"a": 42, "c": 2} + ), + # Deeply nested merge + ( + {"a": {"b": {"c": 1}}}, + {"a": {"b": {"d": 2}}}, + {"a": {"b": {"c": 1, "d": 2}}} + ), + # Mixed: recursive merge and overwrite + ( + {"x": {"y": 1}, "z": 10}, + {"x": {"y": 2, "w": 3}, "z": {"nested": 5}}, + {"x": {"y": 2, "w": 3}, "z": {"nested": 5}} + ), + ] +) +def test_update_recursive(base_dict, update_dict, expected): + # Use deepcopy to avoid mutating test data + # base = deepcopy(base_dict) + result = update_recursive(base_dict, update_dict) + assert result == expected + # Also check that base itself was mutated as expected + assert base_dict == expected From 4e50a921fd7d9c75e835a9a56a02ac6b81c6eebf Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 16 Oct 2025 12:45:25 -0700 Subject: [PATCH 21/33] Remove deepcopy comments --- tests/utils/test_utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index ca36aa05..038ebb59 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -410,8 +410,7 @@ def test_coerce_dict_values(dict_to_coerce, transform, expected_dict): ) def test_update_recursive(base_dict, update_dict, expected): # Use deepcopy to avoid mutating test data - # base = deepcopy(base_dict) result = update_recursive(base_dict, update_dict) assert result == expected - # Also check that base itself was mutated as expected + # Also check that base_dict itself was mutated as expected assert base_dict == expected From d4079bd4dd16291ae28f379261f6145bc4740ba3 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Thu, 16 Oct 2025 13:04:41 -0700 Subject: [PATCH 22/33] Misc tweaks and fixes, and update batch script outputs to reflect normalization --- docs/Maestro/scheduling.md | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/Maestro/scheduling.md b/docs/Maestro/scheduling.md index 11d650a4..3b3fe41a 100644 --- a/docs/Maestro/scheduling.md +++ b/docs/Maestro/scheduling.md @@ -52,16 +52,18 @@ There are new groups in the batch block in :material-tag:`1.1.12` that facilitat * `allocation_args` for the allocation target (batch directives such as `#Flux: --setopt=foo=bar`) * `launcher_args` for the `$(LAUNCHER)` target (`flux run --setopt=foo=bar`) -These are ~structured mappings which are designed to mimic cli usage for intuitive mapping from 
raw scheduler usage to Maestro. Each of these dictionaries' keys correspond to a scheduler specific CLI argument/option or flag. The serialization rules are as follows, with specific examples here shown for the initial implementation in the flux adapter (other schedulers will yield prefix/separator rules specific to their implementation): +These are ~structured mappings which are designed to mimic cli and batch script directive syntax for intuitive mapping from raw scheduler usage to Maestro. Each of these dictionaries' keys correspond to a scheduler specific CLI argument/option or flag. The serialization rules are as follows, with specific examples here shown for the initial implementation in the flux adapter (other schedulers will yield prefix/separator rules specific to their implementation): -| **Key Type** | **CLI Prefix** | **Separator** | **Example YAML** | **Example CLI Output** | -| :- | :-: | :-: | :-: | :- | -| Single letter | `-` | space (`" "`) | `o: {bar: 42}` | `-o bar=42` | -| Multi-letter | `--` | `=` | `setopt: {foo: bar} | `--setopt=foo=bar` | -| Boolean flag w/key | as above | as above | `setopt: {foobar: } | `--setopt=foobar` | -| Boolean flag w/o key | as above | as above | `exclusive: ` | `--exclusive` | +| **Key Type** | **Prefix** | **Separator** | **Example YAML** | **Example CLI Input/Directive** | +| :- | :- | :- | :- | :- | +| Single letter | `-` | `" "` (space) | `o: {bar: 42}` | `-o bar=42` | +| Multi-letter | `--` | `=` | `setopt: {foo: bar}` | `--setopt=foo=bar` | +| Boolean flag w/key | as above | as above | `setopt: {foobar: }` | `--setopt=foobar` | +| Boolean flag w/o key | as above | as above | `exclusive: ` | `--exclusive` | -Note in the boolean flag strategies, a space is required after the `:` after `foobar: ` or `exclusive`, otherwise yaml will fail to parse and assign the Null value used to tag a key as a boolean flag. See [flux](#Flux) for special considerations fo the `allocation_args` +!!! note "Boolean/Flag type arguments" + + In the boolean flag strategies, a space is required after the `:` after `foobar: ` or `exclusive`, otherwise yaml will fail to parse and assign the Null value used to tag a key as a boolean flag. See [flux](#Flux) for special considerations for the `allocation_args`. ## LAUNCHER Token @@ -259,7 +261,7 @@ Assuming the step has keys `{procs: 1, nodes: 1, cores per task: 1, walltime: "5 #flux: --bank=guests #flux: -t 300s #flux: --setopt=foo=bar -#flux: -o bar=42 +#flux: --setopt=bar=42 #flux: --setattr=foobar=whoops #flux: --conf=resource.rediscover=true @@ -268,7 +270,7 @@ flux run -n 1 -N 1 -c 1 --setopt=optiona myapplication !!! note - Using flux directives here to illustrate even though python api is used. These directives will be in the step scripts, retaining repeatability/record of what was submitted and viewable with the dry run feature + Using flux directives here to illustrate even though python api is used. These directives will be in the step scripts, retaining repeatability/record of what was submitted and viewable with the dry run feature. The batch/allocation arguments are normalized to the long form (`--setattr` instead of `-S`) and will show up that way in the serialized batch scripts. ## LSF: a Tale of Two Launchers ---- From 25d7482c8ba0045910234d9b9cc1e86728e2cd0c Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Wed, 22 Oct 2025 16:52:47 -0700 Subject: [PATCH 23/33] Rework exclusive handling for proper layering of steps ontop of batch defaults. 
Add additional script test case and slurm script tests.
---
 .../interfaces/schedulerscriptadapter.py      | 59 ++++++++++---
 .../interfaces/script/fluxscriptadapter.py    | 80 +++++-----------
 .../interfaces/script/slurmscriptadapter.py   |  7 +-
 ...ye_world_GREETING.Hello.NAME.Pam.flux.sh.1 |  1 -
 ...ye_world_GREETING.Hello.NAME.Pam.flux.sh.3 | 16 ++++
 ...lo_world_GREETING.Hello.NAME.Pam.flux.sh.3 | 16 ++++
 ...e_world_GREETING.Hello.NAME.Pam.slurm.sh.1 | 12 +++
 ...o_world_GREETING.Hello.NAME.Pam.slurm.sh.1 | 13 +++
 .../specs/hello_bye_parameterized_flux_2.yaml | 69 +++++++++++++++
 .../specs/hello_bye_parameterized_flux_3.yaml | 72 ++++++++++++++++
 .../hello_bye_parameterized_slurm_1.yaml      | 50 +++++++++++
 .../script/test_fluxscriptadapter.py          | 16 +++-
 .../script/test_slurmscriptadapter.py         | 83 +++++++++++++++++--
 13 files changed, 413 insertions(+), 81 deletions(-)
 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.3
 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.3
 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.bye_world_GREETING.Hello.NAME.Pam.slurm.sh.1
 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.hello_world_GREETING.Hello.NAME.Pam.slurm.sh.1
 create mode 100644 tests/data/specs/hello_bye_parameterized_flux_2.yaml
 create mode 100644 tests/data/specs/hello_bye_parameterized_flux_3.yaml
 create mode 100644 tests/data/specs/hello_bye_parameterized_slurm_1.yaml

diff --git a/maestrowf/abstracts/interfaces/schedulerscriptadapter.py b/maestrowf/abstracts/interfaces/schedulerscriptadapter.py
index 4f3b979e..3cb3ced0 100644
--- a/maestrowf/abstracts/interfaces/schedulerscriptadapter.py
+++ b/maestrowf/abstracts/interfaces/schedulerscriptadapter.py
@@ -92,14 +92,15 @@ def add_batch_parameter(self, name, value):
         """
         self._batch[name] = value
 
-    @staticmethod
-    def get_exclusive(step_exclusive):
+    def get_exclusive(self, step_exclusive):
         """
         Helper for normalizing new/legacy exclusive syntax in the step keys.
+        None is used as a sentinel for 'not provided' to facilitate layering
+        on top of default values.
 
-        :param step_exclusive: value of 'exclusive' key in StudyStep.run
+        :param step_exclusive: value of 'exclusive' key in a StudyStep.run
         :return dict: normalized dict with 'allocation' and 'launcher' keys
-                      and bool values for each
+                      and bool or None values for each
 
         .. note::
 
@@ -109,19 +110,57 @@ def get_exclusive(step_exclusive):
         """
         # Handle old scalar syntax which applied to allocations only
         if not isinstance(step_exclusive, dict):
-            return {
-                "allocation": step_exclusive,
-                "launcher": False,
-            }
+            if step_exclusive is not None:
+                return {
+                    "allocation": step_exclusive,
+                    "launcher": None,
+                }
+            else:
+                return {
+                    "allocation": None,
+                    "launcher": None,
+                }
         else:
             # Yaml schema limits keys already
             exclusive_dict = step_exclusive
             if 'allocation' not in step_exclusive:
-                exclusive_dict['allocation'] = False
+                exclusive_dict['allocation'] = None
            if 'launcher' not in step_exclusive:
-                exclusive_dict['launcher'] = False
+                exclusive_dict['launcher'] = None
 
             return exclusive_dict
 
+    def resolve_exclusive(self, adapter_exclusive, step_exclusive):
+        """
+        Helper for layering a step's exclusive config on top of the adapter's,
+        treating the adapter settings as default values to override.
+
+        :param adapter_exclusive: 'default' exclusive settings from batch
+                                  config. This is a per queue/machine constant.
+        :type adapter_exclusive: dict
+        :param step_exclusive: value of 'exclusive' key in StudyStep.run
+        :type step_exclusive: dict
+        :return dict: normalized dict with 'allocation' and 'launcher' keys
+                      and bool values for each
+
+        .. note::
+
+            Move this upstream into a future studystep validator? Also add
+            hooks for per scheduler normalizing of the extra args from batch
+            blocks
+        """
+        # Normalize the step's exclusive to gracefully handle old behavior
+        exclusive_updates = self.get_exclusive(step_exclusive)
+
+        # Apply update such that step wins if not None
+        exclusive_dict = {}
+        for key, default_val in adapter_exclusive.items():
+            if key in exclusive_updates and exclusive_updates[key] is not None:
+                exclusive_dict[key] = exclusive_updates[key]
+            else:
+                exclusive_dict[key] = default_val
+
+        return exclusive_dict
+
     @abstractmethod
     def get_header(self, step):
         """
diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py
index 053ebf5c..32e3e20b 100644
--- a/maestrowf/interfaces/script/fluxscriptadapter.py
+++ b/maestrowf/interfaces/script/fluxscriptadapter.py
@@ -39,7 +39,6 @@
     FluxFactory
 from maestrowf.utils import make_safe_path
 
-from rich.pretty import pprint
 
 LOGGER = logging.getLogger(__name__)
 status_re = re.compile(r"Job \d+ status: (.*)$")
@@ -107,11 +106,9 @@ def __init__(self, **kwargs):
         # early error messaging
         if self._allocation_args:
             self._allocation_args = self._interface.normalize_additional_args(self._allocation_args)
-            # pprint(f"{self._allocation_args=}")
 
         if self._launcher_args:
             self._launcher_args = self._interface.normalize_additional_args(self._launcher_args)
-            # pprint(f"{self._launcher_args=}")
 
         # Default queue/bank to "" such that 'truthiness' can exclude them
         # from the jobspec/scripts if not provided
@@ -128,45 +125,28 @@ def __init__(self, **kwargs):
             )
             queue = ""
 
-        # NOTE: if system is omitted, flux defaults to adding that prefix
-        self._batch_attrs = {
-            "system.queue": queue,
-            "system.bank": bank,
-        }
         self.add_batch_parameter("queue", queue)
         self.add_batch_parameter("bank", bank)
 
-        # Pop off the exclusive flags (NOTE: usually comes from steps, not through constructor?)
-        step_exclusive = 'exclusive' in kwargs # Track whether it was in there at all
-        self._exclusive = self.get_exclusive(kwargs.pop("exclusive", False)) # Default to old < 1.1.12 behavior
-
         # Check for the flag in additional args and pop it off, letting step key win later
         # NOTE: only need presence of key as this mimics cli like flag behavior
-        # TODO: promote this to formally supported behavior for all adapters
+        # TODO: promote this to formally supported behavior for all adapters, push down into interfaces?
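+        # For illustration (a sketch, not schema-validated syntax), a batch
+        # block like:
+        #
+        #   batch:
+        #     allocation_args:
+        #       exclusive:   # bare flag -> popped below; allocation exclusive
+        #     launcher_args:
+        #       x:           # short spelling of the same flux flag -> launcher
+        #
+        # arrives here as keys of self._allocation_args / self._launcher_args.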
exclusive_keys = ['x', 'exclusive'] + self._exclusive = {"allocation": False, "launcher": False} if all(ekey in self._allocation_args for ekey in exclusive_keys): LOGGER.warn("Repeated addition of exclusive flags 'x' and 'exclusive' in allocation_args.") - alloc_eflags = [self._allocation_args.pop(ekey, None) for ekey in exclusive_keys] + alloc_eflags = [self._allocation_args.pop(ekey) for ekey in exclusive_keys if ekey in self._allocation_args] if alloc_eflags: - if step_exclusive: - LOGGER.warn("Overriding batch block allocation_args with steps exclusive setting '%s'", - step_exclusive) - else: - self._exclusive['allocation'] = True + self._exclusive['allocation'] = True if all(ekey in self._launcher_args for ekey in exclusive_keys): LOGGER.warn("Repeated addition of exclusive flags 'x' and 'exclusive' in launcher_args.") - launcher_eflags = [self._launcher_args.pop(ekey, None) for ekey in exclusive_keys] + launcher_eflags = [self._launcher_args.pop(ekey) for ekey in exclusive_keys if ekey in self._launcher_args] if launcher_eflags: - if step_exclusive and self._exclusive['launcher']: - LOGGER.warn("Overriding batch block launcher_args with steps exclusive setting '%s'", - step_exclusive) - else: - self._exclusive['launcher'] = True + self._exclusive['launcher'] = True - # NOTE: self.add_batch_parameter("exclusive", self._exclusive['allocation']) # Populate formally supported flux directives in the header @@ -177,7 +157,6 @@ def __init__(self, **kwargs): "walltime": f"{self._flux_directive}" + "-t {walltime}s", "queue": f"{self._flux_directive}" + "-q {queue}", "bank": f"{self._flux_directive}" + "--bank={bank}", - } self._cmd_flags = { @@ -303,11 +282,11 @@ def get_header(self, step): modified_header.append(value.format(**batch_header)) # Handle exclusive flag - step_exclusive_given = "exclusive" in step.run - step_exclusive = self._exclusive - if step_exclusive_given: - # Override the default with this step's setting - step_exclusive.update(self.get_exclusive(step.run.get("exclusive", False))) + # Handle the exclusive flags, updating batch block settings (default) + # with what's set in the step, if any given + step_exclusive = step.run.get("exclusive", None) + + step_exclusive = self.resolve_exclusive(self._exclusive, step_exclusive) if step_exclusive['allocation']: modified_header.append(f"{self._flux_directive}" + "--exclusive") @@ -334,12 +313,10 @@ def get_parallelize_command(self, procs, nodes=None, **kwargs): ntasks = nodes if nodes else self._batch.get("nodes", 1) # Handle the exclusive flags, updating batch block settings (default) - # with what's set in the step - step_exclusive_given = "exclusive" in kwargs - step_exclusive = self._exclusive - if step_exclusive_given: - # Override the default with this step's setting - step_exclusive.update(self.get_exclusive(kwargs.get("exclusive", False))) + # with what's set in the step, if any given + step_exclusive = kwargs.pop("exclusive", None) + + step_exclusive = self.resolve_exclusive(self._exclusive, step_exclusive) # TODO: fix this temp hack when standardizing the exclusive key handling kwargs['exclusive'] = step_exclusive['launcher'] @@ -414,12 +391,10 @@ def submit(self, step, path, cwd, job_map=None, env=None): raise ValueError(msg) # Handle the exclusive flags, updating batch block settings (default) - # with what's set in the step - step_exclusive_given = "exclusive" in step.run - step_exclusive = self._exclusive - if step_exclusive_given: - # Override the default with this step's setting - 
step_exclusive.update(self.get_exclusive(step.run.get("exclusive", False))) + # with what's set in the step, if any given + step_exclusive = step.run.get("exclusive", None) + + step_exclusive = self.resolve_exclusive(self._exclusive, step_exclusive) # Unpack waitable flag and pass it along if there: only pass it along if # it's in the step maybe, leaving each adapter to retain their defaults? @@ -427,14 +402,6 @@ def submit(self, step, path, cwd, job_map=None, env=None): # Normalize the allocation args to api flux.job.JobspecV1 expects packed_alloc_args = self._interface.pack_addtl_batch_args(self._allocation_args) - # print(f"{normalized_alloc_args=}") - - # Setup placholder for the queue/bank attributes if no already added by user - # in the allocation_args - # NOTE: should we flatten these treedict style? conf looks like no treedict, but - # the others look like they support it even via python api - # if "system" not in normalized_alloc_args["attributes"]: - # normalized_alloc_args["attributes"]["system"] = {} # Add queue and bank queue = self._batch["queue"] @@ -444,15 +411,6 @@ def submit(self, step, path, cwd, job_map=None, env=None): if bank == "": bank = None - # if self._batch["queue"]: - # normalized_alloc_args["attributes"]["system"]["queue"] = self._batch["queue"] - # if self._batch["bank"]: - # # TODO: revisit whether it makes sense to add bank if queue is empty -> - # # nested brokers usually have neither, and bank falls through silently.. - # normalized_alloc_args["attributes"]["system"]["bank"] = self._batch["bank"] - - # pprint(f"Packed alloc args for {step.name}:") - # pprint(packed_alloc_args) jobid, retcode, submit_status = \ self._interface.submit( nodes, processors, cores_per_task, path, cwd, walltime, ngpus, diff --git a/maestrowf/interfaces/script/slurmscriptadapter.py b/maestrowf/interfaces/script/slurmscriptadapter.py index f0751235..e64d5175 100644 --- a/maestrowf/interfaces/script/slurmscriptadapter.py +++ b/maestrowf/interfaces/script/slurmscriptadapter.py @@ -78,6 +78,11 @@ def __init__(self, **kwargs): # Check for procs separately, as we don't want it in the header if it's # not present. procs = kwargs.get("procs", None) + + # Placeholder till future refactor to push up into spec ingestion/step + # parsing.. 
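+        # Both default to False here; a step's 'exclusive' key (bool or an
+        # {allocation, launcher} mapping) overrides these later via
+        # resolve_exclusive() in get_header().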
+ self._exclusive = {"allocation": False, "launcher": False} + if procs: self.add_batch_parameter("procs", procs) @@ -154,7 +159,7 @@ def get_header(self, step): if procs_in_batch or not nodes: modified_header.append(self._ntask_header.format(**resources)) - exclusive = self.get_exclusive(resources.get("exclusive", False)) + exclusive = self.resolve_exclusive(self._exclusive, resources.get("exclusive", None)) if exclusive['allocation']: modified_header.append(self._exclusive_header) diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 index 56462a18..b5af7fcc 100644 --- a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1 @@ -6,7 +6,6 @@ #flux: -t 60.0s #flux: -q pdebug #flux: --bank=guests -#flux: --exclusive #flux: --setattr=gpumode=CPX #flux: --setopt=foo=bar #flux: --setopt=bar=42 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.3 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.3 new file mode 100644 index 00000000..56462a18 --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.3 @@ -0,0 +1,16 @@ +#!/bin/bash +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.78.0 +# +#flux: -N 1 +#flux: -t 60.0s +#flux: -q pdebug +#flux: --bank=guests +#flux: --exclusive +#flux: --setattr=gpumode=CPX +#flux: --setopt=foo=bar +#flux: --setopt=bar=42 +#flux: --conf=resource.rediscover=true + +flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Bye, World!" > bye.txt +flux run -n 1 -N 1 -c 1 --setopt=fastload sleep 1 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.3 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.3 new file mode 100644 index 00000000..d584e9be --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.3 @@ -0,0 +1,16 @@ +#!/bin/bash +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.78.0 +# +#flux: -N 1 +#flux: -t 60.0s +#flux: -q pdebug +#flux: --bank=guests +#flux: --exclusive +#flux: --setattr=gpumode=CPX +#flux: --setopt=foo=bar +#flux: --setopt=bar=42 +#flux: --conf=resource.rediscover=true + +flux run -n 1 -N 1 -c 1 --exclusive --setopt=fastload echo "Hello, Pam!" 
> Hello_Pam.txt +flux run -n 1 -N 1 -c 1 --exclusive --setopt=fastload sleep 1 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.bye_world_GREETING.Hello.NAME.Pam.slurm.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.bye_world_GREETING.Hello.NAME.Pam.slurm.sh.1 new file mode 100644 index 00000000..35b2abcb --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.bye_world_GREETING.Hello.NAME.Pam.slurm.sh.1 @@ -0,0 +1,12 @@ +#!/bin/bash +#SBATCH --partition=pdebug +#SBATCH --account=guests +#SBATCH --time=00:60 +#SBATCH --job-name="bye_world_GREETING.Hello.NAME.Pam" +#SBATCH --output="bye_world_GREETING.Hello.NAME.Pam.out" +#SBATCH --error="bye_world_GREETING.Hello.NAME.Pam.err" +#SBATCH --comment "Say bye to someone!" +#SBATCH --ntasks=1 + +srun -n 1 echo "Bye, World!" > bye.txt +srun -n 1 sleep 1 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.hello_world_GREETING.Hello.NAME.Pam.slurm.sh.1 b/tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.hello_world_GREETING.Hello.NAME.Pam.slurm.sh.1 new file mode 100644 index 00000000..8893b495 --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_slurm.hello_world_GREETING.Hello.NAME.Pam.slurm.sh.1 @@ -0,0 +1,13 @@ +#!/bin/bash +#SBATCH --partition=pdebug +#SBATCH --account=guests +#SBATCH --time=00:60 +#SBATCH --job-name="hello_world_GREETING.Hello.NAME.Pam" +#SBATCH --output="hello_world_GREETING.Hello.NAME.Pam.out" +#SBATCH --error="hello_world_GREETING.Hello.NAME.Pam.err" +#SBATCH --comment "Say hello to someone!" +#SBATCH --ntasks=1 +#SBATCH --exclusive + +srun -n 1 echo "Hello, Pam!" > Hello_Pam.txt +srun -n 1 sleep 1 diff --git a/tests/data/specs/hello_bye_parameterized_flux_2.yaml b/tests/data/specs/hello_bye_parameterized_flux_2.yaml new file mode 100644 index 00000000..c270f2eb --- /dev/null +++ b/tests/data/specs/hello_bye_parameterized_flux_2.yaml @@ -0,0 +1,69 @@ +description: + name: hello_bye_world_flux_2 + description: Test variant of hello_bye_parameterized that exercises extra flux args. Uses invalid bank on purpose for exercising jobspec/script serialization and not actual running + +batch: + type : flux + host : rzadams + bank : invalid_bank + queue : pdebug + allocation_args: + setopt: # > 1 letter gets '--' prefix: --setopt foo=true + foo: bar + o: # single letter = '-' prefix; o == setopt for flux: -o bar=42 + bar: 42 + setattr: + gpumode: CPX + conf: + resource: + rediscover: "true" # Flux wants 'true', not 'True' that python bool serializes to + noverify: "true" + + launcher_args: + setopt: + fastload: + + + +env: + variables: + OUTPUT_PATH: hello_bye_world + labels: + OUT_FORMAT: $(GREETING)_$(NAME).txt + +study: + - name: hello_world + description: Say hello to someone! + run: + cmd: | + $(LAUNCHER) echo "$(GREETING), $(NAME)!" > $(OUT_FORMAT) + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + exclusive: True + walltime: "00:60" + + - name: bye_world + description: Say bye to someone! + run: + cmd: | + $(LAUNCHER) echo "Bye, World!" 
> bye.txt + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + walltime: "00:60" + depends: [hello_world] + +global.parameters: + # NAME: + # values: [Pam, Jim, Michael, Dwight] + # label: NAME.%% + # GREETING: + # values: [Hello, Ciao, Hey, Hi] + # label: GREETING.%% + NAME: + values: [Pam] + label: NAME.%% + GREETING: + values: [Hello] + label: GREETING.%% diff --git a/tests/data/specs/hello_bye_parameterized_flux_3.yaml b/tests/data/specs/hello_bye_parameterized_flux_3.yaml new file mode 100644 index 00000000..1ac5ff90 --- /dev/null +++ b/tests/data/specs/hello_bye_parameterized_flux_3.yaml @@ -0,0 +1,72 @@ +description: + name: hello_bye_world_flux_3 + description: Test variant of hello_bye_parameterized that exercises new exclusive flag behaviors + +batch: + type : flux + host : rzadams + bank : guests + queue : pdebug + allocation_args: + setopt: # > 1 letter gets '--' prefix: --setopt foo=true + foo: bar + o: # single letter = '-' prefix; o == setopt for flux: -o bar=42 + bar: 42 + setattr: + gpumode: CPX + conf: + resource: + rediscover: "true" # Flux wants 'true', not 'True' that python bool serializes to + + launcher_args: + setopt: + fastload: + exclusive: # Flag, so no value needed + + + +env: + variables: + OUTPUT_PATH: hello_bye_world + labels: + OUT_FORMAT: $(GREETING)_$(NAME).txt + +study: + - name: hello_world + description: Say hello to someone! + run: + cmd: | + $(LAUNCHER) echo "$(GREETING), $(NAME)!" > $(OUT_FORMAT) + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + exclusive: True # Testing mixed batch/step + walltime: "00:60" + + - name: bye_world + description: Say bye to someone! + run: + cmd: | + $(LAUNCHER) echo "Bye, World!" > bye.txt + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + exclusive: # Test overriding launcher from batch block + allocation: True + launcher: False + walltime: "00:60" + depends: [hello_world] + +global.parameters: + # NAME: + # values: [Pam, Jim, Michael, Dwight] + # label: NAME.%% + # GREETING: + # values: [Hello, Ciao, Hey, Hi] + # label: GREETING.%% + NAME: + values: [Pam] + label: NAME.%% + GREETING: + values: [Hello] + label: GREETING.%% diff --git a/tests/data/specs/hello_bye_parameterized_slurm_1.yaml b/tests/data/specs/hello_bye_parameterized_slurm_1.yaml new file mode 100644 index 00000000..d5506962 --- /dev/null +++ b/tests/data/specs/hello_bye_parameterized_slurm_1.yaml @@ -0,0 +1,50 @@ +description: + name: hello_bye_world_slurm_1 + description: Test variant of hello_bye_parameterized for slurm script serialization + +batch: + type : slurm + host : rzwhippet + bank : guests + queue : pdebug + +env: + variables: + OUTPUT_PATH: hello_bye_world + labels: + OUT_FORMAT: $(GREETING)_$(NAME).txt + +study: + - name: hello_world + description: Say hello to someone! + run: + cmd: | + $(LAUNCHER) echo "$(GREETING), $(NAME)!" > $(OUT_FORMAT) + $(LAUNCHER) sleep 1 + procs: 1 + exclusive: True + walltime: "00:60" + + - name: bye_world + description: Say bye to someone! + run: + cmd: | + $(LAUNCHER) echo "Bye, World!" 
> bye.txt
+        $(LAUNCHER) sleep 1
+      procs: 1
+      walltime: "00:60"
+      depends: [hello_world]
+
+global.parameters:
+  # NAME:
+  #   values: [Pam, Jim, Michael, Dwight]
+  #   label: NAME.%%
+  # GREETING:
+  #   values: [Hello, Ciao, Hey, Hi]
+  #   label: GREETING.%%
+  NAME:
+    values: [Pam]
+    label: NAME.%%
+  GREETING:
+    values: [Hello]
+    label: GREETING.%%
diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py
index 8d444d1f..af91226a 100644
--- a/tests/interfaces/script/test_fluxscriptadapter.py
+++ b/tests/interfaces/script/test_fluxscriptadapter.py
@@ -40,12 +40,13 @@
 
 from maestrowf.interfaces import ScriptAdapterFactory
 from maestrowf.datastructures.core import Study
-from maestrowf.datastructures.core.executiongraph import ExecutionGraph, _StepRecord
+from maestrowf.datastructures.core.executiongraph import ExecutionGraph
 from maestrowf.specification import YAMLSpecification
 
 from rich.pretty import pprint
 
 import pytest
 
+
 def test_flux_adapter():
     """
     Tests to verify that FluxScriptAdapter has the key property set to 'flux'
@@ -84,6 +85,14 @@ def test_flux_adapter_in_factory():
             "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.1"
         }
     ),
+    (
+        "hello_bye_parameterized_flux",
+        3,
+        { # NOTE: should we construct this, and just use study + step_id + sched.sh.variant_id?
+            "hello_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.3",
+            "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.3"
+        }
+    ),
     ]
 )
 def test_flux_script_serialization(
@@ -108,6 +117,7 @@ def test_flux_script_serialization(
     ex_graph.set_adapter(yaml_spec.batch)
 
     adapter = ScriptAdapterFactory.get_adapter(yaml_spec.batch["type"])
+    pprint(f"Adapter args: {ex_graph._adapter}")
     adapter = adapter(**ex_graph._adapter)
 
     # Setup a diff ignore pattern to filter out the #INFO (flux version ...
@@ -119,9 +129,9 @@ def test_flux_script_serialization(
     for step_name, step_record in ex_graph.values.items():
         if step_name == "_source":
             continue
-
+        pprint(f"Step name: {step_name}")
         ex_graph._execute_record(step_record, adapter)
-        # pprint("Step name:")
+
         # pprint(step_name)
         # pprint("Step record:")
         # pprint(step_record)
diff --git a/tests/interfaces/script/test_slurmscriptadapter.py b/tests/interfaces/script/test_slurmscriptadapter.py
index 3d148296..bd4f5a9c 100644
--- a/tests/interfaces/script/test_slurmscriptadapter.py
+++ b/tests/interfaces/script/test_slurmscriptadapter.py
@@ -40,22 +40,28 @@
 from subprocess import CalledProcessError, check_output
 import time
 
+from maestrowf.datastructures.core import Study
+from maestrowf.datastructures.core.executiongraph import ExecutionGraph
+
 from maestrowf.abstracts.enums import State, JobStatusCode
 from maestrowf.interfaces.script.slurmscriptadapter import SlurmScriptAdapter
 from maestrowf.interfaces import ScriptAdapterFactory
+from maestrowf.specification import YAMLSpecification
 from maestrowf.utils import start_process
 
+from rich.pretty import pprint
 
 import pytest
 
 TESTLOGGER = logging.getLogger(__name__)
 
+
 def test_slurm_adapter():
     """
     Tests to verify that SlurmScriptAdapter has the key property set to 'slurm';
     this validates that existing specifications do not break.
    :return:
     """
-    assert(SlurmScriptAdapter.key == 'slurm')
+    assert SlurmScriptAdapter.key == 'slurm'
 
 
 def test_slurm_adapter_in_factory():
@@ -66,13 +72,12 @@ def test_slurm_adapter_in_factory():
     """
     saf = ScriptAdapterFactory
     # Make sure SlurmScriptAdapter is in the factories object
-    assert(saf.factories[SlurmScriptAdapter.key] == SlurmScriptAdapter)
+    assert saf.factories[SlurmScriptAdapter.key] == SlurmScriptAdapter
     # Make sure the SlurmScriptAdapter key is in the valid adapters
-    assert(SlurmScriptAdapter.key in ScriptAdapterFactory.get_valid_adapters())
+    assert SlurmScriptAdapter.key in ScriptAdapterFactory.get_valid_adapters()
     # Make sure that get_adapter returns the SlurmScriptAdapter when asking
     # for it by key
-    assert(ScriptAdapterFactory.get_adapter(SlurmScriptAdapter.key) ==
-           SlurmScriptAdapter)
+    assert ScriptAdapterFactory.get_adapter(SlurmScriptAdapter.key) == SlurmScriptAdapter
 
 
 # Slurm fixtures for checking scheduler connectivity
@@ -249,3 +254,71 @@ def test_slurm_cancel(slurm_test_jobs, caplog):
     # then look for cancelled?
     for jobid, jobstate in job_status.items():
         assert jobstate == State.CANCELLED
+
+# @pytest.mark.sched_slurm # NOTE: don't actually need to be on system for this yet
+@pytest.mark.parametrize(
+    "spec_file, variant_id, expected_batch_files",
+    [
+        (
+            "hello_bye_parameterized_slurm",
+            1,
+            { # NOTE: should we construct this, and just use study + step_id + sched.sh.variant_id?
+                "hello_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_slurm.hello_world_GREETING.Hello.NAME.Pam.slurm.sh.1",
+                "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_slurm.bye_world_GREETING.Hello.NAME.Pam.slurm.sh.1"
+            }
+        ),
+    ]
+)
+def test_slurm_script_serialization(
+    spec_file,
+    variant_id,
+    expected_batch_files,
+    variant_spec_path,
+    load_study,
+    variant_expected_output,
+    text_diff,
+    tmp_path  # Pytest tmp dir fixture: Path
+):
+    spec_path = variant_spec_path(spec_file + f"_{variant_id}.yaml")
+
+    yaml_spec = YAMLSpecification.load_specification(spec_path)  # Load this up to get the batch info
+
+    study: Study = load_study(spec_path, tmp_path, dry_run=True)
+
+    pkl_path: str
+    ex_graph: ExecutionGraph
+    pkl_path, ex_graph = study.stage()
+    ex_graph.set_adapter(yaml_spec.batch)
+
+    adapter = ScriptAdapterFactory.get_adapter(yaml_spec.batch["type"])
+    pprint(f"Adapter args: {ex_graph._adapter}")
+    adapter = adapter(**ex_graph._adapter)
+
+    # Setup a diff ignore pattern to filter out the #INFO (flux version ...
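+    # (Slurm scripts don't carry the "#INFO (flux version)" header lines the
+    # flux ones do, so nothing needs filtering here; a flux-style entry might
+    # look like ignore_patterns = [r"#INFO \(flux version\)"] -- hypothetical,
+    # assuming the text_diff fixture treats entries as regex patterns.)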
+ ignore_patterns = [] + + # Loop over the steps and execute them + for step_name, step_record in ex_graph.values.items(): + if step_name == "_source": + continue + pprint(f"Step name: {step_name}") + ex_graph._execute_record(step_record, adapter) + + # pprint(step_name) + # pprint("Step record:") + # pprint(step_record) + # # pprint(_record) + # pprint("Written script:") + with open(step_record.script, "r") as written_script_file: + written_script = written_script_file.read() + # pprint(written_script.splitlines()) + + assert step_name in expected_batch_files + + pprint(f"{expected_batch_files[step_name]=}") + pprint(f"{variant_expected_output(expected_batch_files[step_name])=}") + with open(variant_expected_output(expected_batch_files[step_name]), 'r') as ebf: + expected_script = ebf.read() + + # assert written_script == expected_script + assert text_diff(written_script, expected_script, ignore_patterns=ignore_patterns) From 74c4361926e96a6aaac7f89756d56b6e2764204d Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Wed, 22 Oct 2025 17:39:34 -0700 Subject: [PATCH 24/33] Address review feedback to clarify examples --- docs/Maestro/scheduling.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/Maestro/scheduling.md b/docs/Maestro/scheduling.md index 3b3fe41a..d72f65c6 100644 --- a/docs/Maestro/scheduling.md +++ b/docs/Maestro/scheduling.md @@ -56,14 +56,14 @@ These are ~structured mappings which are designed to mimic cli and batch script | **Key Type** | **Prefix** | **Separator** | **Example YAML** | **Example CLI Input/Directive** | | :- | :- | :- | :- | :- | -| Single letter | `-` | `" "` (space) | `o: {bar: 42}` | `-o bar=42` | -| Multi-letter | `--` | `=` | `setopt: {foo: bar}` | `--setopt=foo=bar` | -| Boolean flag w/key | as above | as above | `setopt: {foobar: }` | `--setopt=foobar` | -| Boolean flag w/o key | as above | as above | `exclusive: ` | `--exclusive` | +| Single letter | `-` | `" "` (space) |
o:
bar: 42
| `-o bar=42` | +| Multi-letter | `--` | `=` |
setopt:
foo: bar
| `--setopt=foo=bar` | +| Boolean flag w/key | as above | as above |
setopt:
foobar: #
| `--setopt=foobar` |
+| Boolean flag w/o key | as above | as above | `exclusive: #` | `--exclusive` |
 
 !!! note "Boolean/Flag type arguments"
 
-    In the boolean flag strategies, a space is required after the `:` after `foobar: ` or `exclusive`, otherwise yaml will fail to parse and assign the Null value used to tag a key as a boolean flag. See [flux](#Flux) for special considerations for the `allocation_args`.
+    In the boolean flag strategies, a space is required after the `:` after `foobar: ` or `exclusive`, otherwise YAML will fail to parse and assign the null value used to tag a key as a boolean flag. See [flux](#Flux) for special considerations for the `allocation_args`. See the examples above, where a '#' is added after a space to guarantee there is such a space after the `:`.
 
 ## LAUNCHER Token
 
@@ -216,7 +216,7 @@ See the [flux framework](https://flux-framework.readthedocs.io/en/latest/index.h
 ### Extra Flux Args
 ----
-As of :material-tag:`1.1.12`, the flux adapter takes advantage of new argument pass through for scheduler options that Maestro cannot abstract away. This is done via `allocation_args` and `launcher_args` in the batch block, which expand upon the previous `args` input which only applied to `$(LAUNCHER)`. There are some caveat's here due to the way Maestro talks to flux. The current flux adapters all use the python api's from Flux to build the batch jobs, with the serialized batch script being serialized separately instead of submitted directly as with the other schedulers. A consequence of this is the `allocation_args` map to specific call points on that python api, and thus the option pass through is not quite arbitrary. There are 4 currently supported options for allocations which cover a majority of usecases (open an issue and let us know if one you need isn't covered!):
+As of :material-tag:`1.1.12`, the flux adapter takes advantage of new argument pass through for scheduler options that Maestro cannot abstract away. This is done via `allocation_args` and `launcher_args` in the batch block, which expand upon the previous `args` input that only applied to `$(LAUNCHER)`. There are some caveats here due to the way Maestro talks to flux. The current flux adapters all use the Python APIs from Flux to build the batch jobs, with the batch script serialized separately instead of submitted directly as with the other schedulers. A consequence of this is that the `allocation_args` map to specific call points on that Python API, and thus the option pass through is not quite arbitrary. There are 4 currently supported options for allocations which cover a majority of use cases (open an issue and let us know if there is a use case you need that is not covered!):
 
 * shell options: `-o/--setopt` prefixed arguments
 * attributes: `-S/--setattr` prefixed arguments
@@ -249,7 +249,7 @@ batch:
       resource.rediscover: "true" # Use string "true" for Flux compatibility, not "True" or bool True
   launcher_args:
     setopt:
-      optiona: # Boolean flag, no value needed
+      optiona: # Boolean flag, no value needed. NOTE: This is a made up key for demonstration
 ```
 
 #### Example Batch Script

From 85864b6b88debc2ea9af6907e8d65875d2edac3c Mon Sep 17 00:00:00 2001
From: Jeremy White <44277022+jwhite242@users.noreply.github.com>
Date: Fri, 24 Oct 2025 11:58:11 -0700
Subject: [PATCH 25/33] Wire up proper filtering and logging of unhandled allocation args in flux adapters, silent pass through for < 0.49 variants

---
 maestrowf/abstracts/interfaces/flux.py        | 20 +++++
 .../interfaces/script/_flux/flux0_49_0.py     | 34 ++++++---
 .../interfaces/script/fluxscriptadapter.py    |  6 +-
 ...ye_world_GREETING.Hello.NAME.Pam.flux.sh.4 | 16 ++++
 ...lo_world_GREETING.Hello.NAME.Pam.flux.sh.4 | 16 ++++
 .../specs/hello_bye_parameterized_flux_4.yaml | 74 +++++++++++++++++++
 .../script/test_fluxscriptadapter.py          |  8 ++
 7 files changed, 163 insertions(+), 11 deletions(-)
 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.4
 create mode 100644 tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.4
 create mode 100644 tests/data/specs/hello_bye_parameterized_flux_4.yaml

diff --git a/maestrowf/abstracts/interfaces/flux.py b/maestrowf/abstracts/interfaces/flux.py
index 5ea3d0a0..9a9e59c6 100644
--- a/maestrowf/abstracts/interfaces/flux.py
+++ b/maestrowf/abstracts/interfaces/flux.py
@@ -333,3 +333,23 @@ def get_cli_arg_prefix_sep(cli_key):
             return {"prefix": "-", "sep": " "}
         else:
             return {"prefix": "--", "sep": "="}
+
+    @classmethod
+    def normalize_additional_args(cls, args_dict, group_name=None, filter_unknown=False):
+        """
+        Helper to normalize additional arguments to known types and an
+        unflattened nested dictionary structure. This unflattens any
+        dotpath encoded nested dictionary keys.
+
+        :param args_dict: Dictionary of flux arg keys and name: value pairs
+        :type args_dict: dict
+        :param group_name: Optional name of group/tag to use in log messages
+                           when filter_unknown is on
+        :type group_name: str
+        :param filter_unknown: flag to block pass through of unknown args, e.g.
+                               for allocation where we can't handle arbitrary
+        :type filter_unknown: bool
+        :return: dict of packed args with top level keys being the adapter
+                 version specific addtl_alloc_arg_types
+        """
+        return args_dict
diff --git a/maestrowf/interfaces/script/_flux/flux0_49_0.py b/maestrowf/interfaces/script/_flux/flux0_49_0.py
index f17b78e2..6927fd5b 100644
--- a/maestrowf/interfaces/script/_flux/flux0_49_0.py
+++ b/maestrowf/interfaces/script/_flux/flux0_49_0.py
@@ -162,45 +162,59 @@ def render_additional_args(cls, args_dict):
             yield rendered_opt
 
     @classmethod
-    def normalize_additional_args(cls, args_dict):
+    def normalize_additional_args(cls, args_dict, group_name=None, filter_unknown=False):
         """
         Helper to normalize additional arguments to known types and an
         unflattened nested dictionary structure. This unflattens any
         dotpath encoded nested dictionary keys.
 
         :param args_dict: Dictionary of flux arg keys and name: value pairs
+        :type args_dict: dict
+        :param group_name: Optional name of group/tag to use in log messages
+                           when filter_unknown is on
+        :type group_name: str
+        :param filter_unknown: flag to block pass through of unknown args, e.g.
+ for allocation where we can't handle arbitrary + :type filter_unknown: bool :return: dict of packed args with top level keys being the adapter version specific addtl_alloc_arg_types """ # First, normalize and unflatten everything into dicts - # print(f"original_additional_args: {args_dict=}") unflattened_batch_args = { arg_type: {} for arg_type in cls.addtl_alloc_arg_types() } + if filter_unknown: + known_arg_prefixes = ', '.join([f"'{prefix}'" for prefix in cls._addtl_alloc_arg_type_map.keys()]) + else: + known_arg_prefixes = '' + for arg_key, arg_values in args_dict.items(): arg_type = cls.addtl_alloc_arg_type_map(arg_key) - # print(f"normalize_additional_args: {arg_key=} -> {arg_type=}") if arg_type is None: - # We only restrict for allocations, so defer logging/rejection - # of types not handled by jobspec builder + if filter_unknown: + LOGGER.warn( + "Filtering '%s' in unhandled type '%s' from '%s' args." + " Known types are %s", + arg_values, + arg_key, + str(group_name) if group_name else "UNKNOWN", + known_arg_prefixes + ) + continue + arg_type = arg_key if isinstance(arg_values, dict): unflattened_batch_arg = {arg_type: unflatten_dotpath_dict(arg_values)} - # unflattened_batch_args[arg_key] = unflatten_dotpath_dict(arg_values) else: unflattened_batch_arg = {arg_type: arg_values} - # unflattened_batch_args[arg_key] = arg_values # Update to ensure we don't clobber prior values - # print(f"pre-update unflattened_batch_args: {unflattened_batch_args=}") unflattened_batch_args = update_recursive(unflattened_batch_args, unflattened_batch_arg) - # print(f"post-update unflattened_batch_args: {unflattened_batch_args=}") - # print(f"normalized_additional_args: {unflattened_batch_args=}") return unflattened_batch_args @classmethod diff --git a/maestrowf/interfaces/script/fluxscriptadapter.py b/maestrowf/interfaces/script/fluxscriptadapter.py index 32e3e20b..f3eeb9ef 100644 --- a/maestrowf/interfaces/script/fluxscriptadapter.py +++ b/maestrowf/interfaces/script/fluxscriptadapter.py @@ -105,7 +105,11 @@ def __init__(self, **kwargs): # TODO: workon a validation api that can be hooked up to yamlspec ingestion machinery for # early error messaging if self._allocation_args: - self._allocation_args = self._interface.normalize_additional_args(self._allocation_args) + self._allocation_args = self._interface.normalize_additional_args( + self._allocation_args, + group_name="allocation", + filter_unknown=True + ) if self._launcher_args: self._launcher_args = self._interface.normalize_additional_args(self._launcher_args) diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.4 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.4 new file mode 100644 index 00000000..56462a18 --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.4 @@ -0,0 +1,16 @@ +#!/bin/bash +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.78.0 +# +#flux: -N 1 +#flux: -t 60.0s +#flux: -q pdebug +#flux: --bank=guests +#flux: --exclusive +#flux: --setattr=gpumode=CPX +#flux: --setopt=foo=bar +#flux: --setopt=bar=42 +#flux: --conf=resource.rediscover=true + +flux run -n 1 -N 1 -c 1 --setopt=fastload echo "Bye, World!" 
> bye.txt +flux run -n 1 -N 1 -c 1 --setopt=fastload sleep 1 diff --git a/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.4 b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.4 new file mode 100644 index 00000000..d584e9be --- /dev/null +++ b/tests/data/expected_spec_outputs/hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.4 @@ -0,0 +1,16 @@ +#!/bin/bash +#INFO (flux adapter version) 0.49.0 +#INFO (flux version) 0.78.0 +# +#flux: -N 1 +#flux: -t 60.0s +#flux: -q pdebug +#flux: --bank=guests +#flux: --exclusive +#flux: --setattr=gpumode=CPX +#flux: --setopt=foo=bar +#flux: --setopt=bar=42 +#flux: --conf=resource.rediscover=true + +flux run -n 1 -N 1 -c 1 --exclusive --setopt=fastload echo "Hello, Pam!" > Hello_Pam.txt +flux run -n 1 -N 1 -c 1 --exclusive --setopt=fastload sleep 1 diff --git a/tests/data/specs/hello_bye_parameterized_flux_4.yaml b/tests/data/specs/hello_bye_parameterized_flux_4.yaml new file mode 100644 index 00000000..ee5f36cb --- /dev/null +++ b/tests/data/specs/hello_bye_parameterized_flux_4.yaml @@ -0,0 +1,74 @@ +description: + name: hello_bye_world_flux_4 + description: Test variant of hello_bye_parameterized that tests filtering of unhandled alloc arg groups + +batch: + type : flux + host : rzadams + bank : guests + queue : pdebug + allocation_args: + setopt: # > 1 letter gets '--' prefix: --setopt foo=true + foo: bar + o: # single letter = '-' prefix; o == setopt for flux: -o bar=42 + bar: 42 + setattr: + gpumode: CPX + conf: + resource: + rediscover: "true" # Flux wants 'true', not 'True' that python bool serializes to + unhandled_group: + somevar: ignore_me + + launcher_args: + setopt: + fastload: + exclusive: # Flag, so no value needed + + + +env: + variables: + OUTPUT_PATH: hello_bye_world + labels: + OUT_FORMAT: $(GREETING)_$(NAME).txt + +study: + - name: hello_world + description: Say hello to someone! + run: + cmd: | + $(LAUNCHER) echo "$(GREETING), $(NAME)!" > $(OUT_FORMAT) + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + exclusive: True # Testing mixed batch/step + walltime: "00:60" + + - name: bye_world + description: Say bye to someone! + run: + cmd: | + $(LAUNCHER) echo "Bye, World!" > bye.txt + $(LAUNCHER) sleep 1 + procs: 1 + nested: True + exclusive: # Test overriding launcher from batch block + allocation: True + launcher: False + walltime: "00:60" + depends: [hello_world] + +global.parameters: + # NAME: + # values: [Pam, Jim, Michael, Dwight] + # label: NAME.%% + # GREETING: + # values: [Hello, Ciao, Hey, Hi] + # label: GREETING.%% + NAME: + values: [Pam] + label: NAME.%% + GREETING: + values: [Hello] + label: GREETING.%% diff --git a/tests/interfaces/script/test_fluxscriptadapter.py b/tests/interfaces/script/test_fluxscriptadapter.py index af91226a..8ec841e6 100644 --- a/tests/interfaces/script/test_fluxscriptadapter.py +++ b/tests/interfaces/script/test_fluxscriptadapter.py @@ -93,6 +93,14 @@ def test_flux_adapter_in_factory(): "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.3" } ), + ( # Group to test unknown alloc arg filtering + "hello_bye_parameterized_flux", + 4, + { # NOTE: should we contsruct this, and just use study + step_id + sched.sh.variant_id? 
+            "hello_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.hello_world_GREETING.Hello.NAME.Pam.flux.sh.4",
+            "bye_world_GREETING.Hello.NAME.Pam": "hello_bye_parameterized_flux.bye_world_GREETING.Hello.NAME.Pam.flux.sh.4"
+        }
+    ),
     ]
 )
 def test_flux_script_serialization(

From 69aa1cbda16242c4d3fa094572dfd18fd0143972 Mon Sep 17 00:00:00 2001
From: Jeremy White <44277022+jwhite242@users.noreply.github.com>
Date: Fri, 24 Oct 2025 13:41:00 -0700
Subject: [PATCH 26/33] Cleanup comments/misc formatting fixes

---
 .../interfaces/script/_flux/flux0_49_0.py     | 42 ++++---------------
 1 file changed, 8 insertions(+), 34 deletions(-)

diff --git a/maestrowf/interfaces/script/_flux/flux0_49_0.py b/maestrowf/interfaces/script/_flux/flux0_49_0.py
index 6927fd5b..b733cdd2 100644
--- a/maestrowf/interfaces/script/_flux/flux0_49_0.py
+++ b/maestrowf/interfaces/script/_flux/flux0_49_0.py
@@ -107,6 +107,9 @@ def get_addtl_arg_cli_key(cls, arg_type):
             return cls._addtl_arg_cli_key.get(cls._addtl_alloc_arg_type_map[arg_type])
 
         else:
+            # NOTE: can't log unknowns here -> this func doesn't know which
+            # group of args it's processing (alloc or launcher)
+            # Defer to alloc normalization to pre-clean the alloc group
             return arg_type
 
     @classmethod
@@ -146,12 +149,11 @@ def render_additional_args(cls, args_dict):
 
         # Note: dotpath encoding comes after the group/prefix (setattr, ...)
         for dotpath, value in iter_dotpath_items(arg_value):
-            # print(f"{dotpath=} {value=}")
             rendered_opt = base_render_tmpl.format(
                 prefix=cli_info['prefix'],
-                cli_key=cli_key,
-                sep=cli_info['sep'],
-                dotpath=dotpath
+                cli_key=cli_key,
+                sep=cli_info['sep'],
+                dotpath=dotpath
             )
 
@@ -300,12 +302,7 @@ def submit(
 
         # Attach any conf inputs to the jobspec
         conf_dict = addtl_batch_args.get('conf', None)
-        # if "conf" in addtl_batch_args:
-        #     conf_dict = addtl_batch_args['conf']
-        # else:
-        #     conf_dict = None
-
         if force_broker:
             LOGGER.debug(
                 "Launch under Flux sub-broker. [force_broker=%s, "
@@ -358,31 +355,15 @@ def submit(
 
             # Slurp in extra attributes if not null
             # (-S/--setattr)
-            # NOTE: these are sanitized upstream to be dotpath, value tuples
+            # NOTE: these are sanitized upstream to be (dotpath, value) tuples, with flags
+            # set to a value of '1' to match the flux cli
             for batch_attr_dotpath, batch_attr_value in addtl_batch_args["attributes"]:
-                # pprint(f"Setting {batch_attr_dotpath}:")
-                # pprint(batch_attr_value)
                 jobspec.setattr(batch_attr_dotpath, batch_attr_value)
-                # pprint(jobspec.dumps())
-            # else:
-            #     pprint(f"Flux adapter v0.49 received null value of '{batch_attr_value}'"
-            #            f" for attribute '{batch_attr_dotpath}'. Omitting from jobspec.")
-
-                # LOGGER.warn("Flux adapter v0.49 received null value of '%s'"
-                #             " for attribute '%s'. Omitting from jobspec.",
-                #             batch_attr_value,
-                #             batch_attr_dotpath)
 
             # Add in job shell options if not null (-o/--setopt)
             # NOTE: these are sanitized upstream to be dotpath, value tuples
             for batch_opt_dotpath, batch_opt_value in addtl_batch_args["shell_options"]:
-                # if batch_opt_value:
                 jobspec.setattr_shell_option(batch_opt_dotpath, batch_opt_value)
-                # else:
-                #     LOGGER.warn("Flux adapter v0.49 received null value of '%s'"
-                #                 " for shell option '%s'.
Omitting from jobspec.", - # batch_opt_value, - # batch_opt_dotpath) if walltime > 0: jobspec.duration = walltime @@ -480,13 +461,6 @@ def parallelize(cls, procs, nodes=None, launcher_args=None, **kwargs): addtl += [arg for arg in cls.render_additional_args(launcher_args)] args.extend(addtl) - # for key, value in addtl_args.items(): - - # addtl.append(f"{key}={value}") - - # if addtl: - # args.append("-o") - # args.append(",".join(addtl)) return " ".join(args) From 5869383858e201112fe0bf7fc9205e25fee3de42 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Fri, 24 Oct 2025 15:59:07 -0700 Subject: [PATCH 27/33] Fix coverage to work properly when not at project root --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index a9e568f6..08c77559 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,5 @@ [pytest] -addopts = -p no:warnings --cov-config=.coveragerc --cov-report=term-missing --cov=. --basetemp=./PYTEST_TEMP_DATA +addopts = -p no:warnings --cov-config=.coveragerc --cov-report=term-missing --cov=maestrowf --basetemp=./PYTEST_TEMP_DATA junit_family = xunit2 markers = sched_flux: tests that exercise flux scheduler and require flux bindings From 8673a4270e5614f0b4bb18f723a3340f038ba1e6 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Fri, 24 Oct 2025 16:01:47 -0700 Subject: [PATCH 28/33] Add emacs tmp/backup files to gitignore --- .gitignore | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.gitignore b/.gitignore index c3966c65..d8283931 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,12 @@ sample_output/ # VSCode .vscode +# Emacs backup files +*~ +# Emacs auto-save files +\#*# +.#* + # Doxygen output docs/html/ From b6dd8f787f501b2797835df18f553ffa04000e58 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Mon, 27 Oct 2025 10:44:57 -0700 Subject: [PATCH 29/33] Tick dev version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 8882f665..013c1d14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool] [tool.poetry] name = "maestrowf" -version = "1.1.12dev0" +version = "1.1.12dev1" description = "A tool to easily orchestrate general computational workflows both locally and on supercomputers." license = "MIT License" classifiers = [ From 122a97bb088e0248f5dce7ae9547b38bccb23905 Mon Sep 17 00:00:00 2001 From: Jeremy White <44277022+jwhite242@users.noreply.github.com> Date: Wed, 29 Oct 2025 17:26:13 -0700 Subject: [PATCH 30/33] Rewrite flux jobspec diff fixture for improved flexibility on filtering out expected differences --- tests/conftest.py | 466 ++++++++++++++++-- .../script/test_fluxscriptadapter.py | 38 +- 2 files changed, 446 insertions(+), 58 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 3720e4a0..8ded5526 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,11 +1,14 @@ from collections import defaultdict import difflib +import json import logging import os from pathlib import Path import re import shutil from subprocess import check_output +import subprocess +from typing import Any, Dict, Optional import pprint import pytest @@ -189,7 +192,7 @@ def load_study(): def _load_study(spec_path, output_path, dry_run=False): # Setup some default args to use for testing - use_tmp_dir = True # NOTE: likely want to let pytest control this? 
+ use_tmp_dir = False # NOTE: likely want to let pytest control this? hash_ws = False throttle = 0 submission_attempts = 3 @@ -203,6 +206,8 @@ def _load_study(spec_path, output_path, dry_run=False): # Set up the output directory. out_dir = environment.remove("OUTPUT_PATH") + + pprint.pprint(f"LOAD_STUDY: {str(out_dir)}, {output_path=}") if output_path: # If out is specified in the args, ignore OUTPUT_PATH. output_path = os.path.abspath(output_path) @@ -223,6 +228,7 @@ def _load_study(spec_path, output_path, dry_run=False): output_path = make_safe_path(out_dir, *[out_name]) environment.add(Variable("OUTPUT_PATH", output_path)) + pprint.pprint(f"LOAD_STUDY: post-output_path resolution: {output_path=}") # Set up file logging create_parentdir(os.path.join(output_path, "logs")) output_path = Path(output_path) @@ -349,62 +355,426 @@ def annotate_ignored_lines(lines_to_annotate): def flux_jobspec_check(request): import flux - def _diff_jobspec_keys(jobid, expected, path=None): + def _diff_jobspec_keys( + jobid, + expected, + path=None, + source_label=None, + list_matcher=None, + ignore_keys=None, + debug=False, + debug_log=None, + ): """ - Helper to recursively check for values in flux jobspec's. - - :param jobid: flux jobid to look up jobspec for (int or f58) - :param expected: nested dicts of key/values to verify in jobspec (dict) - :param path: optional initial path in jobspec dict to search under + Verify that 'expected' is a subset of the actual flux jobspec, with support for: + - Ignoring specific keys/indices via ignore_keys (paths, names, or predicate(s)) + - Multiple ignore predicates or path specs + - Optional debugging trace + + Parameters: + - jobid: flux job id + - expected: nested structure of expected keys/values + - path: initial dict path to descend before comparing + - source_label: label for error messages + - list_matcher: optional matcher callable for unordered list of dicts + - ignore_keys: None, single spec, or list/tuple of specs: + * dotted path strings, e.g. "tasks.0.command.3" + * tuples of raw segments, e.g. ("tasks", 0, "command", 3) + * sets/lists of simple key names, e.g. {"timestamp", "uuid"} + * callable predicate(raw_path, key_or_index, actual_value) -> bool + Any spec that returns True will cause that key/index to be skipped. + - debug: True to enable verbose ignore diagnostics + - debug_log: optional callable(msg). Defaults to print if debug is True. + + Assumptions: + - Top-level jobspec is a dict. """ - # NOTE: may need some helpers here if needing to mess with uri to change broker fh = flux.Flux() + jobspec = flux.job.job_kvs_lookup( + fh, flux.job.JobID(jobid), decode=True, original=False + ).get("jobspec") + + prefix = f"[{source_label}] " if source_label else "" + + # Build composite ignore predicate with debugging + ignore_pred = build_ignore_predicate(ignore_keys, debug=debug, debug_log=debug_log) + # pprint.pprint(f"[flux_jobspec_check]: id(ignore_pred) = {id(ignore_pred)}") + def fmt_path(pretty_segments): + return ".".join(pretty_segments) if pretty_segments else "" + + def assert_equal_scalar(actual, expected, raw_path, pretty_path): + assert actual == expected, ( + f"{prefix}Value mismatch at {fmt_path(pretty_path)}: " + f"expected {expected!r}, got {actual!r}" + ) - # Get the jobspec (do we care about original?) - # NOTE: job_kvs_lookup vs job_info_lookup? does it matter? 
- - # Default returns id, jobspec keys - jobspec = flux.job.job_kvs_lookup(fh, flux.job.JobID(jobid), decode=True, original=False)['jobspec'] - - def assert_nested_dict_subset(actual, expected, path=None): - """Recursively assert key/values in actual/expected dictionaries""" - if path is None: - path = [] - - for key, expected_value in expected.items(): - current_path = path + [repr(key)] - assert key in actual, f"Missing key at {'.'.join(current_path)}" - actual_value = actual[key] - if isinstance(expected_value, dict): - assert isinstance(actual_value, dict), ( - f"Expected dict at {'.'.join(current_path)}, " - f"got {type(actual_value).__name__}" - ) - assert_nested_dict_subset(actual_value, - expected_value, - current_path) - elif isinstance(expected_value, list): - assert isinstance(actual_value, list), ( - f"Expected list at {'.'.join(current_path)}, " - f"got {type(actual_value).__name__}" + def assert_dict_subset(actual_dict, expected_dict, raw_path, pretty_path): + assert isinstance(actual_dict, dict), ( + f"{prefix}Expected dict at {fmt_path(pretty_path)}, " + f"got {type(actual_dict).__name__}" + ) + for key, expected_value in expected_dict.items(): + # Ignore check with raw context + # pprint.pprint(f"[flux_jobspec_check][assert_dict_subset]: id(ignore_pred) = {id(ignore_pred)}") + if ignore_pred(raw_path, key, actual_dict.get(key, None), pretty_path): + continue + + raw_next = raw_path + (key,) + pretty_next = pretty_path + [repr(key)] + assert key in actual_dict, f"{prefix}Missing key at {fmt_path(pretty_next)}" + actual_value = actual_dict[key] + assert_nested_subset(actual_value, expected_value, raw_next, pretty_next) + + def assert_list_positional_subset(actual_list, expected_list, raw_path, pretty_path): + assert isinstance(actual_list, list), ( + f"{prefix}Expected list at {fmt_path(pretty_path)}, " + f"got {type(actual_list).__name__}" + ) + for i, item in enumerate(expected_list): + if item is ...: + continue + + # pprint.pprint(f"[flux_jobspec_check][assert_list_positional_subset]: id(ignore_pred) = {id(ignore_pred)}") + actual_val = actual_list[i] if i < len(actual_list) else None + if ignore_pred(raw_path, i, actual_val, pretty_path): + continue + # else: + # pprint.pprint(f"[ignore-pred]: not ignoring: {raw_path=} {i=} {actual_val=} {pretty_path=}") + + assert i < len(actual_list), ( + f"{prefix}List index out of range at {fmt_path(pretty_path)}[{i}]: " + f"actual length {len(actual_list)}" + ) + assert_nested_subset(actual_list[i], item, raw_path + (i,), pretty_path + [f"[{i}]"]) + + def assert_list_unordered_subset(actual_list, expected_list, raw_path, pretty_path): + assert isinstance(actual_list, list), ( + f"{prefix}Expected list at {fmt_path(pretty_path)}, " + f"got {type(actual_list).__name__}" + ) + # pprint.pprint(f"[flux_jobspec_check][assert_list_unordered_subset]: id(ignore_pred) = {id(ignore_pred)}") + available_indices = [ + i for i in range(len(actual_list)) + if not ignore_pred(raw_path, i, actual_list[i], pretty_path) + ] + + def consume(idx): + try: + available_indices.remove(idx) + return True + except ValueError: + return False + + for i, exp_item in enumerate(expected_list): + if exp_item is ...: + continue + + matched_idx = None + # Try matcher first + if callable(list_matcher): + idx = list_matcher(actual_list, exp_item, pretty_path) + if idx is not None and idx in available_indices: + matched_idx = idx + + # Fallback search + if matched_idx is None: + for idx in list(available_indices): + try: + assert_nested_subset( + actual_list[idx], 
+
+        def assert_tuple_unordered_subset(actual_tuple, expected_tuple, raw_path, pretty_path):
+            assert isinstance(actual_tuple, tuple), (
+                f"{prefix}Expected tuple at {fmt_path(pretty_path)}, "
+                f"got {type(actual_tuple).__name__}"
+            )
+            available_indices = [
+                i for i in range(len(actual_tuple))
+                if not ignore_pred(raw_path, i, actual_tuple[i], pretty_path)
+            ]
+
+            def consume(idx):
+                try:
+                    available_indices.remove(idx)
+                    return True
+                except ValueError:
+                    return False
+
+            for i, exp_item in enumerate(expected_tuple):
+                matched_idx = None
+                for idx in list(available_indices):
+                    try:
+                        assert_nested_subset(
+                            actual_tuple[idx], exp_item,
+                            raw_path + (idx,), pretty_path + [f"({idx})"]
+                        )
+                        matched_idx = idx
+                        break
+                    except AssertionError:
+                        continue
+                assert matched_idx is not None, (
+                    f"{prefix}No matching tuple element for expected item at "
+                    f"{fmt_path(pretty_path)}({i})"
+                )
+                assert consume(matched_idx), (
+                    f"{prefix}Internal error, matched tuple index {matched_idx} "
+                    f"could not be consumed at {fmt_path(pretty_path)}({i})"
+                )
+
+        def assert_nested_subset(actual, expected, raw_path, pretty_path):
+            if isinstance(expected, dict):
+                return assert_dict_subset(actual, expected, raw_path, pretty_path)
+            if isinstance(expected, list):
+                if callable(list_matcher):
+                    return assert_list_unordered_subset(actual, expected, raw_path, pretty_path)
+                return assert_list_positional_subset(actual, expected, raw_path, pretty_path)
+            if isinstance(expected, tuple):
+                return assert_tuple_unordered_subset(actual, expected, raw_path, pretty_path)
+            return assert_equal_scalar(actual, expected, raw_path, pretty_path)
+
+        try:
+            assert isinstance(jobspec, dict), (
+                f"{prefix}Top-level jobspec is not a dict, got {type(jobspec).__name__}"
+            )
+            actual_root = jobspec
+            raw_path = tuple()
+            pretty_path = []
+
+            # Descend the initial path before comparing
+            if path:
+                for segment in path:
+                    # Assert the type first so a non-dict fails the check
+                    # instead of raising AttributeError from .get() below
+                    assert isinstance(actual_root, dict), (
+                        f"{prefix}Expected dict while walking path at {fmt_path(pretty_path)}, "
+                        f"got {type(actual_root).__name__}"
+                    )
+                    # If this segment is ignored, stop descending and compare
+                    # at the current level
+                    if ignore_pred(raw_path, segment, actual_root.get(segment, None), pretty_path):
+                        break
+                    assert segment in actual_root, (
+                        f"{prefix}Missing key while walking path at "
+                        f"{fmt_path(pretty_path + [repr(segment)])}"
+                    )
+                    actual_root = actual_root[segment]
+                    raw_path += (segment,)
+                    pretty_path.append(repr(segment))
+
+            assert_nested_subset(actual_root, expected, raw_path, pretty_path)
+
         except AssertionError:
-            jobspec_str = pprint.pformat(jobspec, width=80)
+            jobspec_str = pprint.pformat(jobspec, width=100)
+            expected_str = pprint.pformat(expected, width=100)
             request.node.add_report_section(
-                "call", "jobspec", f"\n=== Jobspec dump on failure for job {jobid} ===\n{jobspec_str}\n=== End jobspec dump ===\n")
+                "call",
+                "jobspec",
+                (
+                    f"\n=== Jobspec dump on failure for job {jobid} "
+                    f"(expected source: {source_label or 'unknown'}) ===\n{jobspec_str}\n"
+                    f"=== Expected structure ===\n{expected_str}\n"
+                    "=== End jobspec dump ===\n"
+                ),
+            )
             raise
 
     return _diff_jobspec_keys
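+
+# Illustrative list_matcher for the fixture above (a hypothetical helper, not
+# wired up anywhere): return the index of the matching element, else None.
+#
+#     def match_resource_by_type(actual_list, exp_item, pretty_path):
+#         for i, entry in enumerate(actual_list):
+#             if isinstance(entry, dict) and entry.get("type") == exp_item.get("type"):
+#                 return i
+#         return None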
+
+
+def build_ignore_predicate(ignore_keys, debug=False, debug_log=None):
+    """
+    Build a composite ignore predicate from one or more specs.
+
+    :param ignore_keys: None, a single spec, or a list/set of mixed specs:
+        * dotted path strings, e.g. "a.b.1.c.2" (digit segments become ints)
+        * tuples of raw segments, e.g. ("a", "b", 1, "c", 2)
+        * sets/lists of simple key names, e.g. {"timestamp", "uuid"}
+        * callable predicate(raw_path, key_or_index, actual_value) -> bool
+    :param debug: True to log every ignore decision with context
+    :param debug_log: optional callable(msg) to route logs; defaults to
+        pprint.pprint when debug is True
+    :return: predicate(raw_path, key_or_index, actual_value, pretty_path) -> bool
+
+    Matching rules:
+    - A full path match compares tuple(raw_path) + (key_or_index,) against the
+      normalized path set.
+    - A simple name match applies when key_or_index is a str that appears in
+      the simple names set.
+    - Custom predicates are called with (raw_path, key_or_index, actual_value).
+    - If any matcher returns True, the element is ignored.
+    """
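+    # Illustrative spec mix (shapes mirror the docstring above), e.g.:
+    #
+    #     pred = build_ignore_predicate([
+    #         {"timestamp", "uuid"},                  # simple key names
+    #         "tasks.0.command.3",                    # dotted path, digits -> indices
+    #         ("attributes", "system", "duration"),   # raw segment tuple
+    #         lambda rp, key, val: key == "cwd",      # custom predicate
+    #     ])
+    #     pred(("attributes", "system"), "duration", 3600.0, [])  # -> True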
+    if not debug_log and debug:
+        debug_log = pprint.pprint
+
+    # Normalize all specs into the three matcher groups
+    normalized_paths = set()  # tuples of raw segments
+    simple_names = set()      # simple str key names
+    custom_preds = []         # callables(raw_path, key_or_index, actual_value) -> bool
+
+    def parse_path_str(s):
+        # Digit segments become int indices so they compare against raw paths
+        return tuple(int(p) if p.isdigit() else p for p in s.split("."))
+
+    def add_spec(spec):
+        if spec is None:
+            return
+        if callable(spec):
+            custom_preds.append(spec)
+            return
+        if isinstance(spec, str):
+            normalized_paths.add(parse_path_str(spec))
+            return
+        if isinstance(spec, tuple):
+            # A bare tuple is a single path of raw segments
+            normalized_paths.add(spec)
+            return
+        if isinstance(spec, (list, set)):
+            # Heuristically treat string elements as simple names unless they
+            # look like dotted paths
+            for x in spec:
+                if callable(x):
+                    custom_preds.append(x)
+                elif isinstance(x, str):
+                    if "." in x or x.isdigit():
+                        normalized_paths.add(parse_path_str(x))
+                    else:
+                        simple_names.add(x)
+                elif isinstance(x, tuple):
+                    normalized_paths.add(x)
+                # Unsupported element types are ignored quietly
+            return
+        # Fallback: unsupported spec types are ignored quietly
+
+    # Accept a single spec or a list/set of specs; a bare tuple is treated as
+    # one path spec rather than a collection
+    if isinstance(ignore_keys, (list, set)):
+        for spec in ignore_keys:
+            add_spec(spec)
+    else:
+        add_spec(ignore_keys)
+
+    def match(raw_path, key_or_index, actual_value):
+        full = tuple(raw_path) + (key_or_index,)
+        if full in normalized_paths:
+            return True
+        if isinstance(key_or_index, str) and key_or_index in simple_names:
+            return True
+        for pred in custom_preds:
+            try:
+                if pred(raw_path, key_or_index, actual_value):
+                    return True
+            except Exception:
+                # Do not break matching on a predicate error, treat as False
+                continue
+        return False
+
+    def composite(raw_path, key_or_index, actual_value, pretty_path):
+        result = match(raw_path, key_or_index, actual_value)
+        if debug:
+            # Re-run the matchers individually to report which one(s) fired
+            which = []
+            if tuple(raw_path) + (key_or_index,) in normalized_paths:
+                which.append("path")
+            if isinstance(key_or_index, str) and key_or_index in simple_names:
+                which.append("name")
+            for idx, pred in enumerate(custom_preds):
+                try:
+                    if pred(raw_path, key_or_index, actual_value):
+                        which.append(f"pred[{idx}]")
+                except Exception:
+                    pass
+            msg = (
+                f"[ignore-debug] raw_path={raw_path}, key_or_index={key_or_index!r}, "
+                f"pretty_path={'.'.join(pretty_path) if pretty_path else '<root>'}, "
+                f"actual_value={actual_value!r}, ignored={result}, via={','.join(which) or 'none'}"
+            )
+            debug_log(msg)
+        return result
+
+    return composite
+
+
+class FluxJobspecError(Exception):
+    pass
+
+
+@pytest.fixture
+def generate_jobspec_from_script():
+    """
+    Pytest fixture that returns a helper callable:
+    generate_jobspec_from_script(script_path, extra_args=None) -> Dict[str, Any]
+
+    It runs `flux batch --dry-run