diff --git a/.github/workflows/apptainer-build-deploy.yml b/.github/workflows/apptainer-build-deploy.yml index 00fa8786..f29fa2da 100644 --- a/.github/workflows/apptainer-build-deploy.yml +++ b/.github/workflows/apptainer-build-deploy.yml @@ -25,7 +25,7 @@ jobs: contents: write container: - image: quay.io/singularity/singularity:v3.8.1 + image: apptainer/apptainer:1.2.5 options: --privileged steps: diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index 07f2a1b9..10a56d53 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -3,6 +3,7 @@ name: Release Documentation permissions: write-all on: + pull_request: push: branches: [main] workflow_dispatch: @@ -25,7 +26,7 @@ jobs: pixi-version: v0.62.2 locked: true cache: true - cache-write: ${{ github.ref == 'refs/heads/main' }} + cache-write: false environments: dev - name: Build Docs @@ -35,6 +36,7 @@ jobs: - name: deploy uses: peaceiris/actions-gh-pages@4f9cc6602d3f66b9c108549d475ec49e8ef4d45e + if: startsWith(github.ref, 'refs/tags/v') with: github_token: ${{ secrets.GITHUB_TOKEN }} publish_dir: ./docs/_build/html diff --git a/.github/workflows/super_linter.yml b/.github/workflows/super_linter.yml index 15b7cbf5..96e42f74 100644 --- a/.github/workflows/super_linter.yml +++ b/.github/workflows/super_linter.yml @@ -48,6 +48,7 @@ jobs: VALIDATE_PYTHON_PYINK: false VALIDATE_PYTHON_PYLINT: false VALIDATE_PYTHON_RUFF: false + VALIDATE_PYTHON_RUFF_FORMAT: false VALIDATE_SPELL_CODESPELL: false VALIDATE_YAML: false VALIDATE_YAML_PRETTIER: false diff --git a/docs/source/api.rst b/docs/source/api.rst index dd7c1023..2670ecb0 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -1,3 +1,5 @@ +:orphan: + .. 
autosummary:: :toctree: _autosummary :template: custom-module-template.rst diff --git a/docs/source/conf.py b/docs/source/conf.py index 004aa285..08ea1f84 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -111,6 +111,13 @@ # Avoid errors with self-signed certificates tls_verify = False +# Avoid warning about api.rst not in TOC +suppress_warnings = ["toc.not_included", "misc.highlighting_failure"] + +nitpick_ignore = [ + ("py:class", "gaps.config._ConfigType"), +] + # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for @@ -208,6 +215,74 @@ ), ] + +def _skip_pydantic_methods(name, obj): + return name in { + "model_dump_json", + "model_json_schema", + "model_dump", + "model_construct", + "model_copy", + "model_fields", + "model_computed_fields", + "model_rebuild", + "model_parametrized_name", + "model_post_init", + "model_validate", + "model_validate_json", + "model_validate_strings", + "copy", + "construct", + "dict", + "from_orm", + "json", + "parse_file", + "parse_obj", + "parse_raw", + "schema", + "schema_json", + "update_forward_refs", + "validate", + } and "BaseModel" in str(obj) + + +def _skip_builtin_methods(name, obj): + if name in { + "clear", + "pop", + "popitem", + "setdefault", + "update", + } and "MutableMapping" in str(obj): + return True + + if name in {"items", "keys", "values"} and "Mapping" in str(obj): + return True + + return name in {"copy", "get"} and "UserDict" in str(obj) + + +def _skip_internal_api(name, obj): + if (getattr(obj, "__doc__", None) or "").startswith("[NOT PUBLIC API]"): + return True + + return name in {"copy", "fromkeys"} and "Status" in str(obj) + + +def _skip_member(app, what, name, obj, skip, options): + if ( + _skip_internal_api(name, obj) + or _skip_builtin_methods(name, obj) + or _skip_pydantic_methods(name, obj) + ): + return True + return None + + +def setup(app): + app.connect("autodoc-skip-member", 
_skip_member) + + # -- Extension configuration ------------------------------------------------- autosummary_generate = True # Turn on sphinx.ext.autosummary diff --git a/gaps/batch.py b/gaps/batch.py index f72c0af4..82f682c5 100644 --- a/gaps/batch.py +++ b/gaps/batch.py @@ -63,7 +63,7 @@ def __init__(self, config): @property def job_table(self): - """pd.DataFrame: Batch job summary table""" + """pandas.DataFrame: Batch job summary table""" jobs = [] for job_tag, (arg_comb, file_set, set_tag) in self._sets.items(): job_info = {k: str(v) for k, v in arg_comb.items()} diff --git a/gaps/cli/config.py b/gaps/cli/config.py index 6f410ace..78e3d593 100644 --- a/gaps/cli/config.py +++ b/gaps/cli/config.py @@ -567,7 +567,7 @@ def run_with_status_updates( status_update_args : iterable An iterable containing the first three initializer arguments for - :class:`StatusUpdates`. + :class:`~gaps.status.StatusUpdates`. exclude : collection | None A collection (list, set, dict, etc.) of keys that should be excluded from the job status file that is written before/after diff --git a/gaps/cli/documentation.py b/gaps/cli/documentation.py index 093f08b1..5bdd54ce 100644 --- a/gaps/cli/documentation.py +++ b/gaps/cli/documentation.py @@ -389,6 +389,7 @@ class CommandDocumentation: """ REQUIRED_TAG = "[REQUIRED]" + """Tag to indicate required parameters in generated templates""" def __init__(self, *functions, skip_params=None, is_split_spatially=False): """ diff --git a/gaps/cli/preprocessing.py b/gaps/cli/preprocessing.py index e519b77b..a125754f 100644 --- a/gaps/cli/preprocessing.py +++ b/gaps/cli/preprocessing.py @@ -22,7 +22,8 @@ def split_project_points_into_ranges(config): ---------- config : dict Run config. This config must have a "project_points" input that - can be used to initialize :class:`ProjectPoints`. + can be used to initialize + :class:`gaps.project_points.ProjectPoints`. 
Returns ------- diff --git a/gaps/collection.py b/gaps/collection.py index 0753fea3..9c30f8da 100644 --- a/gaps/collection.py +++ b/gaps/collection.py @@ -570,7 +570,7 @@ def _check_meta(self, meta): Parameters ---------- - meta : :class:`pd.DataFrame` + meta : pandas.DataFrame DataFrame of combined meta from all files in `self.h5_files`. Duplicate GIDs are dropped and a warning is raised. @@ -897,7 +897,7 @@ def parse_meta(h5_file): Returns ------- - meta : :class:`pd.DataFrame` + meta : pandas.DataFrame Portion of meta data corresponding to sites in `h5_file`. """ with Resource(h5_file) as res: diff --git a/gaps/config.py b/gaps/config.py index 92c75892..619b5d9a 100644 --- a/gaps/config.py +++ b/gaps/config.py @@ -77,6 +77,7 @@ class JSONHandler(Handler): """JSON config file handler""" FILE_EXTENSION = "json" + """Expected file extension for JSON config files""" @classmethod def dump(cls, config, stream): @@ -98,6 +99,7 @@ class JSON5Handler(Handler): """JSON5 config file handler""" FILE_EXTENSION = "json5" + """Expected file extension for JSON5 config files""" @classmethod def dump(cls, config, stream): @@ -127,6 +129,7 @@ class YAMLHandler(Handler): """YAML config file handler""" FILE_EXTENSION = "yaml", "yml" + """Expected file extensions for YAML config files""" @classmethod def dump(cls, config, stream): @@ -148,6 +151,7 @@ class TOMLHandler(Handler): """TOML config file handler""" FILE_EXTENSION = "toml" + """Expected file extension for TOML config files""" @classmethod def dump(cls, config, stream): @@ -176,6 +180,7 @@ def _new_post_hook(cls, obj, value): obj.load = _CONFIG_HANDLER_REGISTRY[value].load obj.loads = _CONFIG_HANDLER_REGISTRY[value].loads obj.write = _CONFIG_HANDLER_REGISTRY[value].write + obj.__doc__ = f"{value} config" return obj @@ -190,7 +195,7 @@ def _new_post_hook(cls, obj, value): def config_as_str_for_docstring( - config, config_type=ConfigType.JSON, num_spaces=12 + config, config_type=str(ConfigType.JSON), num_spaces=12 ): """Convert 
a config into a string to be used within a docstring. @@ -202,11 +207,11 @@ def config_as_str_for_docstring( config : dict Dictionary containing the configuration to be converted into docstring format. - config_type : :class:`ConfigType`, optional + config_type : ConfigType, default="json" A :class:`ConfigType` enumeration value specifying what type - of config file to generate. By default, :attr:`ConfigType.JSON`. - num_spaces : int, optional - Number of spaces to add after a newline. By default, `12`. + of config file to generate. By default, "json". + num_spaces : int, default=12 + Number of spaces to add after a newline. By default, ``12``. Returns ------- diff --git a/gaps/hpc.py b/gaps/hpc.py index db7290d0..795ea5a3 100644 --- a/gaps/hpc.py +++ b/gaps/hpc.py @@ -29,9 +29,9 @@ class HpcJobManager(ABC): """Abstract HPC job manager framework""" # set a max job name length, will raise error if too long. - MAX_NAME_LEN = 100 + _MAX_NAME_LEN = 100 - SHELL_FILENAME_FMT = "{}.sh" + _SHELL_FILENAME_FMT = "{}.sh" def __init__(self, user=None, queue_dict=None): """ @@ -206,7 +206,7 @@ def submit( self._setup_submission(name, **kwargs) out, err = submit( - f"{self.COMMANDS.SUBMIT} {self.SHELL_FILENAME_FMT.format(name)}" + f"{self.COMMANDS.SUBMIT} {self._SHELL_FILENAME_FMT.format(name)}" ) out = self._teardown_submission(name, out, err, keep_sh=keep_sh) return out, err @@ -222,10 +222,10 @@ def _validate_command_not_none(self, command): def _validate_name_length(self, name): """Validate that the name does not exceed max length""" - if len(name) > self.MAX_NAME_LEN: + if len(name) > self._MAX_NAME_LEN: msg = ( f"Cannot submit job with name longer than " - f"{self.MAX_NAME_LEN} chars: {name!r}" + f"{self._MAX_NAME_LEN} chars: {name!r}" ) raise gapsValueError(msg) @@ -236,12 +236,12 @@ def _setup_submission(self, name, **kwargs): script = self.make_script_str(name, **kwargs) - make_sh(self.SHELL_FILENAME_FMT.format(name), script) + 
make_sh(self._SHELL_FILENAME_FMT.format(name), script) def _teardown_submission(self, name, out, err, keep_sh=False): """Remove submission file and mark job as submitted""" if not keep_sh: - Path(self.SHELL_FILENAME_FMT.format(name)).unlink() + Path(self._SHELL_FILENAME_FMT.format(name)).unlink() if err: warn( @@ -316,11 +316,13 @@ class PBS(HpcJobManager): """Subclass for PBS subprocess jobs""" COLUMN_HEADERS = Q_COLUMNS(NAME="Name", ID="Job id", STATUS="S") + """PBS output column header names""" - # String representing the submitted status for this manager Q_SUBMITTED_STATUS = "Q" + """String representing the submitted status for this manager""" COMMANDS = COMMANDS(SUBMIT="qsub", CANCEL="qdel") # cspell:disable-line + """Submission and cancellation command names for this manager""" def query_queue(self): """Run the PBS qstat command and return the raw stdout string. @@ -431,12 +433,15 @@ class SLURM(HpcJobManager): # cspell:disable-next-line COLUMN_HEADERS = Q_COLUMNS(NAME="NAME", ID="JOBID", STATUS="ST") + """SLURM output column names""" # String representing the submitted status for this manager Q_SUBMITTED_STATUS = "PD" + """String representing the submitted status for this manager""" # cspell:disable-next-line COMMANDS = COMMANDS(SUBMIT="sbatch", CANCEL="scancel") + """Submission and cancellation command names for this manager""" def query_queue(self): """Run the HPC queue command and return the raw stdout string. 
@@ -449,7 +454,7 @@ def query_queue(self): """ cmd = ( f'squeue -u {self._user} --format="%.15i %.30P ' - f'%.{self.MAX_NAME_LEN}j %.20u %.10t %.15M %.25R %q"' + f'%.{self._MAX_NAME_LEN}j %.20u %.10t %.15M %.25R %q"' ) stdout, _ = submit(cmd) return _skip_q_rows(stdout) diff --git a/gaps/pipeline.py b/gaps/pipeline.py index ac3f87f2..4dd59881 100644 --- a/gaps/pipeline.py +++ b/gaps/pipeline.py @@ -24,6 +24,7 @@ class PipelineStep: """A Pipeline Config step""" COMMAND_KEY = "command" + """Key in pipeline step dictionary that specifies the command""" _KEYS_PER_STEP = 2 def __init__(self, step_dict): @@ -61,6 +62,7 @@ class Pipeline: """gaps pipeline execution framework""" COMMANDS = {} + """Pipeline command registry""" def __init__(self, pipeline, monitor=True): """ diff --git a/gaps/project_points.py b/gaps/project_points.py index 64813eca..d8cd1bf5 100644 --- a/gaps/project_points.py +++ b/gaps/project_points.py @@ -24,14 +24,14 @@ def _parse_sites(points): Parameters ---------- - points : int | str | pd.DataFrame | slice | list + points : int | str | pandas.DataFrame | slice | list Slice specifying project points, string pointing to a project points csv, or a DataFrame containing the effective csv contents. Can also be a single integer site value. Returns ------- - df : pd.DataFrame + df : pandas.DataFrame DataFrame of sites (gids) with corresponding args Raises @@ -60,7 +60,7 @@ def __init__(self, points, **kwargs): Parameters ---------- - points : int | slice | list | tuple | str | pd.DataFrame | dict + points : int | slice | list | tuple | str | pandas.DataFrame Slice specifying project points, string pointing to a project points csv, or a DataFrame containing the effective csv contents. Can also be a single integer site value. 
@@ -79,14 +79,14 @@ def _parse_points(self, points, **kwargs): Parameters ---------- - points : int | str | pd.DataFrame | slice | list | dict + points : int | str | pandas.DataFrame | slice | list | dict Slice specifying project points, string pointing to a project points csv, or a DataFrame containing the effective csv contents. Can also be a single integer site value. Returns ------- - df : pd.DataFrame + df : pandas.DataFrame DataFrame of sites (gids) with corresponding args """ try: @@ -129,7 +129,7 @@ def __getitem__(self, site_id): Returns ------- - site : pd.Series + site : pandas.Series Pandas Series containing information for the site with the requested site_id. """ @@ -155,7 +155,7 @@ def __len__(self): @property def df(self): - """pd.DataFrame: Project points DataFrame of site info""" + """pandas.DataFrame: Project points DataFrame of site info""" return self._df @property @@ -209,7 +209,7 @@ def join_df(self, df2, key="gid"): Parameters ---------- - df2 : pd.DataFrame + df2 : pandas.DataFrame DataFrame to be joined to the :attr:`df` attribute (this instance of project points DataFrame). This likely contains site-specific inputs that are to be passed to parallel @@ -279,7 +279,7 @@ def from_range(cls, split_range, points, **kwargs): split_range : 2-tuple Tuple containing the start and end index (iloc, not loc). Last index is not included. - points : int | slice | list | tuple | str | pd.DataFrame | dict + points : int | slice | list | tuple | str | pandas.DataFrame Slice specifying project points, string pointing to a project points csv, or a DataFrame containing the effective csv contents. Can also be a single integer site value. 
diff --git a/gaps/status.py b/gaps/status.py index 62438db6..8bb932ae 100644 --- a/gaps/status.py +++ b/gaps/status.py @@ -31,19 +31,46 @@ class StatusField(CaseInsensitiveEnum): """A collection of required status fields in a status file""" JOB_ID = "job_id" + """Key for Job ID from HPC scheduler""" + JOB_STATUS = "job_status" + """Key for job status string""" + PIPELINE_INDEX = "pipeline_index" + """Key for pipeline index of the job's pipeline step""" + HARDWARE = "hardware" + """Key for hardware option that the job is running on""" + QOS = "qos" + """Key for QOS option that the job is running with""" + OUT_FILE = "out_file" + """Key for output file path of the job""" + TIME_SUBMITTED = "time_submitted" + """Key for time that the job was submitted to the HPC scheduler""" + TIME_START = "time_start" + """Key for time that the job started running on the HPC""" + TIME_END = "time_end" + """Key for time that the job finished running on the HPC""" + TOTAL_RUNTIME = "total_runtime" + """Key for total runtime of the job in seconds""" + RUNTIME_SECONDS = "runtime_seconds" + """Key for runtime of the job in seconds""" + MONITOR_PID = "monitor_pid" + """Key for PID of the monitoring process""" + STDOUT_LOG = "stdout_log" + """Key for stdout log file path of the job""" + STDOUT_ERR_LOG = "stdout_err_log" + """Key for stderr log file path of the job""" class HardwareOption(CaseInsensitiveEnum): @@ -140,11 +167,22 @@ class StatusOption(CaseInsensitiveEnum): """A collection of job status options""" NOT_SUBMITTED = "not submitted" + """Not submitted to HPC scheduler""" + SUBMITTED = "submitted" + """Submitted to HPC scheduler but not yet running""" + RUNNING = "running" + """Running on HPC""" + SUCCESSFUL = "successful" + """Finished running on HPC with a successful exit code""" + FAILED = "failed" + """Finished running on HPC with a failed exit code""" + COMPLETE = "complete" + """Pipeline is complete""" @classmethod def _new_post_hook(cls, obj, value): @@ -219,9 +257,16 @@ 
class Status(UserDict): StatusField.QOS.value, ] HIDDEN_SUB_DIR = ".gaps" + """Subdirectory to store the GAPs status json file""" + MONITOR_PID_FILE = "monitor_pid.json" + """JSON file that records the PID of the monitoring process""" + JOB_STATUS_FILE = "jobstatus_{}.json" + """Filename pattern for individual job status files""" + NAMED_STATUS_FILE = "{}_status.json" + """Main JSON status file containing all job statuses""" def __init__(self, status_dir): """Initialize `Status`. diff --git a/tests/test_hpc.py b/tests/test_hpc.py index 274fa842..4c17d8da 100644 --- a/tests/test_hpc.py +++ b/tests/test_hpc.py @@ -1,8 +1,4 @@ -# -*- coding: utf-8 -*- -# pylint: disable=too-many-locals -""" -GAPs HPC job managers tests. -""" +"""GAPs HPC job managers tests""" import shlex import subprocess @@ -120,7 +116,7 @@ def test_job_name_too_long(manager): """Test submission fails if name too long.""" hpc_manager = manager() with pytest.raises(gapsValueError): - hpc_manager.submit("".join(["a"] * (manager.MAX_NAME_LEN * 2))) + hpc_manager.submit("".join(["a"] * (manager._MAX_NAME_LEN * 2))) @pytest.mark.parametrize( @@ -247,7 +243,7 @@ def test_hpc_submit(manager, q_str, kwargs, expectation, add_qos, monkeypatch): name = "submit_test" cmd_cache = [] - fn_sh = Path(manager.SHELL_FILENAME_FMT.format(name)) + fn_sh = Path(manager._SHELL_FILENAME_FMT.format(name)) def _test_submit(cmd): cmd_cache.append(cmd)