diff --git a/.github/workflows/lint_test_coverage.yml b/.github/workflows/lint_test_coverage.yml
index 9a0bd2e..3e057a0 100644
--- a/.github/workflows/lint_test_coverage.yml
+++ b/.github/workflows/lint_test_coverage.yml
@@ -1,14 +1,11 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
-
-name: Test Coverage Lint
+name: TestCov Lint
 
 on:
   push:
-    branches-ignore:
+    branches:
       - main
+      - release-*
   pull_request:
-    branches: [ "main", "$default_branch" ]
 
 jobs:
   build:
@@ -25,16 +22,16 @@ jobs:
       uses: actions/setup-python@v3
       with:
         python-version: ${{ matrix.python-version }}
-    - name: Install poetry and the package dependencies
+    - name: Dependency installation
+      if: github.event_name == 'pull_request' || github.event_name == 'push'
       run: |
         python -m pip install --upgrade pip
-        python -m pip install poetry nox nox-poetry
-    - name: Nox TEST session
-      run: |
-        nox -s test
-    - name: Nox COVERAGE session
+        python -m pip install poetry nox nox-poetry
+    - name: TestCoverage
+      if: github.event_name == 'pull_request' || github.event_name == 'push'
       run: |
         nox -s coverage
-    - name: Nox LINTING session
+    - name: Linting
+      if: github.event_name == 'pull_request' || github.event_name == 'push'
       run: |
-        nox -s lint
+        nox -s lint
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index f3142f4..249255f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -166,7 +166,7 @@ cython_debug/
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-# Apple 
+# Apple
 .DS_store
 
 # VSCode
diff --git a/NOTICE b/NOTICE
index 97e6a43..1f91b06 100644
--- a/NOTICE
+++ b/NOTICE
@@ -1,4 +1,4 @@
 ageml
 Copyright 2023 The ageml Developers
 
-This product includes software developed at the Computational Neuroimaging Laboratory of the Biobizkaia Health Research Institute, Barakaldo, Basque Country.
\ No newline at end of file
+This product includes software developed at the Computational Neuroimaging Laboratory of the Biobizkaia Health Research Institute, Barakaldo, Basque Country. 
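The reworked workflow above now triggers on pushes to main and release-* branches and on every pull request, and it drops the separate `nox -s test` step, keeping only the coverage and lint sessions. As a reference for reproducing the CI steps locally, a minimal noxfile sketch in the style this repository uses might look as follows; the lint session mirrors the noxfile.py change further below, while the body of the coverage session is an assumption (this diff does not show it):

    # Hypothetical local noxfile sketch mirroring the CI sessions (not part of this diff).
    from nox_poetry import Session, session


    @session(reuse_venv=True)
    def coverage(s: Session) -> None:
        s.run("poetry", "install", external=True)
        # Assumed invocation: the actual coverage session body is not shown in this diff.
        s.run("pytest", "--cov=src/ageml")


    @session(reuse_venv=True)
    def lint(s: Session) -> None:
        # Matches the noxfile.py change in this diff: ruff replaces flake8.
        s.run("poetry", "install", external=True)
        s.run("ruff", "check")

Run locally with `nox -s coverage lint`, matching the two CI steps.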
diff --git a/bin/scripts/debug_classify.py b/bin/scripts/debug_classify.py
index d5976e9..f7e5d35 100644
--- a/bin/scripts/debug_classify.py
+++ b/bin/scripts/debug_classify.py
@@ -1,4 +1,4 @@
 import ageml.commands as commands
 
-if __name__ == '__main__':
-    commands.clinical_classify()
\ No newline at end of file
+if __name__ == "__main__":
+    commands.clinical_classify()
diff --git a/bin/scripts/debug_clinical.py b/bin/scripts/debug_clinical.py
index 8410e0f..640ddf6 100644
--- a/bin/scripts/debug_clinical.py
+++ b/bin/scripts/debug_clinical.py
@@ -1,4 +1,4 @@
 import ageml.commands as commands
 
-if __name__ == '__main__':
-    commands.clinical_groups()
\ No newline at end of file
+if __name__ == "__main__":
+    commands.clinical_groups()
diff --git a/bin/scripts/debug_factors.py b/bin/scripts/debug_factors.py
index dc20cb2..6cffd0c 100644
--- a/bin/scripts/debug_factors.py
+++ b/bin/scripts/debug_factors.py
@@ -1,4 +1,4 @@
 import ageml.commands as commands
 
-if __name__ == '__main__':
-    commands.factor_correlation()
\ No newline at end of file
+if __name__ == "__main__":
+    commands.factor_correlation()
diff --git a/bin/scripts/debug_model_age.py b/bin/scripts/debug_model_age.py
index 8da0e5c..5f22af1 100644
--- a/bin/scripts/debug_model_age.py
+++ b/bin/scripts/debug_model_age.py
@@ -1,4 +1,4 @@
 import ageml.commands as commands
 
-if __name__ == '__main__':
-    commands.ModelAge()
\ No newline at end of file
+if __name__ == "__main__":
+    commands.ModelAge()
diff --git a/docs/CONTRIBUTING.md b/docs/CONTRIBUTING.md
index 2832b8a..04638b7 100644
--- a/docs/CONTRIBUTING.md
+++ b/docs/CONTRIBUTING.md
@@ -27,15 +27,15 @@ ageml
 │   ├──CONTRIBUTING.md        # Contribution Guidelines.
 │   └──...
 │
-├── resources                 # Folder with figures and other supporting files 
+├── resources                 # Folder with figures and other supporting files
 │
 │
 ├── src                       # Contains all the source code of the package
-│   └── ageml 
-│       ├── ... 
+│   └── ageml
+│       ├── ...
 │       ├── my_awesome_subpkg1
 │       │   ├── __init__.py
-│       │   └── awesome_fun.py 
+│       │   └── awesome_fun.py
 │       └── my_awesome_subpkg2
 │           ├── __init__.py
 │           └── awesome_fun.py
@@ -49,8 +49,8 @@ ageml
 │       └── test_awesome_fun
 │
 │
-├── .coverage                 # File to measure code coverage, percentage of tested code lines 
-├── README.md 
+├── .coverage                 # File to measure code coverage, percentage of tested code lines
+├── README.md
 ├── pyproject.toml            # Requirements for environment settings, packaging and so on
 ├── poetry.lock               # Dependency for building the system
 ├── noxfile.py                # Defines the linting, coverage, pytest sessions
diff --git a/noxfile.py b/noxfile.py
index 7b689ec..756c7c8 100644
--- a/noxfile.py
+++ b/noxfile.py
@@ -22,6 +22,6 @@ def coverage(s: Session) -> None:
 
 @session(reuse_venv=True)
 def lint(s: Session) -> None:
-    # Run pyproject-flake8 entrypoint to support reading configuration from pyproject.toml.
+    # Run the ruff linter, way faster than flake8
     s.run("poetry", "install", external=True)
-    s.run("flake8")
+    s.run("ruff", "check")
diff --git a/poetry.lock b/poetry.lock
index 43af8d7..741ddf0 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -136,6 +136,17 @@ files = [
 [package.dependencies]
 pycparser = "*"
 
+[[package]]
+name = "cfgv"
+version = "3.4.0"
+description = "Validate configuration and produce human readable error messages."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "cfgv-3.4.0-py2.py3-none-any.whl", hash = "sha256:b7265b1f29fd3316bfcd2b330d63d024f2bfd8bcb8b0272f8e19a504856c48f9"},
+    {file = "cfgv-3.4.0.tar.gz", hash = "sha256:e52591d4c5f5dead8e0f673fb16db7949d2cfb3f7da4582893288f0ded8fe560"},
+]
+
 [[package]]
 name = "cloudpickle"
 version = "3.0.0"
@@ -572,6 +583,20 @@ dev = ["black", "nose", "pre-commit", "pytest"]
 mongotrials = ["pymongo"]
 sparktrials = ["pyspark"]
 
+[[package]]
+name = "identify"
+version = "2.6.1"
+description = "File identification library for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "identify-2.6.1-py2.py3-none-any.whl", hash = "sha256:53863bcac7caf8d2ed85bd20312ea5dcfc22226800f6d6881f232d861db5a8f0"},
+    {file = "identify-2.6.1.tar.gz", hash = "sha256:91478c5fb7c3aac5ff7bf9b4344f803843dc586832d5f110d672b19aa1984c98"},
+]
+
+[package.extras]
+license = ["ukkonen"]
+
 [[package]]
 name = "importlib-metadata"
 version = "6.8.0"
@@ -969,6 +994,17 @@ doc = ["nb2plots (>=0.7)", "nbconvert (<7.9)", "numpydoc (>=1.6)", "pillow (>=9.
 extra = ["lxml (>=4.6)", "pydot (>=1.4.2)", "pygraphviz (>=1.11)", "sympy (>=1.10)"]
 test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"]
 
+[[package]]
+name = "nodeenv"
+version = "1.9.1"
+description = "Node.js virtual environment builder"
+optional = false
+python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7"
+files = [
+    {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = "sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"},
+    {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"},
+]
+
 [[package]]
 name = "nox"
 version = "2023.4.22"
@@ -1307,6 +1343,24 @@ files = [
 dev = ["pre-commit", "tox"]
 testing = ["pytest", "pytest-benchmark"]
 
+[[package]]
+name = "pre-commit"
+version = "3.8.0"
+description = "A framework for managing and maintaining multi-language pre-commit hooks."
+optional = false +python-versions = ">=3.9" +files = [ + {file = "pre_commit-3.8.0-py2.py3-none-any.whl", hash = "sha256:9a90a53bf82fdd8778d58085faf8d83df56e40dfe18f45b19446e26bf1b3a63f"}, + {file = "pre_commit-3.8.0.tar.gz", hash = "sha256:8bb6494d4a20423842e198980c9ecf9f96607a07ea29549e180eef9ae80fe7af"}, +] + +[package.dependencies] +cfgv = ">=2.0.0" +identify = ">=1.0.0" +nodeenv = ">=0.11.1" +pyyaml = ">=5.1" +virtualenv = ">=20.10.0" + [[package]] name = "prompt-toolkit" version = "3.0.43" @@ -1535,6 +1589,68 @@ files = [ {file = "pywin32-306-cp39-cp39-win_amd64.whl", hash = "sha256:39b61c15272833b5c329a2989999dcae836b1eed650252ab1b7bfbe1d59f30f4"}, ] +[[package]] +name = "pyyaml" +version = "6.0.2" +description = "YAML parser and emitter for Python" +optional = false +python-versions = ">=3.8" +files = [ + {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, + {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8824b5a04a04a047e72eea5cec3bc266db09e35de6bdfe34c9436ac5ee27d237"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7c36280e6fb8385e520936c3cb3b8042851904eba0e58d277dca80a5cfed590b"}, + {file = "PyYAML-6.0.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ec031d5d2feb36d1d1a24380e4db6d43695f3748343d99434e6f5f9156aaa2ed"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:936d68689298c36b53b29f23c6dbb74de12b4ac12ca6cfe0e047bedceea56180"}, + {file = "PyYAML-6.0.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:23502f431948090f597378482b4812b0caae32c22213aecf3b55325e049a6c68"}, + {file = "PyYAML-6.0.2-cp310-cp310-win32.whl", hash = "sha256:2e99c6826ffa974fe6e27cdb5ed0021786b03fc98e5ee3c5bfe1fd5015f42b99"}, + {file = "PyYAML-6.0.2-cp310-cp310-win_amd64.whl", hash = "sha256:a4d3091415f010369ae4ed1fc6b79def9416358877534caf6a0fdd2146c87a3e"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cc1c1159b3d456576af7a3e4d1ba7e6924cb39de8f67111c735f6fc832082774"}, + {file = "PyYAML-6.0.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1e2120ef853f59c7419231f3bf4e7021f1b936f6ebd222406c3b60212205d2ee"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d225db5a45f21e78dd9358e58a98702a0302f2659a3c6cd320564b75b86f47c"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5ac9328ec4831237bec75defaf839f7d4564be1e6b25ac710bd1a96321cc8317"}, + {file = "PyYAML-6.0.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ad2a3decf9aaba3d29c8f537ac4b243e36bef957511b4766cb0057d32b0be85"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:ff3824dc5261f50c9b0dfb3be22b4567a6f938ccce4587b38952d85fd9e9afe4"}, + {file = "PyYAML-6.0.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:797b4f722ffa07cc8d62053e4cff1486fa6dc094105d13fea7b1de7d8bf71c9e"}, + {file = "PyYAML-6.0.2-cp311-cp311-win32.whl", hash = "sha256:11d8f3dd2b9c1207dcaf2ee0bbbfd5991f571186ec9cc78427ba5bd32afae4b5"}, + {file = "PyYAML-6.0.2-cp311-cp311-win_amd64.whl", hash = "sha256:e10ce637b18caea04431ce14fabcf5c64a1c61ec9c56b071a4b7ca131ca52d44"}, + {file = 
"PyYAML-6.0.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:c70c95198c015b85feafc136515252a261a84561b7b1d51e3384e0655ddf25ab"}, + {file = "PyYAML-6.0.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ce826d6ef20b1bc864f0a68340c8b3287705cae2f8b4b1d932177dcc76721725"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f71ea527786de97d1a0cc0eacd1defc0985dcf6b3f17bb77dcfc8c34bec4dc5"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:9b22676e8097e9e22e36d6b7bda33190d0d400f345f23d4065d48f4ca7ae0425"}, + {file = "PyYAML-6.0.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:80bab7bfc629882493af4aa31a4cfa43a4c57c83813253626916b8c7ada83476"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:0833f8694549e586547b576dcfaba4a6b55b9e96098b36cdc7ebefe667dfed48"}, + {file = "PyYAML-6.0.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:8b9c7197f7cb2738065c481a0461e50ad02f18c78cd75775628afb4d7137fb3b"}, + {file = "PyYAML-6.0.2-cp312-cp312-win32.whl", hash = "sha256:ef6107725bd54b262d6dedcc2af448a266975032bc85ef0172c5f059da6325b4"}, + {file = "PyYAML-6.0.2-cp312-cp312-win_amd64.whl", hash = "sha256:7e7401d0de89a9a855c839bc697c079a4af81cf878373abd7dc625847d25cbd8"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:efdca5630322a10774e8e98e1af481aad470dd62c3170801852d752aa7a783ba"}, + {file = "PyYAML-6.0.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:50187695423ffe49e2deacb8cd10510bc361faac997de9efef88badc3bb9e2d1"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ffe8360bab4910ef1b9e87fb812d8bc0a308b0d0eef8c8f44e0254ab3b07133"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:17e311b6c678207928d649faa7cb0d7b4c26a0ba73d41e99c4fff6b6c3276484"}, + {file = "PyYAML-6.0.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:70b189594dbe54f75ab3a1acec5f1e3faa7e8cf2f1e08d9b561cb41b845f69d5"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:41e4e3953a79407c794916fa277a82531dd93aad34e29c2a514c2c0c5fe971cc"}, + {file = "PyYAML-6.0.2-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:68ccc6023a3400877818152ad9a1033e3db8625d899c72eacb5a668902e4d652"}, + {file = "PyYAML-6.0.2-cp313-cp313-win32.whl", hash = "sha256:bc2fa7c6b47d6bc618dd7fb02ef6fdedb1090ec036abab80d4681424b84c1183"}, + {file = "PyYAML-6.0.2-cp313-cp313-win_amd64.whl", hash = "sha256:8388ee1976c416731879ac16da0aff3f63b286ffdd57cdeb95f3f2e085687563"}, + {file = "PyYAML-6.0.2-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:24471b829b3bf607e04e88d79542a9d48bb037c2267d7927a874e6c205ca7e9a"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d7fded462629cfa4b685c5416b949ebad6cec74af5e2d42905d41e257e0869f5"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d84a1718ee396f54f3a086ea0a66d8e552b2ab2017ef8b420e92edbc841c352d"}, + {file = "PyYAML-6.0.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9056c1ecd25795207ad294bcf39f2db3d845767be0ea6e6a34d856f006006083"}, + {file = "PyYAML-6.0.2-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:82d09873e40955485746739bcb8b4586983670466c23382c19cffecbf1fd8706"}, + {file = "PyYAML-6.0.2-cp38-cp38-win32.whl", hash = 
"sha256:43fa96a3ca0d6b1812e01ced1044a003533c47f6ee8aca31724f78e93ccc089a"},
+    {file = "PyYAML-6.0.2-cp38-cp38-win_amd64.whl", hash = "sha256:01179a4a8559ab5de078078f37e5c1a30d76bb88519906844fd7bdea1b7729ff"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:688ba32a1cffef67fd2e9398a2efebaea461578b0923624778664cc1c914db5d"},
+    {file = "PyYAML-6.0.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:a8786accb172bd8afb8be14490a16625cbc387036876ab6ba70912730faf8e1f"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d8e03406cac8513435335dbab54c0d385e4a49e4945d2909a581c83647ca0290"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f753120cb8181e736c57ef7636e83f31b9c0d1722c516f7e86cf15b7aa57ff12"},
+    {file = "PyYAML-6.0.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b1fdb9dc17f5a7677423d508ab4f243a726dea51fa5e70992e59a7411c89d19"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0b69e4ce7a131fe56b7e4d770c67429700908fc0752af059838b1cfb41960e4e"},
+    {file = "PyYAML-6.0.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a9f8c2e67970f13b16084e04f134610fd1d374bf477b17ec1599185cf611d725"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win32.whl", hash = "sha256:6395c297d42274772abc367baaa79683958044e5d3835486c16da75d2a694631"},
+    {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"},
+    {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"},
+]
+
 [[package]]
 name = "pyzmq"
 version = "25.1.2"
@@ -2056,4 +2172,4 @@ test = ["big-O", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-it
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.9,<3.12"
-content-hash = "5f8051e81c28b101be62887047884308f24bea749c37875794efad634931f743"
+content-hash = "41b6734b9f7732010a0e692318300606886c293284a03ce913191309a49063e6"
diff --git a/pyproject.toml b/pyproject.toml
index f998767..6bc3ddd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -30,13 +30,16 @@ scipy = ">=1.10"
 statsmodels = "0.14.0"
 matplotlib = "3.5"
 scikit-learn = "1.3"
-coverage-conditional-plugin = "^0.7.0"
 xgboost = "^2.0.3"
 pillow = "^10.3.0"
 hpsklearn = {git = "https://github.com/hyperopt/hyperopt-sklearn", rev = "4bc286479677a0bfd2178dac4546ea268b3f3b77"}
 
 [tool.poetry.dev-dependencies]
+# Testing and linting tool
 nox-poetry = "*"
+coverage-conditional-plugin = "^0.7.0"
+# Pre-commit
+pre-commit = "^3.0.0"
 # Testing
 pytest = "*"
 pytest-cov = "*"
diff --git a/setup.cfg b/setup.cfg
index 6fdf970..dd60a88 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,5 +1,5 @@
 [flake8]
-exclude = .nox, .pytest_cache, dist, .venv, resources, data, bin, .history, 
+exclude = .nox, .pytest_cache, dist, .venv, resources, data, bin, .history,
 max-line-length = 140
 statistics = True
 # Which LINTING rules to ignore
@@ -7,13 +7,13 @@ ignore = F401, # Imported but unused
    F403, # Unable to detect undefined names
    B028, # No explicit stacklevel keyword argument found
    W293, # blank line contains whitespace (I don't like this rule, interferes with good function indentation)
-   C419, # Unnecessary list comprehension passed to all() prevents short-circuiting - rewrite as a generator 
+   C419, # Unnecessary list comprehension passed to all() prevents short-circuiting - rewrite as a generator
    E702, # Multiple statements on one line (semicolon)
    B023, # Function definition does not bind loop variable 'some_var_name'.
-per-file-ignores = 
+per-file-ignores =
 # It poses no inconvenient to violate B006 in this file.
-    tests/test_ageml/test_modelling.py: B006 
+    tests/test_ageml/test_modelling.py: B006
 
 
 [coverage:run]
@@ -25,4 +25,4 @@ plugins =
 [coverage:coverage_conditional_plugin]
 rules =
     "is_installed('django')": has-django
-    "not is_installed('django')": has-no-django
\ No newline at end of file
+    "not is_installed('django')": has-no-django
diff --git a/src/ageml/__main__.py b/src/ageml/__main__.py
index 0b69cd1..ed92a12 100644
--- a/src/ageml/__main__.py
+++ b/src/ageml/__main__.py
@@ -7,5 +7,5 @@ def main():
     ageml.ui.CLI()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()
diff --git a/src/ageml/commands.py b/src/ageml/commands.py
index f75f1a4..ca19bd9 100644
--- a/src/ageml/commands.py
+++ b/src/ageml/commands.py
@@ -53,32 +53,67 @@ def configure_parser(self):
         """Configure parser with required arguments for processing."""
 
         # Required arguments
-        self.parser.add_argument("-o", "--output", metavar="DIR", required=True,
-                                 help=messages.output_long_description,)
-        self.parser.add_argument("-f", "--features", metavar="FILE", required=True,
-                                 help=messages.features_long_description)
-
+        self.parser.add_argument(
+            "-o",
+            "--output",
+            metavar="DIR",
+            required=True,
+            help=messages.output_long_description,
+        )
+        self.parser.add_argument(
+            "-f",
+            "--features",
+            metavar="FILE",
+            required=True,
+            help=messages.features_long_description,
+        )
+
         # Parameter arguments with defaults
-        self.parser.add_argument("-m", "--model", nargs="*", default=["linear_reg"],
-                                 help=messages.model_long_description)
-        self.parser.add_argument("-s", "--scaler", nargs="*", default=["standard"],
-                                 help=messages.scaler_long_description)
-        self.parser.add_argument("--cv", nargs="+", type=int, default=[5, 0],
-                                 help=messages.cv_long_description)
-        self.parser.add_argument("-fext", "--feature_extension", nargs=1, default=['0'],
-                                 help=messages.poly_feature_extension_description)
-        self.parser.add_argument("-ht", "--hyperparameter_tuning", nargs=1, default=['0'],
-                                 help=messages.hyperparameter_grid_description)
-
+        self.parser.add_argument(
+            "-m",
+            "--model",
+            nargs="*",
+            default=["linear_reg"],
+            help=messages.model_long_description,
+        )
+        self.parser.add_argument(
+            "-s",
+            "--scaler",
+            nargs="*",
+            default=["standard"],
+            help=messages.scaler_long_description,
+        )
+        self.parser.add_argument(
+            "--cv",
+            nargs="+",
+            type=int,
+            default=[5, 0],
+            help=messages.cv_long_description,
+        )
+        self.parser.add_argument(
+            "-fext",
+            "--feature_extension",
+            nargs=1,
+            default=["0"],
+            help=messages.poly_feature_extension_description,
+        )
+        self.parser.add_argument(
+            "-ht",
+            "--hyperparameter_tuning",
+            nargs=1,
+            default=["0"],
+            help=messages.hyperparameter_grid_description,
+        )
+
         # Optional arguments
-        self.parser.add_argument("--covariates", metavar="FILE",
-                                 help=messages.covar_long_description)
-        self.parser.add_argument("--covar_name", metavar="COVAR_NAME",
-                                 help=messages.covar_name_long_description)
-        self.parser.add_argument("--clinical", metavar="FILE",
-                                 help=messages.clinical_long_description)
-        self.parser.add_argument("--systems", metavar="FILE",
-                                 help=messages.systems_long_description)
+        self.parser.add_argument("--covariates", metavar="FILE", help=messages.covar_long_description)
+        self.parser.add_argument(
+            "--covar_name",
+            metavar="COVAR_NAME",
+            help=messages.covar_name_long_description,
+        )
+        self.parser.add_argument("--clinical", metavar="FILE",
help=messages.clinical_long_description) + self.parser.add_argument("--systems", metavar="FILE", help=messages.systems_long_description) def configure_args(self, args): """Configure argumens with required fromatting for modelling. @@ -105,9 +140,7 @@ def configure_args(self, args): for item in args.scaler[1:]: # Check that item has one = to split if item.count("=") != 1: - raise ValueError( - "Scaler parameters must be in the format param1=value1 param2=value2 ..." - ) + raise ValueError("Scaler parameters must be in the format param1=value1 param2=value2 ...") key, value = item.split("=") value = convert(value) scaler_params[key] = value @@ -123,9 +156,7 @@ def configure_args(self, args): for item in args.model[1:]: # Check that item has one = to split if item.count("=") != 1: - raise ValueError( - "Model parameters must be in the format param1=value1 param2=value2 ..." - ) + raise ValueError("Model parameters must be in the format param1=value1 param2=value2 ...") key, value = item.split("=") value = convert(value) model_params[key] = value @@ -135,17 +166,13 @@ def configure_args(self, args): # Set hyperparameter grid search value if len(args.hyperparameter_tuning) > 1 or not args.hyperparameter_tuning[0].isdigit(): - raise ValueError( - "Hyperparameter grid points must be a non negative integer." - ) + raise ValueError("Hyperparameter grid points must be a non negative integer.") else: args.hyperparameter_tuning = args.hyperparameter_tuning[0] args.hyperparameter_tuning = int(convert(args.hyperparameter_tuning)) # Set polynomial feature extension value if len(args.feature_extension) > 1 or not args.feature_extension[0].isdigit(): - raise ValueError( - "Polynomial feature extension degree must be a non negative integer." - ) + raise ValueError("Polynomial feature extension degree must be a non negative integer.") else: args.feature_extension = args.feature_extension[0] args.feature_extension = int(convert(args.feature_extension)) @@ -178,18 +205,31 @@ def configure_parser(self): """Configure parser with required arguments for processing.""" # Required arguments - self.parser.add_argument("-o", "--output", metavar="DIR", required=True, - help=messages.output_long_description,) - self.parser.add_argument("-a", "--ages", metavar="FILE", required=True, - help=messages.ages_long_description) - self.parser.add_argument("-f", "--factors", metavar="FILE", required=True, - help=messages.factors_long_description) - + self.parser.add_argument( + "-o", + "--output", + metavar="DIR", + required=True, + help=messages.output_long_description, + ) + self.parser.add_argument( + "-a", + "--ages", + metavar="FILE", + required=True, + help=messages.ages_long_description, + ) + self.parser.add_argument( + "-f", + "--factors", + metavar="FILE", + required=True, + help=messages.factors_long_description, + ) + # Optional arguments - self.parser.add_argument("--covariates", metavar="FILE", - help=messages.covar_long_description) - self.parser.add_argument("--clinical", metavar="FILE", - help=messages.clinical_long_description) + self.parser.add_argument("--covariates", metavar="FILE", help=messages.covar_long_description) + self.parser.add_argument("--clinical", metavar="FILE", help=messages.clinical_long_description) class ClinicalGroups(Interface): @@ -218,16 +258,29 @@ def configure_parser(self): """Configure parser with required arguments for processing.""" # Required arguments - self.parser.add_argument("-o", "--output", metavar="DIR", required=True, - help=messages.output_long_description,) - 
self.parser.add_argument("-a", "--ages", metavar="FILE", required=True, - help=messages.ages_long_description) - self.parser.add_argument("--clinical", metavar="FILE", required=True, - help=messages.clinical_long_description) - + self.parser.add_argument( + "-o", + "--output", + metavar="DIR", + required=True, + help=messages.output_long_description, + ) + self.parser.add_argument( + "-a", + "--ages", + metavar="FILE", + required=True, + help=messages.ages_long_description, + ) + self.parser.add_argument( + "--clinical", + metavar="FILE", + required=True, + help=messages.clinical_long_description, + ) + # Optional arguments - self.parser.add_argument("--covariates", metavar="FILE", - help=messages.covar_long_description) + self.parser.add_argument("--covariates", metavar="FILE", help=messages.covar_long_description) class ClinicalClassification(Interface): @@ -259,27 +312,60 @@ def configure_parser(self): """Configure parser with required arguments for processing.""" # Required arguments - self.parser.add_argument("-o", "--output", metavar="DIR", required=True, - help=messages.output_long_description,) - self.parser.add_argument("-a", "--ages", metavar="FILE", required=True, - help=messages.ages_long_description) - self.parser.add_argument("--clinical", metavar="FILE", required=True, - help=messages.clinical_long_description) - self.parser.add_argument("--groups", nargs=2, metavar="GROUP", required=True, - help=messages.groups_long_description) - + self.parser.add_argument( + "-o", + "--output", + metavar="DIR", + required=True, + help=messages.output_long_description, + ) + self.parser.add_argument( + "-a", + "--ages", + metavar="FILE", + required=True, + help=messages.ages_long_description, + ) + self.parser.add_argument( + "--clinical", + metavar="FILE", + required=True, + help=messages.clinical_long_description, + ) + self.parser.add_argument( + "--groups", + nargs=2, + metavar="GROUP", + required=True, + help=messages.groups_long_description, + ) + # Default argument - self.parser.add_argument("--cv", nargs="+", type=int, default=[5, 0], - help=messages.cv_long_description) - self.parser.add_argument("--thr", nargs=1, type=float, default=[0.5], - help=messages.thr_long_description) - self.parser.add_argument("--ci", nargs=1, type=float, default=[0.95], - help=messages.ci_long_description) - + self.parser.add_argument( + "--cv", + nargs="+", + type=int, + default=[5, 0], + help=messages.cv_long_description, + ) + self.parser.add_argument( + "--thr", + nargs=1, + type=float, + default=[0.5], + help=messages.thr_long_description, + ) + self.parser.add_argument( + "--ci", + nargs=1, + type=float, + default=[0.95], + help=messages.ci_long_description, + ) + # Optional arguments - self.parser.add_argument("--covariates", metavar="FILE", - help=messages.covar_long_description) - + self.parser.add_argument("--covariates", metavar="FILE", help=messages.covar_long_description) + def configure_args(self, args): """Configure argumens with required fromatting for modelling. @@ -301,7 +387,7 @@ def configure_args(self, args): args.classifier_cv_split, args.classifier_seed = args.cv else: raise ValueError("Too many values to unpack") - + # Set threshold args.classifier_thr = args.thr[0] @@ -313,6 +399,7 @@ def configure_args(self, args): # Object wrappers + def model_age(): """Run ModelAge class.""" diff --git a/src/ageml/messages.py b/src/ageml/messages.py index 5c986ac..ab87324 100644 --- a/src/ageml/messages.py +++ b/src/ageml/messages.py @@ -90,10 +90,7 @@ "column is the delta. 
The first row should be the header for column names." ) -groups_long_description = ( - "Clinical groups to do classification. \n" - "Two groups are required. (e.g. --groups cn ad)" -) +groups_long_description = "Clinical groups to do classification. \n" "Two groups are required. (e.g. --groups cn ad)" poly_feature_extension_description = ( @@ -107,16 +104,14 @@ "(e.g. -ht 100 / --hyperparameter_tuning 100)" ) -thr_long_description = ( - "Threshold for classification. Default: 0.5 \n" - "The threshold is used for assingning hard labels. (e.g. --thr 0.5)" -) +thr_long_description = "Threshold for classification. Default: 0.5 \n" "The threshold is used for assingning hard labels. (e.g. --thr 0.5)" -ci_long_description = ("Confidence interval for classification metrics. Default: 0.95 \n") +ci_long_description = "Confidence interval for classification metrics. Default: 0.95 \n" -read_the_documentation_message = ("\nFor more information, refer to the documentation in the ageml repository:\n" - "https://github.com/compneurobilbao/ageml/tree/main/docs\n" - ) +read_the_documentation_message = ( + "\nFor more information, refer to the documentation in the ageml repository:\n" + "https://github.com/compneurobilbao/ageml/tree/main/docs\n" +) # UI information diff --git a/src/ageml/modelling.py b/src/ageml/modelling.py index b6033ed..dd97ab6 100644 --- a/src/ageml/modelling.py +++ b/src/ageml/modelling.py @@ -73,7 +73,7 @@ class AgeML: predict_age(self, X): Predict age with fitted model. """ - + # Scaler dictionary scaler_dict = { "maxabs": MaxAbsScaler, @@ -84,15 +84,19 @@ class AgeML: "robust": RobustScaler, "standard": StandardScaler, } - scaler_hyperparameters = {'maxabs': {}, - 'minmax': {}, - 'normalizer': {}, - 'power': {'method': ['yeo-johnson', 'box-cox']}, - 'quantile': {'n_quantiles': [10, 1000], - 'output_distribution': ['normal', 'uniform']}, - 'robust': {}, - 'standard': {}} - + scaler_hyperparameters = { + "maxabs": {}, + "minmax": {}, + "normalizer": {}, + "power": {"method": ["yeo-johnson", "box-cox"]}, + "quantile": { + "n_quantiles": [10, 1000], + "output_distribution": ["normal", "uniform"], + }, + "robust": {}, + "standard": {}, + } + # Model dictionary model_dict = { "linear_reg": linear_model.LinearRegression, @@ -103,76 +107,87 @@ class AgeML: "rf": RandomForestRegressor, "hyperopt": HyperoptEstimator, } - model_hyperparameter_ranges = {'ridge': {'alpha': [-3, 3]}, - - 'lasso': {'alpha': [-3, 3]}, - - 'linear_svr': {'C': [-3, 3], - 'epsilon': [-3, 3]}, - - 'xgboost': {'eta': [-3, 3], - 'gamma': [-3, 3], - 'max_depth': [0, 100], - 'min_child_weight': [0, 100], - 'max_delta_step': [0, 100], - 'subsample': [-3, 3], - 'colsample_bytree': [0.001, 1], - 'colsample_bylevel': [0.001, 1], - 'colsample_bynode': [0.001, 1], - 'lambda': [-3, 3], - 'alpha': [-3, 3]}, - - 'rf': {'n_estimators': [1, 100], - 'max_depth': [1, 100], - 'min_samples_split': [1, 100], - 'min_samples_leaf': [1, 100], - 'max_features': [1, 100], - 'min_impurity_decrease': [0, 1], - 'max_leaf_nodes': [1, 100], - 'min_weight_fraction_leaf': [-3, 3], }} - - model_hyperparameter_types = {'ridge': {'alpha': 'log'}, - - 'lasso': {'alpha': 'log'}, - - 'linear_svr': {'C': 'log', - 'epsilon': 'log'}, - - 'xgboost': {'eta': 'float', - 'gamma': 'float', - 'max_depth': 'int', - 'min_child_weight': 'int', - 'max_delta_step': 'int', - 'subsample': 'float', - 'colsample_bytree': 'float', - 'colsample_bylevel': 'float', - 'colsample_bynode': 'float', - 'lambda': 'log', - 'alpha': 'log'}, - - 'rf': {'n_estimators': 'int', - 
'max_depth': 'int', - 'min_samples_split': 'int', - 'min_samples_leaf': 'int', - 'max_features': 'int', - 'min_impurity_decrease': 'log', - 'max_leaf_nodes': 'int'}} - - def __init__(self, scaler_type, scaler_params, model_type, model_params, CV_split, seed, - hyperparameter_tuning: int = 0, feature_extension: int = 0): + model_hyperparameter_ranges = { + "ridge": {"alpha": [-3, 3]}, + "lasso": {"alpha": [-3, 3]}, + "linear_svr": {"C": [-3, 3], "epsilon": [-3, 3]}, + "xgboost": { + "eta": [-3, 3], + "gamma": [-3, 3], + "max_depth": [0, 100], + "min_child_weight": [0, 100], + "max_delta_step": [0, 100], + "subsample": [-3, 3], + "colsample_bytree": [0.001, 1], + "colsample_bylevel": [0.001, 1], + "colsample_bynode": [0.001, 1], + "lambda": [-3, 3], + "alpha": [-3, 3], + }, + "rf": { + "n_estimators": [1, 100], + "max_depth": [1, 100], + "min_samples_split": [1, 100], + "min_samples_leaf": [1, 100], + "max_features": [1, 100], + "min_impurity_decrease": [0, 1], + "max_leaf_nodes": [1, 100], + "min_weight_fraction_leaf": [-3, 3], + }, + } + + model_hyperparameter_types = { + "ridge": {"alpha": "log"}, + "lasso": {"alpha": "log"}, + "linear_svr": {"C": "log", "epsilon": "log"}, + "xgboost": { + "eta": "float", + "gamma": "float", + "max_depth": "int", + "min_child_weight": "int", + "max_delta_step": "int", + "subsample": "float", + "colsample_bytree": "float", + "colsample_bylevel": "float", + "colsample_bynode": "float", + "lambda": "log", + "alpha": "log", + }, + "rf": { + "n_estimators": "int", + "max_depth": "int", + "min_samples_split": "int", + "min_samples_leaf": "int", + "max_features": "int", + "min_impurity_decrease": "log", + "max_leaf_nodes": "int", + }, + } + + def __init__( + self, + scaler_type, + scaler_params, + model_type, + model_params, + CV_split, + seed, + hyperparameter_tuning: int = 0, + feature_extension: int = 0, + ): """Initialise variables.""" # Scaler dictionary self.scaler_type = scaler_type self.scaler_dict = AgeML.scaler_dict - + # Model dictionary self.model_type = model_type self.model_dict = AgeML.model_dict # Hyperparameters and feature extension self.hyperparameter_tuning = hyperparameter_tuning self.feature_extension = feature_extension - + # Set required modelling parts self.set_scaler(scaler_type, **scaler_params) self.set_model(model_type, **model_params) @@ -256,7 +271,7 @@ def set_pipeline(self): pipe = [] if self.model is None: raise ValueError("Must set a valid model before setting pipeline.") - + # Scaler and whether it has to be optimized if self.scaler is not None and self.model_type != "hyperopt": pipe.append(("scaler", self.scaler)) @@ -449,14 +464,15 @@ def fit_age(self, X, y): # Print comparison with mean age as only predictor to have a reference of a dummy regressor dummy_rmse = np.sqrt(np.mean((y - np.mean(y)) ** 2)) dummy_mae = np.mean(np.abs(y - np.mean(y))) - print("When using mean of ages as predictor for each subject (dummy regressor):\n" - "MAE: %.2f, RMSE: %.2f" % (dummy_mae, dummy_rmse)) + print( + "When using mean of ages as predictor for each subject (dummy regressor):\n" "MAE: %.2f, RMSE: %.2f" % (dummy_mae, dummy_rmse) + ) print("Age range: %.2f" % (np.max(y) - np.min(y))) # Fit model on all data self.pipeline.fit(X, y) y_pred = self.pipeline.predict(X) - + self.pipelineFit = True self.fit_age_bias(y, y_pred) @@ -489,9 +505,9 @@ def predict_age(self, X, y=None): class Classifier: - + """Classifier of class labels based on deltas. 
- + This class allows the differentiation of two groups based on differences in their deltas based on a logistic regresor. @@ -537,7 +553,7 @@ def set_model(self): """Sets the model to use in the pipeline.""" self.model = linear_model.LogisticRegression() - + def set_CV_params(self, CV_split, seed=None): """Set the parameters of the Cross Validation Scheme. @@ -548,7 +564,7 @@ def set_CV_params(self, CV_split, seed=None): self.CV_split = CV_split self.seed = seed - + def set_threshold(self, thr): """Set the threshold for classification. @@ -579,12 +595,12 @@ def fit_model(self, X, y): accs, aucs, spes, sens = [], [], [], [] y = y.ravel() y_preds = np.empty(shape=y.shape) - + kf = model_selection.KFold(n_splits=self.CV_split, shuffle=True, random_state=self.seed) for train_index, test_index in kf.split(X): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] - + # Fit the model using the training data self.model.fit(X_train, y_train) @@ -595,7 +611,7 @@ def fit_model(self, X, y): # Calculate AUC of model auc = metrics.roc_auc_score(y_test, y_pred) aucs.append(auc) - + # Calculate relevant metrics acc = metrics.accuracy_score(y_test, y_pred > self.thr) tn, fp, fn, tp = metrics.confusion_matrix(y_test, y_pred > self.thr).ravel() @@ -612,11 +628,11 @@ def fit_model(self, X, y): ci_spes = st.t.interval(confidence=self.ci_val, df=len(spes) - 1, loc=np.mean(spes), scale=st.sem(spes)) # Print results - print('Summary metrics over all CV splits (%s CI)' % (self.ci_val)) - print('AUC: %.3f [%.3f-%.3f]' % (np.mean(aucs), ci_aucs[0], ci_aucs[1])) - print('Accuracy: %.3f [%.3f-%.3f]' % (np.mean(accs), ci_accs[0], ci_accs[1])) - print('Sensitivity: %.3f [%.3f-%.3f]' % (np.mean(sens), ci_sens[0], ci_sens[1])) - print('Specificity: %.3f [%.3f-%.3f]' % (np.mean(spes), ci_spes[0], ci_spes[1])) + print("Summary metrics over all CV splits (%s CI)" % (self.ci_val)) + print("AUC: %.3f [%.3f-%.3f]" % (np.mean(aucs), ci_aucs[0], ci_aucs[1])) + print("Accuracy: %.3f [%.3f-%.3f]" % (np.mean(accs), ci_accs[0], ci_accs[1])) + print("Sensitivity: %.3f [%.3f-%.3f]" % (np.mean(sens), ci_sens[0], ci_sens[1])) + print("Specificity: %.3f [%.3f-%.3f]" % (np.mean(spes), ci_spes[0], ci_spes[1])) # Final model trained on all data self.model.fit(X, y) @@ -625,7 +641,7 @@ def fit_model(self, X, y): self.modelFit = True return y_preds - + def predict(self, X): """Predict class labels with fitted model. 
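A side note on the metrics printed by Classifier.fit_model above: the bracketed ranges are Student-t confidence intervals computed over the per-split cross-validation scores. A standalone sketch of that computation, using made-up per-split AUC values:

    # Sketch of the CI computation used in Classifier.fit_model above:
    # a Student-t interval over per-split cross-validation scores.
    import numpy as np
    import scipy.stats as st

    aucs = [0.81, 0.78, 0.84, 0.80, 0.79]  # hypothetical per-split AUCs
    ci_val = 0.95
    ci = st.t.interval(confidence=ci_val, df=len(aucs) - 1, loc=np.mean(aucs), scale=st.sem(aucs))
    print("AUC: %.3f [%.3f-%.3f]" % (np.mean(aucs), ci[0], ci[1]))

The same st.t.interval call is applied to the accuracy, sensitivity, and specificity lists in the class above.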
diff --git a/src/ageml/processing.py b/src/ageml/processing.py
index 883a29a..a98daef 100644
--- a/src/ageml/processing.py
+++ b/src/ageml/processing.py
@@ -45,7 +45,7 @@ def covariate_correction(X, Z, beta=None):
         raise ValueError("NaN entrie(s) found in X.")
     elif beta is not None and any(np.isnan(beta.flatten())):
         raise ValueError("NaN entrie(s) found in Z.")
-    
+
     # Check shapes
     if X.shape[0] != Z.shape[0]:
         raise ValueError("X and Z must have the same number of rows.")
diff --git a/src/ageml/ui.py b/src/ageml/ui.py
index d5326f8..fddf07a 100644
--- a/src/ageml/ui.py
+++ b/src/ageml/ui.py
@@ -228,7 +228,7 @@ def generate_model(self):
             self.args.model_cv_split,
             self.args.model_seed,
             self.args.hyperparameter_tuning,
-            self.args.feature_extension
+            self.args.feature_extension,
         )
 
         return model
@@ -239,8 +239,9 @@ def generate_classifier(self):
             self.args.classifier_cv_split,
             self.args.classifier_seed,
             self.args.classifier_thr,
-            self.args.classifier_ci)
-        
+            self.args.classifier_ci,
+        )
+
         return classifier
 
     def update_params(self):
@@ -700,8 +701,7 @@ def age_distribution(self, ages_dict: dict, name=""):
             for j in range(i + 1, len(labels)):
                 t_stat, p_val = stats.ttest_ind(ages[i], ages[j])
                 if p_val < 0.05:
-                    warn_message = "Age distributions %s and %s are not similar: %.2f (%.2g) " % (
-                        labels[i], labels[j], t_stat, p_val)
+                    warn_message = "Age distributions %s and %s are not similar: %.2f (%.2g) " % (labels[i], labels[j], t_stat, p_val)
                     print(warn_message)
                     warnings.warn(warn_message, category=UserWarning)
 
@@ -728,20 +728,28 @@ def features_vs_age(self, features_dict: dict, tag, significance: float = 0.05,
             # Extract features
             X, y, feature_names = feature_extractor(df)
             # Covariate correction
-            if self.flags["covariates"] and not self.flags['covarname']:
+            if self.flags["covariates"] and not self.flags["covarname"]:
                 print("Covariate effects will be subtracted from features.")
                 X, _ = covariate_correction(X, self.df_covariates.loc[df.index].to_numpy())
             # Calculate correlation between features and age
             corr, order, p_values = find_correlations(X, y)
             # Reject null hypothesis of no correlation
-            reject_bon, _, _, _ = multipletests(p_values, alpha=significance, method='bonferroni')
-            reject_fdr, _, _, _ = multipletests(p_values, alpha=significance, method='fdr_bh')
+            reject_bon, _, _, _ = multipletests(p_values, alpha=significance, method="bonferroni")
+            reject_fdr, _, _, _ = multipletests(p_values, alpha=significance, method="fdr_bh")
             significant = significant_markers(reject_bon, reject_fdr)
             # Print results
             for idx, order_element in enumerate(order):
-                print("%d.%s %s %s: %.2f (%.2g)" % (idx + 1, label, significant[order_element],
-                                                    feature_names[order_element], corr[order_element],
-                                                    p_values[order_element]))
+                print(
+                    "%d.%s %s %s: %.2f (%.2g)"
+                    % (
+                        idx + 1,
+                        label,
+                        significant[order_element],
+                        feature_names[order_element],
+                        corr[order_element],
+                        p_values[order_element],
+                    )
+                )
             # Append all the values
             X_list.append(X), y_list.append(y), corr_list.append(corr), order_list.append(order), significance_list.append(significant)
 
@@ -771,7 +779,7 @@ def model_age(self, df, model, tag):
             raise ValueError("Not enough controls for modelling for each CV split.")
 
         # Covariate correction
-        if self.flags["covariates"] and not self.flags['covarname']:
+        if self.flags["covariates"] and not self.flags["covarname"]:
             print("Covariate effects will be subtracted from features.")
             X, beta = covariate_correction(X, self.df_covariates.loc[df.index].to_numpy())
         else:
@@ -822,7 +830,7 @@ def predict_age(self, df, model,
tag: NameTag, beta: np.ndarray = None,): X, y, _ = feature_extractor(df) # Covariate correction - if self.flags["covariates"] and not self.flags['covarname']: + if self.flags["covariates"] and not self.flags["covarname"]: print("Covariate effects will be subtracted from features.") X, _ = covariate_correction(X, self.df_covariates.loc[df.index].to_numpy(), beta) @@ -917,8 +925,8 @@ def factors_vs_deltas(self, dict_ages, df_factors, tag, covars=None, beta=None, corrs.append(corr) # Reject null hypothesis of no correlation - reject_bon, _, _, _ = multipletests(p_values, alpha=significance, method='bonferroni') - reject_fdr, _, _, _ = multipletests(p_values, alpha=significance, method='fdr_bh') + reject_bon, _, _, _ = multipletests(p_values, alpha=significance, method="bonferroni") + reject_fdr, _, _, _ = multipletests(p_values, alpha=significance, method="fdr_bh") significant = significant_markers(reject_bon, reject_fdr) significants.append(significant) @@ -931,7 +939,7 @@ def factors_vs_deltas(self, dict_ages, df_factors, tag, covars=None, beta=None, def deltas_by_group(self, dfs, tag, significance: float = 0.05): """Calculate summary metrics of deltas by group. - + Parameters ---------- df: list of dataframes with delta information; shape=(n,m) @@ -992,8 +1000,8 @@ def deltas_by_group(self, dfs, tag, significance: float = 0.05): conf_intervals[i, j] = [ci_low, ci_upp] # Reject null hypothesis of no correlation - reject_bon, _, _, _ = multipletests(p_vals_matrix.flatten(), alpha=significance, method='bonferroni') - reject_fdr, _, _, _ = multipletests(p_vals_matrix.flatten(), alpha=significance, method='fdr_bh') + reject_bon, _, _, _ = multipletests(p_vals_matrix.flatten(), alpha=significance, method="bonferroni") + reject_fdr, _, _, _ = multipletests(p_vals_matrix.flatten(), alpha=significance, method="fdr_bh") reject_bon = reject_bon.reshape((len(deltas), len(deltas))) reject_fdr = reject_fdr.reshape((len(deltas), len(deltas))) @@ -1065,7 +1073,12 @@ def classify(self, df1, df2, groups, tag, beta: np.ndarray = None): # Apply covariate correction if self.flags["covariates"]: - Z = np.concatenate((self.df_covariates.loc[df1.index].to_numpy(), self.df_covariates.loc[df2.index].to_numpy())) + Z = np.concatenate( + ( + self.df_covariates.loc[df1.index].to_numpy(), + self.df_covariates.loc[df2.index].to_numpy(), + ) + ) X, _ = covariate_correction(X, Z, beta) # Calculate classification @@ -1314,7 +1327,7 @@ def reset_args(self): """Reset arguments to None except output directory.""" for attr_name in vars(self.args): - if attr_name != 'output': + if attr_name != "output": setattr(self.args, attr_name, None) def command_interface(self): @@ -1362,13 +1375,13 @@ def classifier_command(self): if len(self.line) == 0: error = "Must provide two arguments or None." return error - + # Set defaults - if len(self.line) == 1 and self.line[0] == 'None': + if len(self.line) == 1 and self.line[0] == "None": self.args.classifier_thr = 0.5 self.args.classifier_ci = 0.95 return error - + # Check wether items are floats for item in self.line: try: @@ -1376,7 +1389,7 @@ def classifier_command(self): except ValueError: error = "Parameters must be floats." return error - + # Set parameters if len(self.line) == 2: self.args.classifier_thr = float(self.line[0]) @@ -1385,9 +1398,9 @@ def classifier_command(self): error = "Too many values to unpack." elif len(self.line) == 1: error = "Must provide two arguments or None." 
- + return error - + def classification_command(self): """Run classification.""" @@ -1409,7 +1422,7 @@ def classification_command(self): # Ask for CV parameters adn classifier parameters print("CV parameters (Default: nº splits=5 and seed=0):") - self.force_command(self.cv_command, 'classifier') + self.force_command(self.cv_command, "classifier") print("Classifier parameters (Default: thr=0.5 and ci=0.95):") self.force_command(self.classifier_command) @@ -1420,7 +1433,7 @@ def classification_command(self): except Exception as e: print(e) error = "Error running classification." - + return error def clinical_command(self): @@ -1445,7 +1458,7 @@ def clinical_command(self): except Exception as e: print(e) error = "Error running clinical analysis." - + return error def covar_command(self): @@ -1481,18 +1494,18 @@ def cv_command(self): return error # Check that first argument is model or classifier - if self.line[0] not in ['model', 'classifier']: + if self.line[0] not in ["model", "classifier"]: error = "Must provide either model or classifier flag." return error - elif self.line[0] == 'model': - arg_type = 'model' - elif self.line[0] == 'classifier': - arg_type = 'classifier' + elif self.line[0] == "model": + arg_type = "model" + elif self.line[0] == "classifier": + arg_type = "classifier" # Set default values - if len(self.line) == 2 and self.line[1] == 'None': - setattr(self.args, arg_type + '_cv_split', 5) - setattr(self.args, arg_type + '_seed', 0) + if len(self.line) == 2 and self.line[1] == "None": + setattr(self.args, arg_type + "_cv_split", 5) + setattr(self.args, arg_type + "_seed", 0) return error # Check wether items are integers @@ -1503,11 +1516,11 @@ def cv_command(self): # Set CV parameters if len(self.line) == 2: - setattr(self.args, arg_type + '_cv_split', int(self.line[1])) - setattr(self.args, arg_type + '_seed', 0) + setattr(self.args, arg_type + "_cv_split", int(self.line[1])) + setattr(self.args, arg_type + "_seed", 0) elif len(self.line) == 3: - setattr(self.args, arg_type + '_cv_split', int(self.line[1])) - setattr(self.args, arg_type + '_seed', int(self.line[2])) + setattr(self.args, arg_type + "_cv_split", int(self.line[1])) + setattr(self.args, arg_type + "_seed", int(self.line[2])) else: error = "Too many values to unpack." @@ -1535,7 +1548,7 @@ def factor_correlation_command(self): except Exception as e: print(e) error = "Error running factor correlation analysis." - + return error def group_command(self): @@ -1629,7 +1642,7 @@ def model_age_command(self): """Run age modelling.""" error = None - + # Ask for input files print("Input features file path (Required):") self.force_command(self.load_command, "--features", required=True) @@ -1653,7 +1666,7 @@ def model_age_command(self): self.force_command(self.model_command) print("CV parameters (Default: nº splits=5 and seed=0):") print("Example: 10 0") - self.force_command(self.cv_command, 'model') + self.force_command(self.cv_command, "model") print("Polynomial feature extension degree. Leave blank if not desired (Default: 0, max. 3)") print("Example: 3") self.force_command(self.feature_extension_command) @@ -1668,7 +1681,7 @@ def model_age_command(self): except Exception as e: print(e) error = "Error running age modelling." 
- + return error def model_command(self): @@ -1804,7 +1817,7 @@ def feature_extension_command(self): if len(self.line) == 0 or self.line[0] == "None": self.args.feature_extension = 0 return error - + # Check whether items are integers if not self.line[0].isdigit(): error = "The polynomial feature extension degree must be an integer (0, 1, 2, or 3)" @@ -1830,7 +1843,7 @@ def hyperparameter_grid_command(self): if len(self.line) == 0 or self.line[0] == "None": self.args.hyperparameter_tuning = 0 return error - + # Check whether items are integers if not self.line[0].isdigit(): error = "The number of points in the hyperparameter grid must be a positive, nonzero integer." diff --git a/tests/test_ageml/test_commands.py b/tests/test_ageml/test_commands.py index abb58b4..18cad05 100644 --- a/tests/test_ageml/test_commands.py +++ b/tests/test_ageml/test_commands.py @@ -6,7 +6,12 @@ import sys import tempfile -from ageml.commands import model_age, factor_correlation, clinical_groups, clinical_classify +from ageml.commands import ( + model_age, + factor_correlation, + clinical_groups, + clinical_classify, +) # Fake data for testing @@ -14,14 +19,94 @@ def features(): df = pd.DataFrame( { - "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - "age": [50, 55, 60, 65, 70, 75, 80, 85, 90, 57, - 53, 57, 61, 65, 69, 73, 77, 81, 85, 89], - "feature1": [1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, 1.7, - 1.4, 2.2, 3.8, 4.5, 5.4, 6.2, 7.8, 8.2, 9.2, 2.6], - "feature2": [9.4, 8.2, 7.5, 6.4, 5.3, 4.1, 3.9, 2.2, 1.3, 9.4, - 9.3, 8.1, 7.9, 6.5, 5.0, 4.0, 3.7, 2.1, 1.4, 8.3], + "id": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + ], + "age": [ + 50, + 55, + 60, + 65, + 70, + 75, + 80, + 85, + 90, + 57, + 53, + 57, + 61, + 65, + 69, + 73, + 77, + 81, + 85, + 89, + ], + "feature1": [ + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 1.7, + 1.4, + 2.2, + 3.8, + 4.5, + 5.4, + 6.2, + 7.8, + 8.2, + 9.2, + 2.6, + ], + "feature2": [ + 9.4, + 8.2, + 7.5, + 6.4, + 5.3, + 4.1, + 3.9, + 2.2, + 1.3, + 9.4, + 9.3, + 8.1, + 7.9, + 6.5, + 5.0, + 4.0, + 3.7, + 2.1, + 1.4, + 8.3, + ], } ) df.set_index("id", inplace=True) @@ -32,12 +117,72 @@ def features(): def factors(): df = pd.DataFrame( { - "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - "factor1": [1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, 1.3, - 1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, 2.2], - "factor2": [0.1, 1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, - 4.7, 3.7, 2.3, 1.2, 0.9, 0.3, 0.2, 0.1, 0.1, 0.1], + "id": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + ], + "factor1": [ + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 1.3, + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 2.2, + ], + "factor2": [ + 0.1, + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 4.7, + 3.7, + 2.3, + 1.2, + 0.9, + 0.3, + 0.2, + 0.1, + 0.1, + 0.1, + ], } ) df.set_index("id", inplace=True) @@ -102,13 +247,22 @@ def test_model_age(temp_dir, features): features_data_path = create_csv(features, temp_dir.name) # Create systems arguments - sys.argv = ["", - "-o", temp_dir.name, - "-f", features_data_path, - "-m", "linear_reg", "fit_intercept=True", - "-s", "standard", - "--cv", "5", "0"] - + sys.argv = [ + "", + "-o", + temp_dir.name, + "-f", + features_data_path, + "-m", + "linear_reg", + "fit_intercept=True", + "-s", + "standard", + 
"--cv", + "5", + "0", + ] + # Run function model_age() @@ -121,11 +275,8 @@ def test_factor_correlation(temp_dir, ages, factors): factors_data_path = create_csv(factors, temp_dir.name) # Create systems arguments - sys.argv = ["", - "-o", temp_dir.name, - "-a", age_data_path, - "-f", factors_data_path] - + sys.argv = ["", "-o", temp_dir.name, "-a", age_data_path, "-f", factors_data_path] + # Run function factor_correlation() @@ -138,11 +289,16 @@ def test_clinical_groups(temp_dir, ages, clinical): clinical_data_path = create_csv(clinical, temp_dir.name) # Create systems arguments - sys.argv = ["", - "-o", temp_dir.name, - "-a", age_data_path, - "--clinical", clinical_data_path] - + sys.argv = [ + "", + "-o", + temp_dir.name, + "-a", + age_data_path, + "--clinical", + clinical_data_path, + ] + # Run function clinical_groups() @@ -155,11 +311,18 @@ def test_clinical_classify(temp_dir, ages, clinical): clinical_data_path = create_csv(clinical, temp_dir.name) # Create systems arguments - sys.argv = ["", - "-o", temp_dir.name, - "-a", age_data_path, - "--clinical", clinical_data_path, - "--groups", "CN", "group1"] - + sys.argv = [ + "", + "-o", + temp_dir.name, + "-a", + age_data_path, + "--clinical", + clinical_data_path, + "--groups", + "CN", + "group1", + ] + # Run function clinical_classify() diff --git a/tests/test_ageml/test_modelling.py b/tests/test_ageml/test_modelling.py index 735a298..803e66e 100644 --- a/tests/test_ageml/test_modelling.py +++ b/tests/test_ageml/test_modelling.py @@ -1,5 +1,4 @@ import pytest -import os import numpy as np import ageml.modelling as modelling diff --git a/tests/test_ageml/test_processing.py b/tests/test_ageml/test_processing.py index e52bc77..42972ac 100644 --- a/tests/test_ageml/test_processing.py +++ b/tests/test_ageml/test_processing.py @@ -11,16 +11,27 @@ def test_find_correlations(): corrs_expected = np.array([1, 1, -1]) order_expected = np.array([2, 1, 0]) p_values_expected = np.array([0.0, 0.0, 0.0]) - + assert np.allclose(corrs, corrs_expected, rtol=1e-10) is True assert np.array_equal(order, order_expected) is True assert np.allclose(p_values, p_values_expected, atol=1e-7) is True -@pytest.mark.parametrize('X, Y, exception_msg', [ - (np.array([[2, 4, np.nan], [4, 8, -12], [6, 12, -18], [8, 16, -24]]), np.array([1, 2, 3, 4]), "NaN entrie(s) found in X."), - (np.array([[2, 4, -6], [4, 8, -12], [6, 12, -18], [8, 16, -24]]), np.array([1, 2, 3, np.nan]), "NaN entrie(s) found in Y.") -]) +@pytest.mark.parametrize( + "X, Y, exception_msg", + [ + ( + np.array([[2, 4, np.nan], [4, 8, -12], [6, 12, -18], [8, 16, -24]]), + np.array([1, 2, 3, 4]), + "NaN entrie(s) found in X.", + ), + ( + np.array([[2, 4, -6], [4, 8, -12], [6, 12, -18], [8, 16, -24]]), + np.array([1, 2, 3, np.nan]), + "NaN entrie(s) found in Y.", + ), + ], +) def test_find_correlations_nans(X, Y, exception_msg): with pytest.raises(ValueError) as exc_info: processing.find_correlations(X, Y) @@ -29,7 +40,6 @@ def test_find_correlations_nans(X, Y, exception_msg): def test_covariate_correction(): - # Variables for testing X = np.array([[2, 4, -6], [4, 8, -12], [6, 12, -18]]) Z = np.array([1, 2, 3]).reshape(-1, 1) diff --git a/tests/test_ageml/test_ui.py b/tests/test_ageml/test_ui.py index e916fe7..e32d7f9 100644 --- a/tests/test_ageml/test_ui.py +++ b/tests/test_ageml/test_ui.py @@ -42,14 +42,94 @@ def __init__(self): def features(): df = pd.DataFrame( { - "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - "age": [50, 55, 60, 65, 70, 75, 80, 85, 90, 57, - 53, 57, 
61, 65, 69, 73, 77, 81, 85, 89], - "feature1": [1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, 1.7, - 1.4, 2.2, 3.8, 4.5, 5.4, 6.2, 7.8, 8.2, 9.2, 2.6], - "feature2": [9.4, 8.2, 7.5, 6.4, 5.3, 4.1, 3.9, 2.2, 1.3, 9.4, - 9.3, 8.1, 7.9, 6.5, 5.0, 4.0, 3.7, 2.1, 1.4, 8.3], + "id": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + ], + "age": [ + 50, + 55, + 60, + 65, + 70, + 75, + 80, + 85, + 90, + 57, + 53, + 57, + 61, + 65, + 69, + 73, + 77, + 81, + 85, + 89, + ], + "feature1": [ + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 1.7, + 1.4, + 2.2, + 3.8, + 4.5, + 5.4, + 6.2, + 7.8, + 8.2, + 9.2, + 2.6, + ], + "feature2": [ + 9.4, + 8.2, + 7.5, + 6.4, + 5.3, + 4.1, + 3.9, + 2.2, + 1.3, + 9.4, + 9.3, + 8.1, + 7.9, + 6.5, + 5.0, + 4.0, + 3.7, + 2.1, + 1.4, + 8.3, + ], } ) df.set_index("id", inplace=True) @@ -60,12 +140,72 @@ def features(): def factors(): df = pd.DataFrame( { - "id": [1, 2, 3, 4, 5, 6, 7, 8, 9, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20], - "factor1": [1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, 1.3, - 1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, 2.2], - "factor2": [0.1, 1.3, 2.2, 3.9, 4.1, 5.7, 6.4, 7.5, 8.2, 9.4, - 4.7, 3.7, 2.3, 1.2, 0.9, 0.3, 0.2, 0.1, 0.1, 0.1], + "id": [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + ], + "factor1": [ + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 1.3, + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 2.2, + ], + "factor2": [ + 0.1, + 1.3, + 2.2, + 3.9, + 4.1, + 5.7, + 6.4, + 7.5, + 8.2, + 9.4, + 4.7, + 3.7, + 2.3, + 1.2, + 0.9, + 0.3, + 0.2, + 0.1, + 0.1, + 0.1, + ], } ) df.set_index("id", inplace=True) @@ -88,7 +228,7 @@ def covariates(): @pytest.fixture def systems(): - return 'pottongosystem:feature1\nmondongsystem:feature2' + return "pottongosystem:feature1\nmondongsystem:feature2" @pytest.fixture @@ -167,7 +307,7 @@ def create_txt(txt, path): letters = string.ascii_lowercase txt_name = "".join(random.choice(letters) for i in range(20)) + ".txt" file_path = os.path.join(path, txt_name) - with open(file_path, 'w') as f: + with open(file_path, "w") as f: f.write(txt) return file_path @@ -226,7 +366,7 @@ def test_interface_setup(dummy_interface): def test_load_csv(dummy_interface, features): features_path = create_csv(features, dummy_interface.dir_path) dummy_interface.args.features = features_path - data = dummy_interface.load_csv('features') + data = dummy_interface.load_csv("features") # Check that the data is a pandas dataframe assert isinstance(data, pd.core.frame.DataFrame) @@ -444,7 +584,10 @@ def test_load_data_different_indexes_warning(dummy_interface, features, clinical expected = "Subjects in dataframe features not in dataframe clinical: [%d]" % (4) assert warn_record.list[0].message.args[0] == expected assert isinstance(warn_record.list[1].message, UserWarning) - expected = "Subjects in dataframe clinical not in dataframe features: [%d, %d]" % (2, 3) + expected = "Subjects in dataframe clinical not in dataframe features: [%d, %d]" % ( + 2, + 3, + ) assert warn_record.list[1].message.args[0] == expected @@ -548,7 +691,7 @@ def test_run_age_cov(dummy_interface, features, covariates): dummy_interface.args.covar_name = "sex" # Run the modelling pipeline dummy_interface.run_age() - + # Check for output dir assert os.path.exists(dummy_interface.dir_path) @@ -562,7 +705,7 @@ def test_run_age_cov(dummy_interface, features, covariates): # Print files in 
path svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) - + # Check for the existence of the output CSV csv_path = os.path.join(dummy_interface.dir_path, f"model_age/predicted_age_{dummy_interface.args.covar_name}.csv") @@ -587,7 +730,7 @@ def test_run_age_cov_clinical(dummy_interface, features, covariates, clinical): dummy_interface.args.clinical = clinical_path # Run the modelling pipeline dummy_interface.run_age() - + # Check for output dir assert os.path.exists(dummy_interface.dir_path) @@ -600,7 +743,7 @@ def test_run_age_cov_clinical(dummy_interface, features, covariates, clinical): "features_vs_age_controls_all"] svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) - + # Check for the existence of the output CSV csv_path = os.path.join(dummy_interface.dir_path, f"model_age/predicted_age_{dummy_interface.args.covar_name}.csv") @@ -620,10 +763,10 @@ def test_run_age_systems(dummy_interface, systems, features): dummy_interface.args.systems = systems_path # Run the modelling pipeline dummy_interface.run_age() - + # Check for output dir assert os.path.exists(dummy_interface.dir_path) - + # Systems names system_names = list(dummy_interface.dict_systems.keys()) figs = ["age_distribution_controls"] @@ -634,7 +777,7 @@ def test_run_age_systems(dummy_interface, systems, features): # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) - + # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, "model_age/predicted_age_multisystem.csv") @@ -657,10 +800,10 @@ def test_run_age_systems_clinical(dummy_interface, systems, features, clinical): dummy_interface.args.clinical = clinical_path # Run the modelling pipeline dummy_interface.run_age() - + # Check for output dir assert os.path.exists(dummy_interface.dir_path) - + # Systems names system_names = list(dummy_interface.dict_systems.keys()) figs = ["age_distribution_controls"] @@ -671,7 +814,7 @@ def test_run_age_systems_clinical(dummy_interface, systems, features, clinical): # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) - + # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, "model_age/predicted_age_multisystem.csv") @@ -696,10 +839,10 @@ def test_run_age_cov_and_systems(dummy_interface, systems, features, covariates) dummy_interface.args.systems = systems_path # Run the modelling pipeline dummy_interface.run_age() - + # Check for output dir assert os.path.exists(dummy_interface.dir_path) - + # Systems names system_names = list(dummy_interface.dict_systems.keys()) figs = ["age_distribution_controls"] @@ -712,7 +855,7 @@ def test_run_age_cov_and_systems(dummy_interface, systems, features, covariates) # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) - + # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, f"model_age/predicted_age_{dummy_interface.args.covar_name}_multisystem.csv") @@ -740,10 +883,10 @@ def 
test_run_age_cov_and_systems_clinical(dummy_interface, systems, features, co dummy_interface.args.clinical = clinical_path # Run the modelling pipeline dummy_interface.run_age() - + # Check for output dir assert os.path.exists(dummy_interface.dir_path) - + # Systems names system_names = list(dummy_interface.dict_systems.keys()) figs = ["age_distribution_controls"] @@ -756,7 +899,7 @@ def test_run_age_cov_and_systems_clinical(dummy_interface, systems, features, co # Check existance of figures svg_paths = [os.path.join(dummy_interface.dir_path, f"model_age/figures/{fig}.png") for fig in figs] assert all([os.path.exists(svg_path) for svg_path in svg_paths]) - + # Check existence of output CSV csv_path = os.path.join(dummy_interface.dir_path, f"model_age/predicted_age_{dummy_interface.args.covar_name}_multisystem.csv") @@ -880,8 +1023,8 @@ def test_run_classification(dummy_interface, ages, clinical): # Run the classification pipeline ages_path = create_csv(ages, dummy_interface.dir_path) clinical_path = create_csv(clinical, dummy_interface.dir_path) - dummy_interface.args.group1 = 'cn' - dummy_interface.args.group2 = 'group1' + dummy_interface.args.group1 = "cn" + dummy_interface.args.group2 = "group1" dummy_interface.args.ages = ages_path dummy_interface.args.clinical = clinical_path dummy_interface.run_classification() @@ -905,8 +1048,8 @@ def test_run_classification_systems(dummy_interface, ages_multisystem, clinical) # Run the classification pipeline ages_path = create_csv(ages_multisystem, dummy_interface.dir_path) clinical_path = create_csv(clinical, dummy_interface.dir_path) - dummy_interface.args.group1 = 'cn' - dummy_interface.args.group2 = 'group1' + dummy_interface.args.group1 = "cn" + dummy_interface.args.group2 = "group1" dummy_interface.args.ages = ages_path dummy_interface.args.clinical = clinical_path dummy_interface.run_classification() @@ -930,7 +1073,6 @@ def test_run_classification_systems(dummy_interface, ages_multisystem, clinical) def test_classification_group_not_given(dummy_interface, ages, clinical): - # Run create classification pipeline with no groups ages_path = create_csv(ages, dummy_interface.dir_path) clinical_path = create_csv(clinical, dummy_interface.dir_path) @@ -950,14 +1092,14 @@ def test_classifcation_group_not_in_columns(dummy_interface, ages, clinical): clinical_path = create_csv(clinical, dummy_interface.dir_path) dummy_interface.args.ages = ages_path dummy_interface.args.clinical = clinical_path - dummy_interface.args.group1 = 'cn' - dummy_interface.args.group2 = 'group3' - + dummy_interface.args.group1 = "cn" + dummy_interface.args.group2 = "group3" + # Run classification and capture error with pytest.raises(ValueError) as exc_info: dummy_interface.run_classification() assert exc_info.type == ValueError - error_msg = "Classes must be one of the following: ['%s', '%s']" % ('cn', 'group1') + error_msg = "Classes must be one of the following: ['%s', '%s']" % ("cn", "group1") assert exc_info.value.args[0] == error_msg @@ -1014,9 +1156,7 @@ def test_force_command_CLI(dummy_cli, monkeypatch): # Test when no input is given and not required monkeypatch.setattr("builtins.input", lambda _: "") - error = dummy_cli.force_command( - dummy_cli.load_command, "--systems", required=False - ) + error = dummy_cli.force_command(dummy_cli.load_command, "--systems", required=False) assert error is None assert dummy_cli.line == ["--systems", "None"] @@ -1091,14 +1231,32 @@ def test_classification_command_CLI(dummy_cli, ages, clinical, monkeypatch, caps clinical_path = 
create_csv(clinical, tempDir.name) # Test command - responses = ["classification", ages_path, clinical_path, "cn group1", "", "", "", "q"] + responses = [ + "classification", + ages_path, + clinical_path, + "cn group1", + "", + "", + "", + "q", + ] monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] - assert captured[-1] == 'Finished classification.' + assert captured[-1] == "Finished classification." # Test command with invalid input like incorrect groups - responses = ["classification", ages_path, clinical_path, "cn group2", "", "", "", "q"] + responses = [ + "classification", + ages_path, + clinical_path, + "cn group2", + "", + "", + "", + "q", + ] monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] @@ -1124,7 +1282,7 @@ def test_clinical_command_CLI(dummy_cli, ages, clinical, monkeypatch, capsys): monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] - assert captured[-1] == 'Finished clinical analysis.' + assert captured[-1] == "Finished clinical analysis." # Test command with invalid input like incorrect file responses = ["clinical", ages_path, ages_path, "", "q"] @@ -1224,7 +1382,7 @@ def test_factor_correlation_command_CLI(dummy_cli, ages, factors, monkeypatch, c dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] print(captured) - assert captured[-1] == 'Finished factor correlation analysis.' + assert captured[-1] == "Finished factor correlation analysis." # Test command with invalid input like incorrect file responses = ["factor_correlation", ages_path, empty_path, "", "", "q"] @@ -1330,18 +1488,46 @@ def test_model_age_command_CLI(dummy_cli, features, monkeypatch, capsys): monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] - assert captured[-1] == 'Finished running age modelling.' + assert captured[-1] == "Finished running age modelling." # Test command with invalid input like incorrect model parameters - responses = ["model_age", features_path, "", "", "", "", "", "linear_reg fitIntercept=True", "", "", "", "", "q"] + responses = [ + "model_age", + features_path, + "", + "", + "", + "", + "", + "linear_reg fitIntercept=True", + "", + "", + "", + "", + "q", + ] monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] print(captured) assert "Model parameters are not valid for linear_reg model. Check them in the sklearn documentation." 
in captured - + # Test command with hyperparameter optimization and feature_extension - responses = ["model_age", features_path, "", "", "", "", "", "linear_svr", "", "2", "3", "", "q"] + responses = [ + "model_age", + features_path, + "", + "", + "", + "", + "", + "linear_svr", + "", + "2", + "3", + "", + "q", + ] monkeypatch.setattr("builtins.input", lambda _: responses.pop(0)) dummy_cli.command_interface() captured = capsys.readouterr().out.split("\n")[:-1] @@ -1392,7 +1578,7 @@ def test_model_command_CLI(dummy_cli): assert error is None assert dummy_cli.args.model_type == "linear_reg" assert dummy_cli.args.model_params == {"fit_intercept": True} - + # Test passing correctly formated, but invalid sklearn model params dummy_cli.line = "linear_reg my_super_fake_intercept=True" error = dummy_cli.model_command() @@ -1472,12 +1658,12 @@ def test_scaler_command_CLI(dummy_cli): assert error is None assert dummy_cli.args.scaler_type == "standard" assert dummy_cli.args.scaler_params == {"with_mean": 0} - + # Test passing correctly formated, but invalid sklearn scaler params dummy_cli.line = "standard my_super_fake_mean=0" error = dummy_cli.scaler_command() assert error == "Scaler parameters are not valid for standard scaler. Check them in the sklearn documentation." - + # Test passing correctly formated, but invalid sklearn scaler params in another type of scaler dummy_cli.line = "minmax my_super_fake_mean=0" error = dummy_cli.scaler_command() @@ -1487,18 +1673,18 @@ def test_scaler_command_CLI(dummy_cli): def test_hyperparameter_tuning_CLI(dummy_cli): dummy_cli.args.model_type = "linear_svr" dummy_cli.args.model_params = {"C": 1, "epsilon": 0.1} - + # Test no hyperparameters dummy_cli.line = "" error = dummy_cli.hyperparameter_grid_command() assert error is None assert dummy_cli.args.hyperparameter_tuning == 0 - + # Test passing too many arguments dummy_cli.line = "1 2 3" error = dummy_cli.hyperparameter_grid_command() assert error == "Must provide only one integer, or none." - + # Test passing non integer arguments dummy_cli.line = "1.5" error = dummy_cli.hyperparameter_grid_command() @@ -1514,12 +1700,12 @@ def test_feature_extension_CLI(dummy_cli): error = dummy_cli.feature_extension_command() assert error is None assert dummy_cli.args.feature_extension == 0 - + # Test passing too many arguments dummy_cli.line = "1 2 3" error = dummy_cli.feature_extension_command() assert error == "Must provide only one integer, or none." 
- + # Test passing non integer arguments dummy_cli.line = "1.5" error = dummy_cli.feature_extension_command() @@ -1527,7 +1713,7 @@ def test_feature_extension_CLI(dummy_cli): dummy_cli.line = "mondong" error = dummy_cli.feature_extension_command() assert error == "The polynomial feature extension degree must be an integer (0, 1, 2, or 3)" - + # Test with a correct argument dummy_cli.line = "2" error = dummy_cli.feature_extension_command() diff --git a/tests/test_ageml/test_visualizer.py b/tests/test_ageml/test_visualizer.py index 6880a9b..09eae45 100644 --- a/tests/test_ageml/test_visualizer.py +++ b/tests/test_ageml/test_visualizer.py @@ -1,12 +1,8 @@ import pytest import os import shutil -import pandas as pd from statsmodels.stats.multitest import multipletests -import ageml.modelling as modelling -import ageml.ui as ui -import ageml.utils as utils from ageml.utils import significant_markers, NameTag import ageml.visualizer as viz from ageml.datasets import SyntheticData @@ -58,12 +54,11 @@ def test_features_vs_age(dummy_viz, np_test_data): X, Y = np_test_data[:, :3], np_test_data[:, -1] corr, order, p_values = find_correlations(X, Y) # Reject null hypothesis of no correlation - reject_bon, _, _, _ = multipletests(p_values, alpha=0.05, method='bonferroni') - reject_fdr, _, _, _ = multipletests(p_values, alpha=0.05, method='fdr_bh') + reject_bon, _, _, _ = multipletests(p_values, alpha=0.05, method="bonferroni") + reject_fdr, _, _, _ = multipletests(p_values, alpha=0.05, method="fdr_bh") significant = significant_markers(reject_bon, reject_fdr) - dummy_viz.features_vs_age([X], [Y], [corr], [order], [significant], ["X1", "X2", "X3"], tag=NameTag(), - labels=["all"]) - + dummy_viz.features_vs_age([X], [Y], [corr], [order], [significant], ["X1", "X2", "X3"], tag=NameTag(), labels=["all"]) + # Check file existence svg_path = os.path.join(dummy_viz.dir, "figures/features_vs_age_controls.png") assert os.path.exists(svg_path) @@ -104,7 +99,7 @@ def test_factors_vs_deltas(dummy_viz): corrs = [[0.5, 0.6, 0.7, 0.8, 0.9]] groups = ["Group 1"] labels = ["factor1", "factor2", "factor3", "factor4", "factor5"] - markers = [['', '*', '', '*', '**']] + markers = [["", "*", "", "*", "**"]] # Plot dummy_viz.factors_vs_deltas(corrs, groups, labels, markers, tag=NameTag()) # Check file existence