Skip to content

Commit

Permalink
[ENH] Fix failing unit tests (#109)
Browse files Browse the repository at this point in the history
* Brings in monotonic constraints to decision trees
* Brings in missing-value support for decision trees and random forests
* Brings up to speed as of 08/16/23 scikit-learn:main

---------

Signed-off-by: Adam Li <adam2392@gmail.com>
  • Loading branch information
adam2392 committed Aug 16, 2023
1 parent b44c951 commit 3b124e3
Show file tree
Hide file tree
Showing 80 changed files with 1,127 additions and 894 deletions.
36 changes: 16 additions & 20 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -1,13 +1,6 @@
# See: https://circleci.com/blog/deploying-documentation-to-github-pages-with-continuous-integration/
version: 2.1

# Aliases to reuse
_defaults: &defaults
docker:
# CircleCI maintains a library of pre-built images
# documented at https://circleci.com/docs/2.0/circleci-images/
- image: cimg/python:3.9

# document commands used by downstream jobs
commands:
check-skip:
Expand Down Expand Up @@ -70,7 +63,10 @@ commands:
jobs:
# Build scikit-tree from source
build_scikit_tree:
<<: *defaults
docker:
# CircleCI maintains a library of pre-built images
# documented at https://circleci.com/docs/2.0/circleci-images/
- image: cimg/python:3.9
steps:
- checkout
- check-skip
Expand Down Expand Up @@ -110,7 +106,10 @@ jobs:
- .

build_docs:
<<: *defaults
docker:
# CircleCI maintains a library of pre-built images
# documented at https://circleci.com/docs/2.0/circleci-images/
- image: cimg/python:3.9
steps:
- attach_workspace:
at: ~/
Expand All @@ -131,15 +130,15 @@ jobs:
python ./spin docs
- store_artifacts:
path: docs/_build/html
path: doc/_build/html
destination: dev

- store_artifacts:
path: docs/_build/html_stable/
path: doc/_build/html_stable/
destination: stable

- persist_to_workspace:
root: docs/_build
root: doc/_build
paths:
- html
- html_stable
Expand All @@ -151,7 +150,7 @@ jobs:
- checkout

- attach_workspace:
at: docs/_build
at: doc/_build

- restore_cache:
keys:
Expand All @@ -174,10 +173,10 @@ jobs:
command: |
if [ "${CIRCLE_BRANCH}" == "main" ]; then
echo "Deploying dev docs for ${CIRCLE_BRANCH}.";
gh-pages --dotfiles --message "docs updates [skip ci] (${CIRCLE_BUILD_NUM})" --dist docs/_build/html --dest ./dev
gh-pages --dotfiles --message "docs updates [skip ci] (${CIRCLE_BUILD_NUM})" --dist doc/_build/html --dest ./dev
else
echo "Deploying stable docs for ${CIRCLE_BRANCH}.";
gh-pages --dotfiles --message "docs updates [skip ci] (${CIRCLE_BUILD_NUM})" --dist docs/_build/html --dest ./stable
gh-pages --dotfiles --message "docs updates [skip ci] (${CIRCLE_BUILD_NUM})" --dist doc/_build/html --dest ./stable
fi;
- save_cache:
Expand All @@ -186,16 +185,13 @@ jobs:
- ~/sktree

workflows:
default:
build-docs:
jobs:
- build_scikit_tree:
name: build_scikit_tree
- build_scikit_tree
- build_docs:
name: build_docs
requires:
- build_scikit_tree
- docs-deploy:
name: docs-deploy
requires:
- build_docs
filters:
Expand Down
3 changes: 2 additions & 1 deletion .codespellignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
raison
nd
parth
parth
ot
6 changes: 3 additions & 3 deletions .flake8
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@ exclude =
.pytest_cache
.circleci
paper
docs/_build
docs/generated
docs/auto_examples
doc/_build
doc/generated
doc/auto_examples
validation
build
build-install
Expand Down
18 changes: 10 additions & 8 deletions .github/workflows/circle_artifacts.yml
Original file line number Diff line number Diff line change
@@ -1,25 +1,27 @@
name: CircleCI artifacts redirector
on: [status]

permissions: read-all

# Restrict the permissions granted to the use of secrets.GITHUB_TOKEN in this
# github actions workflow:
# https://docs.github.com/en/actions/security-guides/automatic-token-authentication
permissions:
statuses: write

jobs:
circleci_artifacts_redirector_job:
runs-on: ubuntu-latest
if: "${{ github.event.context == 'ci/circleci: build_docs' }}"
permissions:
statuses: write
runs-on: ubuntu-20.04
if: "github.repository == 'neurodata/scikit-tree' && github.event.context == 'ci/circleci: build_docs'"
name: Run CircleCI artifacts redirector
steps:
- name: GitHub Action step
id: step1
uses: larsoner/circleci-artifacts-redirector-action@master
with:
api-token: ${{ secrets.CIRCLECI_TOKEN }}
repo-token: ${{ secrets.GITHUB_TOKEN }}
api-token: ${{ secrets.CIRCLE_TOKEN }}
artifact-path: 0/dev/index.html
circleci-jobs: build_docs
job-title: Check the rendered docs here!

- name: Check the URL
if: github.event.status != 'pending'
run: |
Expand Down
4 changes: 2 additions & 2 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ jobs:
matrix:
os: [ubuntu-22.04, macos-latest]
python-version: [3.9, "3.10", "3.11"]
poetry-version: [1.3.0]
poetry-version: [1.5.0]
runs-on: ${{ matrix.os }}
defaults:
run:
Expand Down Expand Up @@ -84,7 +84,7 @@ jobs:
- name: Setup compiler cache
uses: actions/cache@v3
id: cache-ccache
id: cache-ccachev1
# Reference: https://docs.github.com/en/actions/guides/caching-dependencies-to-speed-up-workflows#matching-a-cache-key
# NOTE: The caching strategy is modeled in a way that it will always have a unique cache key for each workflow run
# (even if the same workflow is run multiple times). The restore keys are not unique and for a partial match, they will
Expand Down
10 changes: 5 additions & 5 deletions .github/workflows/pr_checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,14 +37,14 @@ jobs:
then
exit 0
fi
all_changelogs=$(cat ./docs/whats_new/v*.rst)
all_changelogs=$(cat ./doc/whats_new/v*.rst)
if [[ "$all_changelogs" =~ :pr:\`$PR_NUMBER\` ]]
then
echo "Changelog has been updated."
# If the pull request is milestoned check the correspondent changelog
if exist -f ./docs/whats_new/v${TAGGED_MILESTONE:0:4}.rst
if exist -f ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst
then
expected_changelog=$(cat ./docs/whats_new/v${TAGGED_MILESTONE:0:4}.rst)
expected_changelog=$(cat ./doc/whats_new/v${TAGGED_MILESTONE:0:4}.rst)
if [[ "$expected_changelog" =~ :pr:\`$PR_NUMBER\` ]]
then
echo "Changelog and milestone correspond."
Expand All @@ -58,8 +58,8 @@ jobs:
else
echo "A Changelog entry is missing."
echo ""
echo "Please add an entry to the changelog at 'docs/whats_new/v*.rst'"
echo "to docsument your change assuming that the PR will be merged"
echo "Please add an entry to the changelog at 'doc/whats_new/v*.rst'"
echo "to document your change assuming that the PR will be merged"
echo "in time for the next release of scikit-tree."
echo ""
echo "Look at other entries in that file for inspiration and please"
Expand Down
16 changes: 8 additions & 8 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,17 @@ sktree/_lib/sklearn/
*.png

# Sphinx documentation
docs/_build/
docs/generated/
docs/auto_examples/
docs/auto_tutorials/
docs/modules/generated/
docs/sphinxext/cachedir
doc/_build/
doc/generated/
doc/auto_examples/
doc/auto_tutorials/
doc/modules/generated/
doc/sphinxext/cachedir
pip-log.txt
.coverage
tags
docs/coverages
docs/samples
doc/coverages
doc/samples
cover
examples/*.jpg

Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "sktree/_lib/sklearn"]
path = sktree/_lib/sklearn_fork
url = https://github.com/neurodata/scikit-learn
branch = v1.3
branch = submodulev3
18 changes: 11 additions & 7 deletions .spin/cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ def get_git_revision_hash(submodule) -> str:
@click.option("--build-dir", default="build", help="Build directory; default is `$PWD/build`")
@click.option("--clean", is_flag=True, help="Clean previously built docs before building")
@click.option("--noplot", is_flag=True, help="Build docs without plots")
def docs(build_dir, clean=False, noplot=False):
@click.pass_context
def docs(ctx, build_dir, clean=False, noplot=False):
"""📖 Build documentation"""
if clean:
doc_dir = "./docs/_build"
Expand All @@ -31,12 +32,13 @@ def docs(build_dir, clean=False, noplot=False):

util.run(["pip", "install", "-q", "-r", "doc_requirements.txt"])

os.environ["SPHINXOPTS"] = "-W"
os.environ["PYTHONPATH"] = f'{site_path}{os.sep}:{os.environ.get("PYTHONPATH", "")}'
if noplot:
util.run(["make", "-C", "docs", "clean", "html-noplot"], replace=True)
else:
util.run(["make", "-C", "docs", "clean", "html"], replace=True)
ctx.invoke(meson.docs)
# os.environ["SPHINXOPTS"] = "-W"
# os.environ["PYTHONPATH"] = f'{site_path}{os.sep}:{os.environ.get("PYTHONPATH", "")}'
# if noplot:
# util.run(["make", "-C", "docs", "clean", "html-noplot"], replace=True)
# else:
# util.run(["make", "-C", "docs", "clean", "html"], replace=True)


@click.command()
Expand All @@ -52,6 +54,8 @@ def coverage(ctx):
def setup_submodule(forcesubmodule=False):
"""Build scikit-tree using submodules.
git submodule set-branch -b submodulev2 sktree/_lib/sklearn
git submodule update --recursive --remote
To update submodule wrt latest commits:
Expand Down
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
[![Main](https://github.com/neurodata/scikit-tree/actions/workflows/main.yml/badge.svg?branch=main)](https://github.com/neurodata/scikit-tree/actions/workflows/main.yml)
[![Checked with mypy](http://www.mypy-lang.org/static/mypy_badge.svg)](http://mypy-lang.org/)
[![codecov](https://codecov.io/gh/neurodata/scikit-tree/branch/main/graph/badge.svg?token=H1reh7Qwf4)](https://codecov.io/gh/neurodata/scikit-tree)
[![PyPI Download count](https://pepy.tech/badge/scikit-tree)](https://pepy.tech/project/scikit-tree)
[![Latest PyPI release](https://img.shields.io/pypi/v/scikit-tree.svg)](https://pypi.org/project/scikit-tree/)

scikit-tree
===========
Expand Down
2 changes: 1 addition & 1 deletion build_requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
meson
meson-python
cython
cython>=3.0
ninja
numpy
scikit-learn>=1.3
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
26 changes: 25 additions & 1 deletion docs/api.rst → doc/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,30 @@ API Documentation
:no-members:
:no-inherited-members:

Scikit-learn Tree Estimators
----------------------------
We provide a drop-in replacement for the scikit-learn tree estimators
with **experimental** features that we have developed. These estimators
are still compatible with the scikit-learn API. These estimators all have
the capability of binning features, which theoretically will improve runtime
significantly for high-dimensional and high-sample size data.

Use at your own risk! We have not tested these estimators extensively, compared
to the scikit-learn estimators.

.. automodule:: sktree._lib.sklearn.ensemble
:members:
:show-inheritance:

.. currentmodule:: sktree
.. autosummary::
:toctree: generated/

RandomForestClassifier
RandomForestRegressor
ExtraTreesClassifier
ExtraTreesRegressor

Supervised
----------
Decision-tree models are traditionally implemented with axis-aligned splits and
Expand Down Expand Up @@ -84,7 +108,7 @@ provide a natural way to compute neighbors based on the splits. We provide
an API for extracting the nearest neighbors from a tree-model. This provides
an API-like interface similar to :class:`~sklearn.neighbors.NearestNeighbors`.

.. currentmodule:: sktree.neighbors
.. currentmodule:: sktree
.. autosummary::
:toctree: generated/

Expand Down
9 changes: 8 additions & 1 deletion docs/conf.py → doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,13 @@
)
sys.path.insert(0, os.path.abspath("sphinxext"))
import sktree
from sktree._lib.sklearn.ensemble._forest import ExtraTreesClassifier # noqa
from sktree._lib.sklearn.ensemble._forest import ExtraTreesRegressor # noqa
from sktree._lib.sklearn.ensemble._forest import RandomForestClassifier # noqa
from sktree._lib.sklearn.ensemble._forest import RandomForestRegressor # noqa

sys.path.append(os.path.abspath(os.path.join(curdir, "..", "sktree")))
sys.path.append(os.path.abspath(os.path.join(curdir, "..", "sktree/_lib")))

# -- project information -----------------------------------------------------
# https://www.sphinx-doc.org/en/master/usage/configuration.html#project-information
Expand All @@ -45,7 +50,7 @@
# https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration

# If your documentation needs a minimal Sphinx version, state it here.
needs_sphinx = "5.0"
needs_sphinx = "6.0"

# The document name of the “root” document, that is, the document that contains
# the root toctree directive.
Expand Down Expand Up @@ -166,6 +171,7 @@
"UnsupervisedObliqueRandomForest": "sktree.ensemble.UnsupervisedObliqueRandomForest",
"DecisionTreeClassifier": "sklearn.tree.DecisionTreeClassifier",
"DecisionTreeRegressor": "sklearn.tree.DecisionTreeRegressor",
"ExtraTreeRegressor": "sklearn.tree.ExtraTreeRegressor",
"pipeline.Pipeline": "sklearn.pipeline.Pipeline",
# "sklearn_fork.inspection.permutation_importance": "sklearn.inspection.permutation_importance",
}
Expand Down Expand Up @@ -221,6 +227,7 @@
"n_neighbors",
"one",
"joblib.parallel_backend",
"length",
}

# validation
Expand Down
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit 3b124e3

Please sign in to comment.