Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MAINT Clean up Cython files #321

Merged
merged 17 commits into from
Sep 9, 2024
5 changes: 5 additions & 0 deletions .spin/cmds.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import click
from spin import util
from spin.cmds import meson
from spin.cmds.meson import build_dir_option


def get_git_revision_hash(submodule) -> str:
Expand Down Expand Up @@ -145,14 +146,18 @@ def setup_submodule(forcesubmodule=False):
@click.option(
"--forcesubmodule", is_flag=True, help="Force submodule pull.", envvar="FORCE_SUBMODULE"
)
@build_dir_option
@click.pass_context
def build(
ctx,
*,
meson_args,
jobs=None,
clean=False,
verbose=False,
gcov=False,
quiet=False,
build_dir=None,
forcesubmodule=False,
):
"""Build treeple using submodules.
Expand Down
2 changes: 1 addition & 1 deletion build_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,5 +8,5 @@ click
rich-click
doit
pydevtool
spin
spin>=0.12
build
2 changes: 1 addition & 1 deletion meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ project(
license: 'PolyForm Noncommercial 1.0.0',
meson_version: '>= 1.1.0',
default_options: [
'c_std=c99',
'c_std=c11',
'cpp_std=c++14',
],
)
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ build = [
'twine',
'meson',
'meson-python',
'spin',
'spin>=0.12',
'doit',
'scikit-learn>=1.5.0',
'Cython>=3.0.10',
Expand Down
4 changes: 3 additions & 1 deletion treeple/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# https://github.com/ContinuumIO/anaconda-issues/issues/11294
os.environ.setdefault("KMP_INIT_AT_FORK", "FALSE")


try:
# This variable is injected in the __builtins__ by the build
# process. It is used to enable importing subpackages of sklearn when
Expand Down Expand Up @@ -64,7 +65,8 @@
msg = """Error importing treeple: you cannot import treeple while
being in treeple source directory; please exit the treeple source
tree first and relaunch your Python interpreter."""
raise ImportError(msg) from e
raise Exception(e)
# raise ImportError(msg) from e

__all__ = [
"_lib",
Expand Down
19 changes: 19 additions & 0 deletions treeple/_lib/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -94,3 +94,22 @@ foreach ext: extensions
subdir: 'treeple/_lib/sklearn/utils/',
)
endforeach


# python_sources = [
# '__init__.py',
# ]

# py.install_sources(
# python_sources,
# subdir: 'treeple/_lib' # Folder relative to site-packages to install to
# )

# tempita = files('./sklearn/_build_utils/tempita.py')

# # Copy all the .py files to the install dir, rather than using
# # py.install_sources and needing to list them explicitly one by one
# # install_subdir('sklearn', install_dir: py.get_install_dir())
# install_subdir('sklearn', install_dir: join_paths(py.get_install_dir(), 'treeple/_lib'))

# subdir('sklearn')
2 changes: 1 addition & 1 deletion treeple/_lib/sklearn_fork
Submodule sklearn_fork updated 216 files
8 changes: 6 additions & 2 deletions treeple/ensemble/_honest_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -720,8 +720,12 @@ def oob_samples_(self):
oob_samples.append(_oob_samples)
return oob_samples

def _more_tags(self):
return {"multioutput": False}
def __sklearn_tags__(self):
    """Return the parent class's tags with two flags forced to ``False``.

    The tags object comes from ``super().__sklearn_tags__()``; this override
    sets ``input_tags.allow_nan`` and ``classifier_tags.multi_output`` to
    ``False`` before handing it back.
    """
    # XXX: nans should be supportable in HRF
    estimator_tags = super().__sklearn_tags__()
    estimator_tags.input_tags.allow_nan = False
    estimator_tags.classifier_tags.multi_output = False
    return estimator_tags

def decision_path(self, X):
"""
Expand Down
12 changes: 8 additions & 4 deletions treeple/ensemble/_unsupervised_forest.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,12 @@
)
from sklearn.metrics import calinski_harabasz_score
from sklearn.utils.parallel import Parallel, delayed
from sklearn.utils.validation import _check_sample_weight, check_is_fitted, check_random_state
from sklearn.utils.validation import (
_check_sample_weight,
check_is_fitted,
check_random_state,
validate_data,
)

from .._lib.sklearn.ensemble._forest import BaseForest
from .._lib.sklearn.tree._tree import DTYPE
Expand Down Expand Up @@ -85,10 +90,9 @@ def fit(self, X, y=None, sample_weight=None):
self : object
Returns the instance itself.
"""
self._validate_params()

# Validate or convert input data
X = self._validate_data(
X = validate_data(
self,
X,
dtype=DTYPE, # accept_sparse="csc",
)
Expand Down
1 change: 1 addition & 0 deletions treeple/meson.build
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ scikit_learn_cython_args = [
'-X language_level=3', '-X boundscheck=' + boundscheck, '-X wraparound=False',
'-X initializedcheck=False', '-X nonecheck=False', '-X cdivision=True',
'-X profile=False',
'-X embedsignature=True',
# Needed for cython imports across subpackages, e.g. cluster pyx that
# cimports metrics pxd
'--include-dir', meson.global_build_root(),
Expand Down
15 changes: 11 additions & 4 deletions treeple/neighbors.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,9 @@
from sklearn.base import BaseEstimator, MetaEstimatorMixin
from sklearn.exceptions import NotFittedError
from sklearn.neighbors import NearestNeighbors
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_is_fitted, validate_data

from treeple.tree import DecisionTreeClassifier
from treeple.tree._neighbors import _compute_distance_matrix, compute_forest_similarity_matrix


Expand All @@ -31,13 +32,19 @@
The number of parallel jobs to run for neighbors, by default None.
"""

def __init__(self, estimator, n_neighbors=5, radius=1.0, algorithm="auto", n_jobs=None):
def __init__(self, estimator=None, n_neighbors=5, radius=1.0, algorithm="auto", n_jobs=None):
self.estimator = estimator
self.n_neighbors = n_neighbors
self.algorithm = algorithm
self.radius = radius
self.n_jobs = n_jobs

def get_estimator(self):
    """Return the estimator that ``fit`` should operate on.

    Returns
    -------
    estimator : object
        A copy of ``self.estimator`` (made with ``copy``) when one was
        supplied; otherwise a default ``DecisionTreeClassifier(random_state=0)``.
    """
    # The branches used to be swapped: a user-supplied estimator was silently
    # replaced by the default tree, while ``estimator=None`` returned
    # ``copy(None)``. Fall back to the default only when nothing was given.
    if self.estimator is None:
        return DecisionTreeClassifier(random_state=0)
    return copy(self.estimator)

Check warning on line 46 in treeple/neighbors.py

View check run for this annotation

Codecov / codecov/patch

treeple/neighbors.py#L46

Added line #L46 was not covered by tests

def fit(self, X, y=None):
"""Fit the nearest neighbors estimator from the training dataset.

Expand All @@ -56,9 +63,9 @@
self : object
Fitted estimator.
"""
X, y = self._validate_data(X, y, accept_sparse="csc")
X, y = validate_data(self, X, y, accept_sparse="csc")

self.estimator_ = copy(self.estimator)
self.estimator_ = self.get_estimator()
try:
check_is_fitted(self.estimator_)
except NotFittedError:
Expand Down
82 changes: 64 additions & 18 deletions treeple/tree/_classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from sklearn.cluster import AgglomerativeClustering
from sklearn.utils import check_random_state
from sklearn.utils._param_validation import Interval
from sklearn.utils.validation import check_is_fitted
from sklearn.utils.validation import check_is_fitted, validate_data

from .._lib.sklearn.tree import (
BaseDecisionTree,
Expand Down Expand Up @@ -216,7 +216,7 @@
if check_input:
# TODO: allow X to be sparse
check_X_params = dict(dtype=DTYPE) # , accept_sparse="csc"
X = self._validate_data(X, validate_separately=(check_X_params))
X = validate_data(self, X, validate_separately=(check_X_params))
if issparse(X):
X.sort_indices()

Expand Down Expand Up @@ -378,6 +378,13 @@
predict_labels = cluster.fit_predict(affinity_matrix)
return predict_labels

def __sklearn_tags__(self):
    """Return the parent class's tags with ``input_tags.allow_nan`` set to ``False``."""
    # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
    # However, for MORF it is not supported
    estimator_tags = super().__sklearn_tags__()
    input_tags = estimator_tags.input_tags
    input_tags.allow_nan = False
    return estimator_tags


class UnsupervisedObliqueDecisionTree(UnsupervisedDecisionTree):
"""Unsupervised oblique decision tree.
Expand Down Expand Up @@ -577,6 +584,13 @@
builder.build(self.tree_, X, sample_weight)
return self

def __sklearn_tags__(self):
    """Return the parent class's tags with ``input_tags.allow_nan`` set to ``False``."""
    # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
    # However, for MORF it is not supported
    estimator_tags = super().__sklearn_tags__()
    input_tags = estimator_tags.input_tags
    input_tags.allow_nan = False
    return estimator_tags


class ObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
"""An oblique decision tree classifier.
Expand Down Expand Up @@ -820,7 +834,7 @@

tree_type = "oblique"

_parameter_constraints = {
_parameter_constraints: dict = {
**DecisionTreeClassifier._parameter_constraints,
"feature_combinations": [
Interval(Real, 1.0, None, closed="left"),
Expand Down Expand Up @@ -1070,6 +1084,13 @@
self._prune_tree()
return self

def __sklearn_tags__(self):
    """Return the parent class's tags with ``input_tags.allow_nan`` set to ``False``."""
    # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
    # However, for MORF it is not supported
    estimator_tags = super().__sklearn_tags__()
    input_tags = estimator_tags.input_tags
    input_tags.allow_nan = False
    return estimator_tags


class ObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
"""An oblique decision tree Regressor.
Expand Down Expand Up @@ -1283,7 +1304,7 @@

tree_type = "oblique"

_parameter_constraints = {
_parameter_constraints: dict = {
**DecisionTreeRegressor._parameter_constraints,
"feature_combinations": [
Interval(Real, 1.0, None, closed="left"),
Expand Down Expand Up @@ -1450,6 +1471,13 @@
builder.build(self.tree_, X, y, sample_weight, None)
return self

def __sklearn_tags__(self):
    """Return the parent class's tags with ``input_tags.allow_nan`` set to ``False``."""
    # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
    # However, for MORF it is not supported
    estimator_tags = super().__sklearn_tags__()
    input_tags = estimator_tags.input_tags
    input_tags.allow_nan = False
    return estimator_tags


class PatchObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
"""A oblique decision tree classifier that operates over patches of data.
Expand Down Expand Up @@ -1684,7 +1712,7 @@
"""

tree_type = "oblique"
_parameter_constraints = {
_parameter_constraints: dict = {
**DecisionTreeClassifier._parameter_constraints,
"min_patch_dims": ["array-like", None],
"max_patch_dims": ["array-like", None],
Expand Down Expand Up @@ -1798,8 +1826,8 @@
self.feature_combinations_ = 1

if self.feature_weight is not None:
self.feature_weight = self._validate_data(
self.feature_weight, ensure_2d=True, dtype=DTYPE
self.feature_weight = validate_data(
self, self.feature_weight, ensure_2d=True, dtype=DTYPE
)
if self.feature_weight.shape != X.shape:
raise ValueError(
Expand Down Expand Up @@ -1927,11 +1955,13 @@

return self

def _more_tags(self):
def __sklearn_tags__(self):
# XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
# However, for MORF it is not supported
allow_nan = False
return {"multilabel": True, "allow_nan": allow_nan}
tags = super().__sklearn_tags__()
tags.classifier_tags.multi_label = True
tags.input_tags.allow_nan = False
return tags

@property
def _inheritable_fitted_attribute(self):
Expand Down Expand Up @@ -2166,7 +2196,7 @@
"""

tree_type = "oblique"
_parameter_constraints = {
_parameter_constraints: dict = {
**DecisionTreeRegressor._parameter_constraints,
"min_patch_dims": ["array-like", None],
"max_patch_dims": ["array-like", None],
Expand Down Expand Up @@ -2277,8 +2307,8 @@
self.feature_combinations_ = 1

if self.feature_weight is not None:
self.feature_weight = self._validate_data(
self.feature_weight, ensure_2d=True, dtype=DTYPE
self.feature_weight = validate_data(

Check warning on line 2310 in treeple/tree/_classes.py

View check run for this annotation

Codecov / codecov/patch

treeple/tree/_classes.py#L2310

Added line #L2310 was not covered by tests
self, self.feature_weight, ensure_2d=True, dtype=DTYPE
)
if self.feature_weight.shape != X.shape:
raise ValueError(
Expand Down Expand Up @@ -2407,11 +2437,13 @@

return self

def _more_tags(self):
def __sklearn_tags__(self):
# XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
# However, for MORF it is not supported
allow_nan = False
return {"multilabel": True, "allow_nan": allow_nan}
tags = super().__sklearn_tags__()
tags.regressor_tags.multi_label = True
tags.input_tags.allow_nan = False
return tags


class ExtraObliqueDecisionTreeClassifier(SimMatrixMixin, DecisionTreeClassifier):
Expand Down Expand Up @@ -2669,7 +2701,7 @@

tree_type = "oblique"

_parameter_constraints = {
_parameter_constraints: dict = {
**DecisionTreeClassifier._parameter_constraints,
"feature_combinations": [
Interval(Real, 1.0, None, closed="left"),
Expand Down Expand Up @@ -2846,6 +2878,13 @@
"feature_combinations_",
]

def __sklearn_tags__(self):
    """Return the parent class's tags with ``input_tags.allow_nan`` set to ``False``."""
    # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
    # However, for MORF it is not supported
    estimator_tags = super().__sklearn_tags__()
    input_tags = estimator_tags.input_tags
    input_tags.allow_nan = False
    return estimator_tags


class ExtraObliqueDecisionTreeRegressor(SimMatrixMixin, DecisionTreeRegressor):
"""An oblique decision tree Regressor.
Expand Down Expand Up @@ -3069,7 +3108,7 @@
-0.26552594, -0.00642017, -0.07108117, -0.40726765, -0.40315294])
"""

_parameter_constraints = {
_parameter_constraints: dict = {
**DecisionTreeRegressor._parameter_constraints,
"feature_combinations": [
Interval(Real, 1.0, None, closed="left"),
Expand Down Expand Up @@ -3237,3 +3276,10 @@
builder.build(self.tree_, X, y, sample_weight)

return self

def __sklearn_tags__(self):
    """Return the parent class's tags with ``input_tags.allow_nan`` set to ``False``."""
    # XXX: nans should be supportable in SPORF by just using RF-like splits on missing values
    # However, for MORF it is not supported
    estimator_tags = super().__sklearn_tags__()
    input_tags = estimator_tags.input_tags
    input_tags.allow_nan = False
    return estimator_tags
Loading
Loading