Skip to content

Commit

Permalink
Add _backend to project structure
Browse files Browse the repository at this point in the history
  • Loading branch information
breimanntools committed Oct 5, 2023
1 parent 8e6c259 commit 4615df3
Show file tree
Hide file tree
Showing 219 changed files with 577 additions and 544 deletions.
33 changes: 23 additions & 10 deletions aaanalysis/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,27 @@
from aaanalysis.data_handling import load_dataset, load_scales
from aaanalysis.feature_engineering import AAclust, AAclustPlot, CPP, CPPPlot, SequenceFeature, SplitRange
from aaanalysis.pu_learning import dPULearn
from aaanalysis.plotting import (plot_get_clist, plot_get_cmap, plot_get_cdict,
from .data_handling import load_dataset, load_scales
from .feature_engineering import AAclust, AAclustPlot, CPP, CPPPlot, SequenceFeature, SplitRange
from .pu_learning import dPULearn
from .plotting import (plot_get_clist, plot_get_cmap, plot_get_cdict,
plot_settings, plot_legend, plot_gcfs)
from aaanalysis.config import options
from .config import options

__all__ = ["load_dataset", "load_scales",
"AAclust", "AAclustPlot",
"CPP", "CPPPlot", "SequenceFeature", "SplitRange",
"dPULearn", "plot_get_clist", "plot_get_cmap", "plot_get_cdict",
"plot_settings", "plot_legend", "plot_gcfs", "options"]
__all__ = [
"load_dataset",
"load_scales",
"AAclust",
"AAclustPlot",
"CPP",
"CPPPlot",
"SequenceFeature",
"SplitRange",
"dPULearn",
"plot_get_clist",
"plot_get_cmap",
"plot_get_cdict",
"plot_settings",
"plot_legend",
"plot_gcfs",
"options"
]


Binary file modified aaanalysis/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file modified aaanalysis/_utils/__pycache__/utils_cpp.cpython-39.pyc
Binary file not shown.
2 changes: 1 addition & 1 deletion aaanalysis/_utils/utils_cpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
"\nwhere all numbers should be non-negative integers, and N/C means N or C."\
.format(STR_SEGMENT, STR_PATTERN, STR_PERIODIC_PATTERN)


# TODO to CPP backend
# II Main Functions
# General check functions
def check_color(name=None, val=None, accept_none=False):
Expand Down
9 changes: 6 additions & 3 deletions aaanalysis/data_handling/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
from aaanalysis.data_handling.load_dataset_ import load_dataset
from aaanalysis.data_handling.load_scales_ import load_scales
from ._load_dataset import load_dataset
from ._load_scales import load_scales

__all__ = ["load_dataset", "load_scales"]
__all__ = [
"load_dataset",
"load_scales"
]
Binary file modified aaanalysis/data_handling/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

# II Main Functions
class TreeModel:
"""A wrapper for Tree based prediction models and Tree explainers from SHAP package to
"""A wrapper for Tree based prediction models and Tree explainable_ai from SHAP package to
explain prediction (typically binary classification) results at global and individual level"""
def __init__(self, model=None):
""""""
Expand All @@ -33,7 +33,7 @@ def add_feat_import(self, df_feat=None):


class ShapModel:
"""A wrapper for Tree explainers from SHAP package"""
"""A wrapper for Tree explainable_ai from SHAP package"""
def __init__(self, model=None):
""""""

Expand Down
21 changes: 13 additions & 8 deletions aaanalysis/feature_engineering/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
from aaanalysis.feature_engineering.aaclust_ import AAclust
from aaanalysis.feature_engineering.aaclust_plot_ import AAclustPlot
from aaanalysis.feature_engineering.cpp.feature import SequenceFeature
from aaanalysis.feature_engineering.cpp.feature import SplitRange
from aaanalysis.feature_engineering.cpp_plot_ import CPPPlot
from aaanalysis.feature_engineering.cpp_ import CPP
from ._aaclust import AAclust
from ._aaclust_plot import AAclustPlot
from ._cpp_plot import CPPPlot
from ._cpp import CPP
from ._backend.cpp.feature import SequenceFeature, SplitRange


__all__ = ["AAclust", "AAclustPlot", "CPP", "CPPPlot", "SequenceFeature", "SplitRange"]
__all__ = [
"AAclust",
"AAclustPlot",
"SequenceFeature",
"SplitRange",
"CPP",
"CPPPlot",
]
Binary file modified aaanalysis/feature_engineering/__pycache__/__init__.cpython-39.pyc
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,11 @@
from aaanalysis.template_classes import Wrapper
import aaanalysis.utils as ut

from aaanalysis.feature_engineering.aaclust._aaclust import (estimate_lower_bound_n_clusters, optimize_n_clusters, merge_clusters,
compute_centers, compute_medoids)
from aaanalysis.feature_engineering.aaclust._aaclust_bic import bic_score
from aaanalysis.feature_engineering.aaclust._aaclust_comp_corr import compute_correlation
from aaanalysis.feature_engineering.aaclust._aaclust_name_clusters import name_clusters

from ._backend.aaclust.aaclust_fit import estimate_lower_bound_n_clusters, optimize_n_clusters, merge_clusters
from ._backend.aaclust.aaclust_eval import bic_score
from ._backend.aaclust.aaclust_methods import (compute_centers, compute_medoids,
name_clusters,
compute_correlation)

# I Helper Functions
# Check parameter functions
Expand Down Expand Up @@ -159,7 +158,7 @@ def fit(self,
X: ut.ArrayLike2D,
n_clusters: Optional[int] = None,
on_center: bool = True,
min_th: float = 0,
min_th: float = 0.3,
merge_metric: Union[str, None] = "euclidean",
names: Optional[List[str]] = None) -> "AAclust":
"""
Expand Down Expand Up @@ -229,8 +228,7 @@ def fit(self,
check_match_X_n_clusters(X=X, n_clusters=n_clusters, accept_none=True)
check_match_X_names(X=X, names=names, accept_none=True)

args = dict(model=self.model_class, model_kwargs=self._model_kwargs, min_th=min_th, on_center=on_center,
verbose=self._verbose)
args = dict(model=self.model_class, model_kwargs=self._model_kwargs, min_th=min_th, on_center=on_center)

# Clustering using given clustering models
if n_clusters is not None:
Expand All @@ -240,15 +238,21 @@ def fit(self,
# Clustering using AAclust algorithm
else:
# Step 1.: Estimation of lower bound of k (number of clusters)
if self._verbose:
ut.print_out("1. Estimation of lower bound of k (number of clusters)", end="")
n_clusters_lb = estimate_lower_bound_n_clusters(X, **args)
# Step 2. Optimization of k by recursive clustering
if self._verbose:
objective_fct = "min_cor_center" if on_center else "min_cor_all"
ut.print_out(f"2. Optimization of k by recursive clustering ({objective_fct}, min_th={min_th}, k={n_clusters_lb})", end="")
n_clusters = optimize_n_clusters(X, n_clusters=n_clusters_lb, **args)
self.model = self.model_class(n_clusters=n_clusters, **self._model_kwargs)
labels = self.model.fit(X).labels_.tolist()
# Step 3. Cluster merging (optional)
if merge_metric is not None:
labels = merge_clusters(X, labels=labels, min_th=min_th, on_center=on_center,
metric=merge_metric, verbose=self._verbose)
if self._verbose:
ut.print_out(f"3. Cluster merging (k={len(labels)})", end="")
labels = merge_clusters(X, labels=labels, min_th=min_th, on_center=on_center, metric=merge_metric)
n_clusters = len(set(labels))

# Obtain cluster centers and medoids
Expand Down
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
"""
This is a script for the plotting class of AAclust.
"""
import time
import pandas as pd
import numpy as np
from sklearn.decomposition import PCA
from typing import Optional, Dict, Union, List, Tuple, Type
from sklearn.base import TransformerMixin
import matplotlib.pyplot as plt
import seaborn as sns

import aaanalysis as aa
import aaanalysis.utils as ut
from aaanalysis.feature_engineering.aaclust_plot._aaclust_plot_eval import plot_eval

from ._backend.aaclust_plot.aaclust_plot_eval import plot_eval


# I Helper Functions
def _get_components(data=None, model_class=None):
Expand Down
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
23 changes: 23 additions & 0 deletions aaanalysis/feature_engineering/_backend/aaclust/_utils_aaclust.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""
This is a script for utility functions for aaclust object.
"""
import numpy as np


# II Main Functions
def _cluster_center(X):
    """Return the center of a cluster.

    The center is the arithmetic mean of ``X`` taken along axis 0, i.e.,
    averaged over all data points/observations of the cluster. A new leading
    axis is added to the mean, so a 2D ``X`` yields a single-row 2D result.
    """
    mean_over_observations = X.mean(axis=0)
    # Add a leading axis so the center can be stacked row-wise with X
    return mean_over_observations[np.newaxis, :]


def _cluster_medoid(X):
    """Return the index of the cluster medoid.

    The medoid is the row of ``X`` (a scale) whose correlation with the
    cluster center is highest; it serves as the representative scale for
    the cluster. The returned value is the row index into ``X``.
    """
    center = _cluster_center(X)
    # Put the center in row 0, followed by all rows of X
    stacked = np.concatenate((center, X), axis=0)
    # Row 0 of the correlation matrix relates the center to every row;
    # column 0 (center vs. itself) is dropped so indices line up with X
    corr_with_center = np.corrcoef(stacked)[0, 1:]
    return corr_with_center.argmax()




Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
"""
This is a script for computing the Bayesian Information Criterion (BIC).
This is a script for computing the Bayesian Information Criterion (BIC) used in the AAclust.eval() method.
"""
import numpy as np
from scipy.spatial import distance
from aaanalysis.feature_engineering.aaclust._aaclust import compute_centers

from .aaclust_methods import compute_centers

# I Helper Functions


# II Main function
def bic_score(X, labels=None):
"""Computes the BIC metric for given clusters.
Expand Down Expand Up @@ -53,5 +55,4 @@ def bic_score(X, labels=None):

bic_components = size_clusters * (log_size_clusters - log_n_samples) - 0.5 * size_clusters * n_features * log_bcv - 0.5 * (size_clusters - 1) * n_features
bic = np.sum(bic_components) - const_term

return bic
Loading

0 comments on commit 4615df3

Please sign in to comment.