Skip to content

Commit

Permalink
Prepare v2.3.0 (#21)
Browse files Browse the repository at this point in the history
* fix handling of nan values in compress_memberships

* make black

* add publish workflow
  • Loading branch information
OlivierBinette authored Nov 29, 2023
1 parent 691ee7e commit 282bb8b
Show file tree
Hide file tree
Showing 29 changed files with 178 additions and 219 deletions.
27 changes: 27 additions & 0 deletions .github/workflows/publish.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Build the package and upload it to PyPI whenever a version tag is pushed.
name: Publish Python package to PyPI

on:
  push:
    tags:
      - 'v*' # Trigger the workflow on push tags like v1.0.0

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.12' # Use the version appropriate for your project
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install build twine
      - name: Build and publish
        env:
          # "__token__" tells twine to authenticate with a PyPI API token
          # rather than an account password; the token itself is read from
          # the repository secret PYPI_TOKEN.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_TOKEN }}
        run: |
          python -m build
          twine upload dist/*
5 changes: 5 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,11 @@
Changelog
=========

2.3.0 (November 29, 2023)
-------------------------

* Fix handling of NaN values in `compress_memberships()`

2.2.1 (November 8, 2023)
------------------------
* Small fixes to paper and documentation.
Expand Down
1 change: 1 addition & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#
import os
import sys

import er_evaluation

sys.path.insert(0, os.path.abspath(".."))
Expand Down
6 changes: 3 additions & 3 deletions er_evaluation/__init__.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,21 @@
__version__ = "2.2.1"
__version__ = "2.3.0"

import er_evaluation.data_structures
import er_evaluation.datasets
import er_evaluation.error_analysis
import er_evaluation.estimators
import er_evaluation.metrics
import er_evaluation.plots
import er_evaluation.utils
import er_evaluation.summary
import er_evaluation.utils
from er_evaluation.data_structures import *
from er_evaluation.datasets import *
from er_evaluation.error_analysis import *
from er_evaluation.estimators import *
from er_evaluation.metrics import *
from er_evaluation.plots import *
from er_evaluation.utils import *
from er_evaluation.summary import *
from er_evaluation.utils import *

__all__ = (
er_evaluation.data_structures.__all__
Expand Down
24 changes: 5 additions & 19 deletions er_evaluation/data_structures/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,25 +56,11 @@
└─3─┘ 5
"""
from er_evaluation.data_structures._data_structures import (
MembershipVector,
compress_memberships,
clusters_to_graph,
clusters_to_membership,
clusters_to_pairs,
graph_to_clusters,
graph_to_membership,
graph_to_pairs,
isclusters,
isgraph,
ismembership,
ispairs,
membership_to_clusters,
membership_to_graph,
membership_to_pairs,
pairs_to_clusters,
pairs_to_graph,
pairs_to_membership,
)
MembershipVector, clusters_to_graph, clusters_to_membership,
clusters_to_pairs, compress_memberships, graph_to_clusters,
graph_to_membership, graph_to_pairs, isclusters, isgraph, ismembership,
ispairs, membership_to_clusters, membership_to_graph, membership_to_pairs,
pairs_to_clusters, pairs_to_graph, pairs_to_membership)

__all__ = [
"compress_memberships",
Expand Down
20 changes: 10 additions & 10 deletions er_evaluation/data_structures/_data_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,21 @@ def compress_memberships(*memberships):
List of Series with integer codes for index and values (missing values are kept as NaN). Indexes are compatible across the Series.
Examples:
>>> membership = pd.Series(["c1", "c1", "c1", "c2", "c2", "c3"], index=[0,1,2,3,4,5])
>>> membership = pd.Series([None, "c1", "c1", "c2", "c2", "c3"], index=[0,1,2,3,4,5])
>>> compressed, = compress_memberships(membership)
>>> compressed
0 0
1 0
2 0
3 1
4 1
5 2
Name: 0, dtype: int8
0 NaN
1 0.0
2 0.0
3 1.0
4 1.0
5 2.0
Name: 0, dtype: float64
"""
compressed = pd.concat(memberships, axis=1)
compressed.index = pd.Categorical(compressed.index).codes
for col in compressed.columns:
compressed[col] = pd.Categorical(compressed[col]).codes
codes = pd.Categorical(compressed[col]).codes
compressed[col] = np.where(compressed[col].isna(), np.nan, codes)

return [compressed[col] for col in compressed.columns]

Expand Down
13 changes: 6 additions & 7 deletions er_evaluation/datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,13 +12,12 @@
The :py:meth:`load_rldata10000_disambiguations` and :py:meth:`load_rldata10000` return ground truth disambiguation, toy predicted disambiguations, and the full RLdata10000 dataframe.
"""

from er_evaluation.datasets.patentsview import load_pv_data, load_pv_disambiguations
from er_evaluation.datasets.rldata import (
load_rldata500,
load_rldata500_disambiguations,
load_rldata10000,
load_rldata10000_disambiguations,
)
from er_evaluation.datasets.patentsview import (load_pv_data,
load_pv_disambiguations)
from er_evaluation.datasets.rldata import (load_rldata500,
load_rldata500_disambiguations,
load_rldata10000,
load_rldata10000_disambiguations)

__all__ = [
"load_pv_data",
Expand Down
30 changes: 8 additions & 22 deletions er_evaluation/error_analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,29 +85,15 @@
The key advantage of working with the record error table is that it allows sensitivity analyses to be performed. Since all cluster error metrics and representative performance estimators can be computed directly from the record error table, uncertainty regarding error rates can be propagated from the record error table into cluster error metrics and into performance estimates.
"""
from er_evaluation.error_analysis._cluster_error import (
count_extra,
count_missing,
error_indicator,
error_metrics,
expected_extra,
expected_missing,
expected_relative_extra,
expected_relative_missing,
expected_size_difference,
splitting_entropy,
)
count_extra, count_missing, error_indicator, error_metrics, expected_extra,
expected_missing, expected_relative_extra, expected_relative_missing,
expected_size_difference, splitting_entropy)
from er_evaluation.error_analysis._record_error import (
cluster_sizes_from_table,
error_indicator_from_table,
error_metrics_from_table,
expected_extra_from_table,
expected_missing_from_table,
expected_relative_extra_from_table,
expected_relative_missing_from_table,
expected_size_difference_from_table,
pred_cluster_sizes_from_table,
record_error_table,
)
cluster_sizes_from_table, error_indicator_from_table,
error_metrics_from_table, expected_extra_from_table,
expected_missing_from_table, expected_relative_extra_from_table,
expected_relative_missing_from_table, expected_size_difference_from_table,
pred_cluster_sizes_from_table, record_error_table)
from er_evaluation.error_analysis._subgroup_discovery import fit_dt_regressor

__all__ = [
Expand Down
9 changes: 3 additions & 6 deletions er_evaluation/error_analysis/_cluster_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@

from er_evaluation.data_structures import MembershipVector
from er_evaluation.error_analysis._record_error import (
error_metrics_from_table,
expected_relative_missing_from_table,
expected_size_difference_from_table,
record_error_table,
error_indicator_from_table,
)
error_indicator_from_table, error_metrics_from_table,
expected_relative_missing_from_table, expected_size_difference_from_table,
record_error_table)
from er_evaluation.utils import relevant_prediction_subset


Expand Down
6 changes: 3 additions & 3 deletions er_evaluation/error_analysis/_subgroup_discovery.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from sklearn.tree import DecisionTreeRegressor
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeRegressor


def fit_dt_regressor(
Expand Down
28 changes: 11 additions & 17 deletions er_evaluation/estimators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,24 +14,18 @@
**Note:** In order to obtain representative performance estimators, the set of predicted clusters given as an argument to estimator functions should cover the entire population of interest. Typically, this set of predicted clusters will be much larger than the set of sampled clusters.
"""
from er_evaluation.estimators._estimators import (
b_cubed_precision_estimator,
b_cubed_recall_estimator,
cluster_f_estimator,
cluster_precision_estimator,
cluster_recall_estimator,
estimates_table,
pairwise_f_estimator,
pairwise_precision_estimator,
pairwise_recall_estimator,
)
from er_evaluation.estimators._estimators import (b_cubed_precision_estimator,
b_cubed_recall_estimator,
cluster_f_estimator,
cluster_precision_estimator,
cluster_recall_estimator,
estimates_table,
pairwise_f_estimator,
pairwise_precision_estimator,
pairwise_recall_estimator)
from er_evaluation.estimators._summary_estimators import (
avg_cluster_size_estimator,
homonymy_rate_estimator,
matching_rate_estimator,
name_variation_estimator,
summary_estimates_table,
)
avg_cluster_size_estimator, homonymy_rate_estimator,
matching_rate_estimator, name_variation_estimator, summary_estimates_table)

__all__ = [
"b_cubed_precision_estimator",
Expand Down
23 changes: 8 additions & 15 deletions er_evaluation/estimators/_estimators.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,16 @@
from scipy.special import comb

from er_evaluation.data_structures import MembershipVector
from er_evaluation.error_analysis import (
record_error_table,
)
from er_evaluation.error_analysis import record_error_table
from er_evaluation.estimators._utils import (_parse_weights,
ratio_of_means_estimator,
validate_prediction_sample,
validate_weights)
from er_evaluation.estimators.from_table import (
pairwise_f_estimator_from_table,
cluster_precision_estimator_from_table,
cluster_recall_estimator_from_table,
cluster_f_estimator_from_table,
b_cubed_precision_estimator_from_table,
b_cubed_recall_estimator_from_table,
)
from er_evaluation.estimators._utils import (
validate_prediction_sample,
_parse_weights,
validate_weights,
ratio_of_means_estimator,
)
b_cubed_recall_estimator_from_table, cluster_f_estimator_from_table,
cluster_precision_estimator_from_table,
cluster_recall_estimator_from_table, pairwise_f_estimator_from_table)
from er_evaluation.utils import expand_grid


Expand Down
12 changes: 5 additions & 7 deletions er_evaluation/estimators/_summary_estimators.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import pandas as pd

from er_evaluation.estimators._utils import (
ratio_of_means_estimator,
validate_prediction_sample,
_parse_weights,
validate_weights,
)
from er_evaluation.summary import cluster_sizes
from er_evaluation.data_structures import MembershipVector
from er_evaluation.estimators._utils import (_parse_weights,
ratio_of_means_estimator,
validate_prediction_sample,
validate_weights)
from er_evaluation.summary import cluster_sizes
from er_evaluation.utils import expand_grid


Expand Down
3 changes: 2 additions & 1 deletion er_evaluation/estimators/_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import logging
import functools
import logging

import numpy as np
import pandas as pd

Expand Down
14 changes: 6 additions & 8 deletions er_evaluation/estimators/from_table.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
from er_evaluation.error_analysis import (
cluster_sizes_from_table,
error_indicator_from_table,
expected_missing_from_table,
expected_relative_extra_from_table,
expected_relative_missing_from_table,
expected_size_difference_from_table,
)
from er_evaluation.error_analysis import (cluster_sizes_from_table,
error_indicator_from_table,
expected_missing_from_table,
expected_relative_extra_from_table,
expected_relative_missing_from_table,
expected_size_difference_from_table)
from er_evaluation.estimators._utils import ratio_of_means_estimator


Expand Down
25 changes: 8 additions & 17 deletions er_evaluation/metrics/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,23 +18,14 @@
- Records with NA cluster identifier in the reference or predicted clusterings are dropped.
- The metrics in this module do not provide representative performance estimates. They are only useful for comparing two clusterings, such as a predicted clustering and a reference clustering. For representative performance estimates, see the :mod:`er_evaluation.estimators` module.
"""
from er_evaluation.metrics._metrics import (
adjusted_rand_score,
b_cubed_f,
b_cubed_precision,
b_cubed_recall,
cluster_completeness,
cluster_f,
cluster_homogeneity,
cluster_precision,
cluster_recall,
cluster_v_measure,
metrics_table,
pairwise_f,
pairwise_precision,
pairwise_recall,
rand_score,
)
from er_evaluation.metrics._metrics import (adjusted_rand_score, b_cubed_f,
b_cubed_precision, b_cubed_recall,
cluster_completeness, cluster_f,
cluster_homogeneity,
cluster_precision, cluster_recall,
cluster_v_measure, metrics_table,
pairwise_f, pairwise_precision,
pairwise_recall, rand_score)

__all__ = [
"adjusted_rand_score",
Expand Down
10 changes: 4 additions & 6 deletions er_evaluation/metrics/_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,10 @@
from scipy.special import comb

from er_evaluation.data_structures import MembershipVector
from er_evaluation.error_analysis import (
error_indicator,
expected_relative_extra_from_table,
expected_relative_missing_from_table,
record_error_table,
)
from er_evaluation.error_analysis import (error_indicator,
expected_relative_extra_from_table,
expected_relative_missing_from_table,
record_error_table)
from er_evaluation.summary import number_of_links
from er_evaluation.utils import expand_grid

Expand Down
Loading

0 comments on commit 282bb8b

Please sign in to comment.