From dd8e2d8e0fa354af047aadba570da28b392a5a48 Mon Sep 17 00:00:00 2001 From: Leland McInnes Date: Mon, 5 Aug 2024 12:55:09 -0400 Subject: [PATCH 1/5] Fail early on generating branch data if no MST --- hdbscan/hdbscan_.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/hdbscan/hdbscan_.py b/hdbscan/hdbscan_.py index 6ce07964..de7ab9be 100644 --- a/hdbscan/hdbscan_.py +++ b/hdbscan/hdbscan_.py @@ -1299,6 +1299,12 @@ def generate_branch_detection_data(self): branches within clusters. This data is only useful if you are intending to use functions from ``hdbscan.branches``. """ + if self._min_spanning_tree is None: + raise ValueError("Branch prediction requires a minimum spanning tree; please re-run " + "with `branch_repdiction_data=True` or at least `gen_min_spanning_tree=True` " + "and this this function to generate the required information for branch " + "branch detection. + ) if self.metric in FAST_METRICS: min_samples = self.min_samples or self.min_cluster_size if self.metric in KDTREE_VALID_METRICS: From 9a3009af36da9b862265bbecd05be3d18508e34a Mon Sep 17 00:00:00 2001 From: Leland McInnes Date: Mon, 5 Aug 2024 13:08:28 -0400 Subject: [PATCH 2/5] Don't use the github editor without checking settings... --- hdbscan/hdbscan_.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/hdbscan/hdbscan_.py b/hdbscan/hdbscan_.py index de7ab9be..142134f2 100644 --- a/hdbscan/hdbscan_.py +++ b/hdbscan/hdbscan_.py @@ -1299,12 +1299,12 @@ def generate_branch_detection_data(self): branches within clusters. This data is only useful if you are intending to use functions from ``hdbscan.branches``. """ - if self._min_spanning_tree is None: - raise ValueError("Branch prediction requires a minimum spanning tree; please re-run " - "with `branch_repdiction_data=True` or at least `gen_min_spanning_tree=True` " - "and this this function to generate the required information for branch " - "branch detection.
- ) + if self._min_spanning_tree is None: + raise ValueError("Branch prediction requires a minimum spanning tree; please re-run " + "with `branch_repdiction_data=True` or at least `gen_min_spanning_tree=True` " + "and this this function to generate the required information for branch " + "branch detection. + ) if self.metric in FAST_METRICS: min_samples = self.min_samples or self.min_cluster_size if self.metric in KDTREE_VALID_METRICS: From 5ca46810751ab52aaa3dcd55fa0a914baf2d157e Mon Sep 17 00:00:00 2001 From: Leland McInnes Date: Mon, 5 Aug 2024 13:17:03 -0400 Subject: [PATCH 3/5] Typo --- hdbscan/hdbscan_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hdbscan/hdbscan_.py b/hdbscan/hdbscan_.py index 142134f2..181df732 100644 --- a/hdbscan/hdbscan_.py +++ b/hdbscan/hdbscan_.py @@ -1303,7 +1303,7 @@ def generate_branch_detection_data(self): raise ValueError("Branch prediction requires a minimum spanning tree; please re-run " "with `branch_repdiction_data=True` or at least `gen_min_spanning_tree=True` " "and this this function to generate the required information for branch " - "branch detection. + "branch detection." 
) if self.metric in FAST_METRICS: min_samples = self.min_samples or self.min_cluster_size From d629df9aa5e1b29047c292406ba2720bbf34b1ac Mon Sep 17 00:00:00 2001 From: Leland McInnes Date: Mon, 5 Aug 2024 13:57:05 -0400 Subject: [PATCH 4/5] Update test_branches.py to early error of no MST --- hdbscan/tests/test_branches.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hdbscan/tests/test_branches.py b/hdbscan/tests/test_branches.py index 5a6d9a36..2d878962 100644 --- a/hdbscan/tests/test_branches.py +++ b/hdbscan/tests/test_branches.py @@ -184,9 +184,7 @@ def test_branch_detection_data_with_unsupported_input(): def test_generate_branch_detection_data(): """Generate branch detection data function does not re-generate MST.""" c = HDBSCAN(min_cluster_size=5).fit(X) - c.generate_branch_detection_data() - assert c.branch_detection_data_ is not None - assert_raises(AttributeError, lambda: c.minimum_spanning_tree_) + assert_raises(ValueError, c.generate_branch_detection_data) # --- Detecting Branches @@ -287,8 +285,6 @@ def test_badargs(): c = HDBSCAN(min_cluster_size=5, branch_detection_data=True).fit(X) c_nofit = HDBSCAN(min_cluster_size=5, branch_detection_data=True) c_nobranch = HDBSCAN(min_cluster_size=5, gen_min_span_tree=True).fit(X) - c_nomst = HDBSCAN(min_cluster_size=5).fit(X) - c_nomst.generate_branch_detection_data() assert_raises(AttributeError, detect_branches_in_clusters, "fail") assert_raises(AttributeError, detect_branches_in_clusters, None) From 0c642fbcfacd1e1dca7363000452088a792894da Mon Sep 17 00:00:00 2001 From: Leland McInnes Date: Mon, 5 Aug 2024 15:31:34 -0400 Subject: [PATCH 5/5] Missed a nomst bad arg case --- hdbscan/tests/test_branches.py | 1 - 1 file changed, 1 deletion(-) diff --git a/hdbscan/tests/test_branches.py b/hdbscan/tests/test_branches.py index 2d878962..c9bd281c 100644 --- a/hdbscan/tests/test_branches.py +++ b/hdbscan/tests/test_branches.py @@ -291,7 +291,6 @@ def test_badargs(): 
assert_raises(AttributeError, detect_branches_in_clusters, "fail") assert_raises(ValueError, detect_branches_in_clusters, c_nofit) assert_raises(AttributeError, detect_branches_in_clusters, c_nobranch) - assert_raises(ValueError, detect_branches_in_clusters, c_nomst) assert_raises(ValueError, detect_branches_in_clusters, c, min_branch_size=-1) assert_raises(ValueError, detect_branches_in_clusters, c, min_branch_size=0) assert_raises(ValueError, detect_branches_in_clusters, c, min_branch_size=1)