From 927a145e7c7e030a7e95764ef22f159b6d4859c1 Mon Sep 17 00:00:00 2001 From: Assaf Toledo Date: Wed, 4 May 2022 11:52:20 +0300 Subject: [PATCH] small fix --- examples/ex1_sib_cluster_20ng.py | 3 --- src/sib/clustering_utils.py | 7 ++----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/examples/ex1_sib_cluster_20ng.py b/examples/ex1_sib_cluster_20ng.py index b4a9062..ac7d9fa 100644 --- a/examples/ex1_sib_cluster_20ng.py +++ b/examples/ex1_sib_cluster_20ng.py @@ -91,12 +91,9 @@ clusters = clustering_utils.get_clusters(sib.labels_) cluster_key_terms = clustering_utils.get_key_terms(vectors, clusters, p_value_threshold=0.01, top_k=15) -cluster_key_texts = clustering_utils.get_ket_texts(vectors, clusters, cluster_key_terms, 2) p_value_analysis_end_t = time() print("P-value analysis time: %.3f secs." % (p_value_analysis_end_t - p_value_analysis_start_t)) -print(cluster_key_texts) - # step 6 - align the generated clusters to the original classes # this is done only for having a more informative report label_enrichment = clustering_utils.get_enriched_labels(gold_labels, sib.labels_, diff --git a/src/sib/clustering_utils.py b/src/sib/clustering_utils.py index b575ad8..0d82a78 100644 --- a/src/sib/clustering_utils.py +++ b/src/sib/clustering_utils.py @@ -68,10 +68,7 @@ def get_binary_vectors(vectors): return binary_vectors -def get_key_terms(vectors, labels, p_value_threshold, top_k): - # map from cluster id to cluster elements - clusters = get_clusters(labels) - +def get_key_terms(vectors, clusters, p_value_threshold, top_k): # perform document-level analysis. count every token only once per document. binary_vectors = get_binary_vectors(vectors) p_values = calc_p_value_doc_level(binary_vectors, clusters) @@ -91,7 +88,7 @@ def get_key_terms(vectors, labels, p_value_threshold, top_k): return result -def get_ket_texts(vectors, clusters, key_terms, top_k): +def get_key_texts(vectors, clusters, key_terms, top_k): binary_vectors = get_binary_vectors(vectors) result = {} for cluster_id, cluster_key_terms in key_terms.items():