Skip to content

Commit

Permalink
small fix
Browse files: browse the repository at this point in the history
  • Loading branch information
assaftibm committed May 4, 2022
1 parent 83ae8d2 commit 927a145
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 8 deletions.
3 changes: 0 additions & 3 deletions examples/ex1_sib_cluster_20ng.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,12 +91,9 @@
clusters = clustering_utils.get_clusters(sib.labels_)
cluster_key_terms = clustering_utils.get_key_terms(vectors, clusters,
p_value_threshold=0.01, top_k=15)
cluster_key_texts = clustering_utils.get_ket_texts(vectors, clusters, cluster_key_terms, 2)
p_value_analysis_end_t = time()
print("P-value analysis time: %.3f secs." % (p_value_analysis_end_t - p_value_analysis_start_t))

print(cluster_key_texts)

# step 6 - align the generated clusters to the original classes
# this is done only for having a more informative report
label_enrichment = clustering_utils.get_enriched_labels(gold_labels, sib.labels_,
Expand Down
7 changes: 2 additions & 5 deletions src/sib/clustering_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,7 @@ def get_binary_vectors(vectors):
return binary_vectors


def get_key_terms(vectors, labels, p_value_threshold, top_k):
# map from cluster id to cluster elements
clusters = get_clusters(labels)

def get_key_terms(vectors, clusters, p_value_threshold, top_k):
# perform document-level analysis. count every token only once per document.
binary_vectors = get_binary_vectors(vectors)
p_values = calc_p_value_doc_level(binary_vectors, clusters)
Expand All @@ -91,7 +88,7 @@ def get_key_terms(vectors, labels, p_value_threshold, top_k):
return result


def get_ket_texts(vectors, clusters, key_terms, top_k):
def get_key_texts(vectors, clusters, key_terms, top_k):
binary_vectors = get_binary_vectors(vectors)
result = {}
for cluster_id, cluster_key_terms in key_terms.items():
Expand Down

0 comments on commit 927a145

Please sign in to comment.