diff --git a/setup.py b/setup.py index f5d45f2..1025e24 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,6 @@ 'scikit-learn >= 1.2.0', 'gensim >= 4.0.0', 'umap-learn >= 0.5.1', - 'hdbscan >= 0.8.27', 'wordcloud', ], extras_require={ diff --git a/top2vec/Top2Vec.py b/top2vec/Top2Vec.py index 712169c..f278796 100644 --- a/top2vec/Top2Vec.py +++ b/top2vec/Top2Vec.py @@ -9,11 +9,10 @@ from gensim.parsing.preprocessing import strip_tags from gensim.models.phrases import Phrases import umap -import hdbscan from wordcloud import WordCloud import matplotlib.pyplot as plt from joblib import dump, load -from sklearn.cluster import dbscan +from sklearn.cluster import dbscan, HDBSCAN import tempfile from sklearn.feature_extraction.text import CountVectorizer from sklearn.preprocessing import normalize @@ -1384,7 +1383,7 @@ def compute_topics(self, labels = cluster.fit_predict(umap_embedding) else: - cluster = hdbscan.HDBSCAN(**hdbscan_args).fit(umap_embedding) + cluster = HDBSCAN(**hdbscan_args).fit(umap_embedding) labels = cluster.labels_ # calculate topic vectors from dense areas of documents