Skip to content

Commit

Permalink
Fix deprecated call to awesome_cossim_topn
Browse files Browse the repository at this point in the history
  • Loading branch information
RUrlus committed Apr 15, 2024
1 parent 5d0734b commit 4d8afee
Showing 1 changed file with 9 additions and 10 deletions.
19 changes: 9 additions & 10 deletions polyfuzz/models/_utils.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
import importlib.util

import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from typing import List
from sklearn.neighbors import NearestNeighbors
from sklearn.metrics.pairwise import cosine_similarity as scikit_cosine_similarity

try:
from sparse_dot_topn import awesome_cossim_topn
_HAVE_SPARSE_DOT = True
except ImportError:
_HAVE_SPARSE_DOT = False
_HAVE_SPARSE_DOT = importlib.util.find_spec("sparse_dot_topn") is not None
if _HAVE_SPARSE_DOT:
from sparse_dot_topn import sp_matmul_topn


def cosine_similarity(from_vector: np.ndarray,
Expand Down Expand Up @@ -69,17 +69,16 @@ def cosine_similarity(from_vector: np.ndarray,

similarities = [np.round(1 - distances[:, i], 3) for i in range(distances.shape[1])]

# Fast, but does have some installation issues
# Fast
elif _HAVE_SPARSE_DOT and method == "sparse":
if isinstance(to_vector, np.ndarray):
to_vector = csr_matrix(to_vector)
if isinstance(from_vector, np.ndarray):
from_vector = csr_matrix(from_vector)

# There is a bug with awesome_cossim_topn that when to_vector and from_vector
# have the same shape, setting topn to 1 does not work. Apparently, you need
# to set it to at least 2 for it to work
similarity_matrix = awesome_cossim_topn(from_vector, to_vector.T, top_n+1, min_similarity)
similarity_matrix = sp_matmul_topn(
from_vector, to_vector, top_n=top_n, threshold=min_similarity, sort=True
)

if to_list is None:
similarity_matrix = similarity_matrix.tolil()
Expand Down

0 comments on commit 4d8afee

Please sign in to comment.