Skip to content

Commit

Permalink
Merge pull request #26 from ARBML/zaidalyafeai-patch-4
Browse files Browse the repository at this point in the history
discard faulty embeddings
  • Loading branch information
zaidalyafeai authored Dec 21, 2024
2 parents 9be41eb + e9672de commit 83bee40
Showing 1 changed file with 8 additions and 3 deletions.
11 changes: 8 additions & 3 deletions utils/clusters_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,15 @@ def compute_clusters(embeddings: List[List[float]]) -> List[int]:

def compute_reduced_embeddings(
    embeddings: List[List[float]], expected_dim: int = 384
) -> List[List[float]]:
    """Project embeddings to 2-D with t-SNE and shift them to be non-negative.

    Args:
        embeddings: One vector per item.
        expected_dim: Length every vector is required to have. Vectors of
            any other length are treated as faulty. Defaults to 384, the
            width this function previously hard-coded (presumably the
            output size of the embedding model in use -- confirm).

    Returns:
        One ``[x, y]`` pair per input vector, in input order. The global
        minimum coordinate is subtracted from every value, so the smallest
        coordinate in the result is 0. An empty input yields ``[]``.
    """
    if len(embeddings) == 0:
        # Nothing to fit; also avoids calling .min() on an empty array.
        return []

    # Faulty vectors are replaced by an all-zero row instead of being
    # dropped, so that row i of the result still corresponds to item i of
    # the input.
    placeholder = [0.0] * expected_dim
    cleaned = [
        emb if len(emb) == expected_dim else placeholder
        for emb in embeddings
    ]

    # Every row now has the same length, so a dense float matrix can be
    # built (dtype=object is no longer needed).
    matrix = np.asarray(cleaned, dtype=float)

    # Fixed seed keeps the layout reproducible between runs.
    tsne_model = TSNE(n_components=2, random_state=42)
    tsne_data = tsne_model.fit_transform(matrix)

    return (tsne_data - tsne_data.min()).tolist()

0 comments on commit 83bee40

Please sign in to comment.