Skip to content

Commit

Permalink
Updated exact scores to be consistent with approximate scores
Browse files (browse the repository at this point in the history)
  • Loading branch information
ankane committed Sep 4, 2024
1 parent e2d6488 commit a355cff
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 14 deletions.
7 changes: 4 additions & 3 deletions lib/searchkick/query.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -927,7 +927,8 @@ def set_knn(payload, knn, per_page, offset)
query_value: vector,
space_type: space_type
}
}
},
boost: distance == "cosine" ? 0.5 : 1.0
}
}
else
Expand All @@ -947,9 +948,9 @@ def set_knn(payload, knn, per_page, offset)
source =
case distance
when "cosine"
"cosineSimilarity(params.query_vector, params.field) + 1.0"
"(cosineSimilarity(params.query_vector, params.field) + 1.0) * 0.5"
when "euclidean"
"1 / (1 + l2norm(params.query_vector, params.field))"
"double l2 = l2norm(params.query_vector, params.field); 1 / (1 + l2 * l2)"
else
raise ArgumentError, "Unknown distance: #{distance}"
end
Expand Down
14 changes: 3 additions & 11 deletions test/knn_test.rb
Original file line number | Diff line number | Diff line change
Expand Up @@ -20,8 +20,7 @@ def test_basic_exact
assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], exact: true}

scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], exact: true}).hits.map { |v| v["_score"] }
# TODO match approximate
assert_in_delta 2, scores[0]
assert_in_delta 1, scores[0]
assert_in_delta 0, scores[1]
end

Expand Down Expand Up @@ -72,7 +71,6 @@ def test_euclidean
assert_order "*", ["A", "B"], knn: {field: :factors, vector: [1, 2, 3]}

scores = Product.search(knn: {field: :factors, vector: [1, 2, 3]}).hits.map { |v| v["_score"] }
# TODO return distance
assert_in_delta 1.0 / (1 + 0), scores[0]
assert_in_delta 1.0 / (1 + 5**2), scores[1]
end
Expand All @@ -82,13 +80,8 @@ def test_euclidean_exact
assert_order "*", ["A", "B"], knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean"}

scores = Product.search(knn: {field: :embedding, vector: [1, 2, 3], distance: "euclidean"}).hits.map { |v| v["_score"] }
# TODO return distance
assert_in_delta 1.0 / (1 + 0), scores[0]
if Searchkick.opensearch?
assert_in_delta 1.0 / (1 + 5**2), scores[1]
else
assert_in_delta 1.0 / (1 + 5), scores[1]
end
assert_in_delta 1.0 / (1 + 5**2), scores[1]
end

def test_unindexed
Expand All @@ -98,8 +91,7 @@ def test_unindexed
assert_order "*", ["A", "B"], knn: {field: :vector, vector: [1, 2, 3], distance: "cosine"}

scores = Product.search(knn: {field: :vector, vector: [1, 2, 3], distance: "cosine"}).hits.map { |v| v["_score"] }
# TODO match approximate
assert_in_delta 2, scores[0]
assert_in_delta 1, scores[0]
assert_in_delta 0, scores[1]

error = assert_raises(ArgumentError) do
Expand Down

0 comments on commit a355cff

Please sign in to comment.