Conversation
Greptile Summary: This PR adds comprehensive recall testing capabilities and optimizes existing DML/DQL tests. The changes include enhanced distance calculations for low-precision types (FP16/INT8), new helper functions for generating recall-specific test data, and a new test file with extensive recall validation logic. Critical Issues:
Positive Changes:
Confidence Score: 2/5
Important Files Changed
Last reviewed commit: ec8cde3 |
| print("dim,vec1,vec2:\n") | ||
| print(dim,vec1,vec2) |
There was a problem hiding this comment.
Remove debug print statements before committing.
| print("dim,vec1,vec2:\n") | |
| print(dim,vec1,vec2) | |
| # Process dimension intersection for sparse vectors |
|
|
||
| DEFAULT_VECTOR_DIMENSION = 128 | ||
|
|
||
| VECTOR_DIMENSION_1024 = 4 |
There was a problem hiding this comment.
VECTOR_DIMENSION_1024 = 4 appears incorrect - should this be 1024?
| VECTOR_DIMENSION_1024 = 4 | |
| VECTOR_DIMENSION_1024 = 1024 |
| VectorSchema( | ||
| v, | ||
| k, | ||
| dimension=DVECTOR_DIMENSION_1024, |
There was a problem hiding this comment.
DVECTOR_DIMENSION_1024 is undefined - should be VECTOR_DIMENSION_1024
| dimension=DVECTOR_DIMENSION_1024, | |
| dimension=VECTOR_DIMENSION_1024, |
| (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )), | ||
|
|
||
| (True, True, HnswIndexParam(metric_type=MetricType.COSINE, m=24, ef_construction=150, )), | ||
| (True, True, HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200, )), | ||
| (True, True, FlatIndexParam(metric_type=MetricType.COSINE, )), | ||
| (True, True, FlatIndexParam(metric_type=MetricType.L2, )), |
There was a problem hiding this comment.
Tuples in this list will never match vector_index_param (which is just an IndexParam object, not a tuple). Remove the tuple wrappers or fix the comparison logic.
| (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )), | |
| (True, True, HnswIndexParam(metric_type=MetricType.COSINE, m=24, ef_construction=150, )), | |
| (True, True, HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200, )), | |
| (True, True, FlatIndexParam(metric_type=MetricType.COSINE, )), | |
| (True, True, FlatIndexParam(metric_type=MetricType.L2, )), | |
| IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, ), | |
| HnswIndexParam(metric_type=MetricType.COSINE, m=24, ef_construction=150, ), | |
| HnswIndexParam(metric_type=MetricType.L2, m=32, ef_construction=200, ), | |
| FlatIndexParam(metric_type=MetricType.COSINE, ), | |
| FlatIndexParam(metric_type=MetricType.L2, ), |
| doc_fields = {} | ||
| doc_vectors = {} | ||
| doc_fields, doc_vectors = generate_vectordict(i, schema) | ||
| doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) | ||
| return doc | ||
|
|
||
|
|
||
| def generate_update_doc(i: int, schema: CollectionSchema) -> Doc: | ||
| doc_fields = {} | ||
| doc_vectors = {} |
There was a problem hiding this comment.
duplicate variable declarations - remove lines 110-111
| doc_fields = {} | |
| doc_vectors = {} | |
| doc_fields, doc_vectors = generate_vectordict(i, schema) | |
| doc = Doc(id=str(i), fields=doc_fields, vectors=doc_vectors) | |
| return doc | |
| def generate_update_doc(i: int, schema: CollectionSchema) -> Doc: | |
| doc_fields = {} | |
| doc_vectors = {} | |
| def generate_vectordict_recall(i: int, schema: CollectionSchema) -> Doc: | |
| doc_fields = {} | |
| doc_vectors = {} |
| doc_fields = {} | ||
| doc_vectors = {} | ||
| doc_fields = {} | ||
| doc_vectors = {} |
There was a problem hiding this comment.
duplicate variable declarations - remove lines 173-174
| doc_fields = {} | |
| doc_vectors = {} | |
| doc_fields = {} | |
| doc_vectors = {} | |
| def generate_vectordict_update(i: int, schema: CollectionSchema) -> Doc: | |
| doc_fields = {} | |
| doc_vectors = {} |
| ) | ||
| assert hasattr(found_doc, "score") | ||
| assert found_doc.score >= 0.0 | ||
| #assert found_doc.score >= 0.0 |
There was a problem hiding this comment.
Verify whether this assertion should be removed, or whether disabling it is hiding a real issue that needs fixing.
|
|
||
| from zvec.model.schema import FieldSchema, VectorSchema | ||
| from zvec.extension import RrfReRanker, WeightedReRanker, QwenReRanker | ||
| from distance_helper import * |
| from distance_helper import * | ||
|
|
||
| from zvec import StatusCode | ||
| from distance_helper import * |
|
|
||
| from zvec import StatusCode | ||
| from distance_helper import * | ||
| from fixture_helper import * |
| from zvec import StatusCode | ||
| from distance_helper import * | ||
| from fixture_helper import * | ||
| from doc_helper import * |
| from distance_helper import * | ||
| from fixture_helper import * | ||
| from doc_helper import * | ||
| from params_helper import * |
|
|
||
| import pytest | ||
|
|
||
| from zvec.typing import DataType, StatusCode, MetricType, QuantizeType |
| import pytest | ||
|
|
||
| from zvec.typing import DataType, StatusCode, MetricType, QuantizeType | ||
| from zvec.model import Collection, Doc, VectorQuery |
| from zvec.model.param import ( | ||
| CollectionOption, | ||
| InvertIndexParam, | ||
| HnswIndexParam, | ||
| FlatIndexParam, | ||
| IVFIndexParam, | ||
| HnswQueryParam, | ||
| IVFQueryParam, | ||
| ) |
| IVFQueryParam, | ||
| ) | ||
|
|
||
| from zvec.model.schema import FieldSchema, VectorSchema |
Add recall-related test cases and optimize historical DML/DQL test cases.