diff --git a/test/collection/test_config.py b/test/collection/test_config.py index 892f9d0f0..011833eec 100644 --- a/test/collection/test_config.py +++ b/test/collection/test_config.py @@ -19,7 +19,6 @@ Multi2VecField, VectorDistances, ) - from weaviate.collections.classes.config_vectors import _VectorConfigCreate DEFAULTS = { @@ -2053,6 +2052,24 @@ def test_config_with_named_vectors( } }, ), + ( + [Configure.MultiVectors.multi2vec_weaviate(name="test", image_field="prop")], + { + "test": { + "vectorizer": { + "multi2multivec-weaviate": { + "imageFields": ["prop"], + } + }, + "vectorIndexConfig": { + "multivector": { + "enabled": True, + }, + }, + "vectorIndexType": "hnsw", + } + }, + ), ( [Configure.Vectors.text2vec_gpt4all(name="test", source_properties=["prop"])], { diff --git a/weaviate/collections/classes/config_vectorizers.py b/weaviate/collections/classes/config_vectorizers.py index 250ad5d26..f597f04b3 100644 --- a/weaviate/collections/classes/config_vectorizers.py +++ b/weaviate/collections/classes/config_vectorizers.py @@ -75,6 +75,7 @@ WeaviateModel: TypeAlias = Literal[ "Snowflake/snowflake-arctic-embed-l-v2.0", "Snowflake/snowflake-arctic-embed-m-v1.5" ] +WeaviateMultimodalModel: TypeAlias = Literal["ModernVBERT/colmodernvbert"] class Vectorizers(str, Enum): @@ -131,6 +132,7 @@ class Vectorizers(str, Enum): MULTI2VEC_COHERE = "multi2vec-cohere" MULTI2VEC_JINAAI = "multi2vec-jinaai" MULTI2MULTI_JINAAI = "multi2multivec-jinaai" + MULTI2MULTI_WEAVIATE = "multi2multivec-weaviate" MULTI2VEC_BIND = "multi2vec-bind" MULTI2VEC_PALM = "multi2vec-palm" # change to google once 1.27 is the lowest supported version MULTI2VEC_VOYAGEAI = "multi2vec-voyageai" @@ -507,6 +509,20 @@ def _to_dict(self) -> Dict[str, Any]: return ret_dict +class _Multi2MultiVecWeaviateConfig(_Multi2VecBase): + vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( + default=Vectorizers.MULTI2MULTI_WEAVIATE, frozen=True, exclude=True + ) + baseURL: Optional[AnyHttpUrl] + model: Optional[str] 
+ + def _to_dict(self) -> Dict[str, Any]: + ret_dict = super()._to_dict() + if self.baseURL is not None: + ret_dict["baseURL"] = self.baseURL.unicode_string() + return ret_dict + + class _Multi2VecClipConfig(_Multi2VecBase): vectorizer: Union[Vectorizers, _EnumLikeStr] = Field( default=Vectorizers.MULTI2VEC_CLIP, frozen=True, exclude=True diff --git a/weaviate/collections/classes/config_vectors.py b/weaviate/collections/classes/config_vectors.py index d2b308b06..365dc4dd0 100644 --- a/weaviate/collections/classes/config_vectors.py +++ b/weaviate/collections/classes/config_vectors.py @@ -37,9 +37,11 @@ VoyageModel, VoyageMultimodalModel, WeaviateModel, + WeaviateMultimodalModel, _Img2VecNeuralConfig, _map_multi2vec_fields, _Multi2MultiVecJinaConfig, + _Multi2MultiVecWeaviateConfig, _Multi2VecAWSConfig, _Multi2VecBindConfig, _Multi2VecClipConfig, @@ -287,6 +289,43 @@ def multi2vec_jinaai( ), ) + @staticmethod + def multi2vec_weaviate( + *, + image_field: str, + name: Optional[str] = None, + encoding: Optional[_MultiVectorEncodingConfigCreate] = None, + quantizer: Optional[_QuantizerConfigCreate] = None, + base_url: Optional[AnyHttpUrl] = None, + model: Optional[Union[WeaviateMultimodalModel, str]] = None, + multi_vector_config: Optional[_MultiVectorConfigCreate] = None, + vector_index_config: Optional[_VectorIndexConfigCreate] = None, + ) -> _VectorConfigCreate: + """Create a vector using the `multi2multivec-weaviate` module. + + Args: + image_field: The image field to use in vectorization. + name: The name of the vector. + encoding: The type of multi-vector encoding to use in the vector index. Defaults to `None`, which uses the server-defined default. + quantizer: The quantizer to use for the vector index. If not provided, no quantization will be applied. + base_url: The base URL to use where API requests should go. Defaults to `None`, which uses the server-defined default. + model: The model to use. Defaults to `None`, which uses the server-defined default. 
+ multi_vector_config: The configuration for the multi-vector index. Use `wvc.config.Configure.VectorIndex.MultiVector` to create a multi-vector configuration. Defaults to `None`. + vector_index_config: The configuration for Weaviate's vector index. Use `wvc.config.Configure.VectorIndex` to create a vector index configuration. Defaults to `None`. + """ + return _VectorConfigCreate( + name=name, + vectorizer=_Multi2MultiVecWeaviateConfig( + baseURL=base_url, + model=model, + imageFields=_map_multi2vec_fields([image_field]), + textFields=None, + ), + vector_index_config=_IndexWrappers.multi( + vector_index_config, quantizer, multi_vector_config, encoding + ), + ) + class _Vectors: @staticmethod