diff --git a/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v0/mod.rs b/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v0/mod.rs index ac5c6258cc1..878416720ad 100644 --- a/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v0/mod.rs +++ b/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v0/mod.rs @@ -8,6 +8,8 @@ use crate::consensus::basic::data_contract::{ use crate::consensus::ConsensusError; use crate::data_contract::document_type::index::Index; use crate::data_contract::document_type::index_level::IndexLevel; +#[cfg(feature = "validation")] +use crate::data_contract::document_type::property::ArrayItemType; use crate::data_contract::document_type::property::DocumentProperty; #[cfg(feature = "validation")] use crate::data_contract::document_type::property::DocumentPropertyType; @@ -410,7 +412,10 @@ impl DocumentTypeV0 { } // Validate indexed properties - index.properties.iter().try_for_each(|index_property| { + // Track array properties for validation + let mut array_property_position: Option = None; + + index.properties.iter().enumerate().try_for_each(|(position, index_property)| { // Do not allow to index already indexed system properties if NOT_ALLOWED_SYSTEM_PROPERTIES .contains(&index_property.name.as_str()) @@ -449,9 +454,92 @@ impl DocumentTypeV0 { // Validate indexed property type match &property_definition.property_type { - // Array and objects aren't supported for indexing yet - DocumentPropertyType::Array(_) - | DocumentPropertyType::Object(_) + // Arrays with indexable scalar element types are supported + DocumentPropertyType::Array(array_item_type) => { + // Validate that array item type is indexable + match array_item_type { + // String arrays must have bounded element length + ArrayItemType::String(_, max_len) => { + if max_len.is_none() + || max_len.unwrap() as u16 + > MAX_INDEXED_STRING_PROPERTY_LENGTH + { + return 
Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "maxLength".to_string(), + format!( + "array string items should have maxLength less or equal {}", + MAX_INDEXED_STRING_PROPERTY_LENGTH + ), + ) + .into(), + ))); + } + } + // ByteArray elements must have bounded size + ArrayItemType::ByteArray(_, max_size) => { + if max_size.is_none() + || max_size.unwrap() as u16 + > MAX_INDEXED_BYTE_ARRAY_PROPERTY_LENGTH + { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "maxItems".to_string(), + format!( + "array byteArray items should have maxItems less or equal {}", + MAX_INDEXED_BYTE_ARRAY_PROPERTY_LENGTH + ), + ) + .into(), + ))); + } + } + // These scalar types are allowed + ArrayItemType::Integer + | ArrayItemType::Number + | ArrayItemType::Identifier + | ArrayItemType::Date => {} + // Boolean arrays don't make sense for indexing + ArrayItemType::Boolean => { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexPropertyTypeError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "array of boolean".to_string(), + ) + .into(), + ))); + } + } + + // Check if we already have an array property in this index + if array_property_position.is_some() { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "array".to_string(), + "only one array property is allowed per index".to_string(), + ) + .into(), + ))); + } + + // Record the position of the array property + array_property_position = Some(position); + + Ok(()) + } + // Objects and variable type arrays aren't supported for indexing + DocumentPropertyType::Object(_) | DocumentPropertyType::VariableTypeArray(_) => 
{ Err(ProtocolError::ConsensusError(Box::new( InvalidIndexPropertyTypeError::new( @@ -509,6 +597,23 @@ impl DocumentTypeV0 { Ok(()) } })?; + + // If there's an array property, it must be at the last position + if let Some(pos) = array_property_position { + if pos != index.properties.len() - 1 { + let array_prop_name = &index.properties[pos].name; + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + array_prop_name.to_owned(), + "position".to_string(), + "array property must be the last property in a compound index".to_string(), + ) + .into(), + ))); + } + } } Ok((index.name.clone(), index)) diff --git a/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v1/mod.rs b/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v1/mod.rs index b07517e04a1..adf4c135856 100644 --- a/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v1/mod.rs +++ b/packages/rs-dpp/src/data_contract/document_type/class_methods/try_from_schema/v1/mod.rs @@ -8,6 +8,8 @@ use crate::consensus::basic::data_contract::{ use crate::consensus::ConsensusError; use crate::data_contract::document_type::index::Index; use crate::data_contract::document_type::index_level::IndexLevel; +#[cfg(feature = "validation")] +use crate::data_contract::document_type::property::ArrayItemType; use crate::data_contract::document_type::property::DocumentProperty; #[cfg(feature = "validation")] use crate::data_contract::document_type::property::DocumentPropertyType; @@ -427,7 +429,10 @@ impl DocumentTypeV1 { } // Validate indexed properties - index.properties.iter().try_for_each(|index_property| { + // Track array properties for validation + let mut array_property_position: Option = None; + + index.properties.iter().enumerate().try_for_each(|(position, index_property)| { // Do not allow to index already indexed system properties if 
NOT_ALLOWED_SYSTEM_PROPERTIES .contains(&index_property.name.as_str()) @@ -466,9 +471,92 @@ impl DocumentTypeV1 { // Validate indexed property type match &property_definition.property_type { - // Array and objects aren't supported for indexing yet - DocumentPropertyType::Array(_) - | DocumentPropertyType::Object(_) + // Arrays with indexable scalar element types are supported + DocumentPropertyType::Array(array_item_type) => { + // Validate that array item type is indexable + match array_item_type { + // String arrays must have bounded element length + ArrayItemType::String(_, max_len) => { + if max_len.is_none() + || max_len.unwrap() as u16 + > MAX_INDEXED_STRING_PROPERTY_LENGTH + { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "maxLength".to_string(), + format!( + "array string items should have maxLength less or equal {}", + MAX_INDEXED_STRING_PROPERTY_LENGTH + ), + ) + .into(), + ))); + } + } + // ByteArray elements must have bounded size + ArrayItemType::ByteArray(_, max_size) => { + if max_size.is_none() + || max_size.unwrap() as u16 + > MAX_INDEXED_BYTE_ARRAY_PROPERTY_LENGTH + { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "maxItems".to_string(), + format!( + "array byteArray items should have maxItems less or equal {}", + MAX_INDEXED_BYTE_ARRAY_PROPERTY_LENGTH + ), + ) + .into(), + ))); + } + } + // These scalar types are allowed + ArrayItemType::Integer + | ArrayItemType::Number + | ArrayItemType::Identifier + | ArrayItemType::Date => {} + // Boolean arrays don't make sense for indexing + ArrayItemType::Boolean => { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexPropertyTypeError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "array of 
boolean".to_string(), + ) + .into(), + ))); + } + } + + // Check if we already have an array property in this index + if array_property_position.is_some() { + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + index_property.name.to_owned(), + "array".to_string(), + "only one array property is allowed per index".to_string(), + ) + .into(), + ))); + } + + // Record the position of the array property + array_property_position = Some(position); + + Ok(()) + } + // Objects and variable type arrays aren't supported for indexing + DocumentPropertyType::Object(_) | DocumentPropertyType::VariableTypeArray(_) => { Err(ProtocolError::ConsensusError(Box::new( InvalidIndexPropertyTypeError::new( @@ -526,6 +614,23 @@ impl DocumentTypeV1 { Ok(()) } })?; + + // If there's an array property, it must be at the last position + if let Some(pos) = array_property_position { + if pos != index.properties.len() - 1 { + let array_prop_name = &index.properties[pos].name; + return Err(ProtocolError::ConsensusError(Box::new( + InvalidIndexedPropertyConstraintError::new( + name.to_owned(), + index.name.to_owned(), + array_prop_name.to_owned(), + "position".to_string(), + "array property must be the last property in a compound index".to_string(), + ) + .into(), + ))); + } + } } Ok((index.name.clone(), index)) diff --git a/packages/rs-dpp/src/data_contract/document_type/mod.rs b/packages/rs-dpp/src/data_contract/document_type/mod.rs index aadb6eedbfc..c60cb6c32d3 100644 --- a/packages/rs-dpp/src/data_contract/document_type/mod.rs +++ b/packages/rs-dpp/src/data_contract/document_type/mod.rs @@ -66,6 +66,7 @@ pub(crate) mod property_names { pub const MINIMUM: &str = "minimum"; pub const ENUM: &str = "enum"; pub const MAXIMUM: &str = "maximum"; + pub const ITEMS: &str = "items"; pub const MIN_ITEMS: &str = "minItems"; pub const MAX_ITEMS: &str = "maxItems"; pub const MIN_LENGTH: &str = "minLength"; diff 
--git a/packages/rs-dpp/src/data_contract/document_type/property/array.rs b/packages/rs-dpp/src/data_contract/document_type/property/array.rs index 7768044f2d0..47df76c8187 100644 --- a/packages/rs-dpp/src/data_contract/document_type/property/array.rs +++ b/packages/rs-dpp/src/data_contract/document_type/property/array.rs @@ -1,8 +1,18 @@ +use crate::data_contract::document_type::DocumentPropertyType; use crate::data_contract::errors::DataContractError; use crate::ProtocolError; -use integer_encoding::VarInt; +use integer_encoding::{VarInt, VarIntReader}; use platform_value::Value; use serde::{Deserialize, Serialize}; +use std::io::{BufRead, Read}; + +/// Maximum string length allowed during deserialization of array elements. +/// This prevents DoS attacks via huge length values in corrupted/malicious data. +pub const MAX_STRING_LENGTH_FOR_DESERIALIZATION: usize = 65536; // 64 KB + +/// Maximum byte array length allowed during deserialization of array elements. +/// This prevents DoS attacks via huge length values in corrupted/malicious data. +pub const MAX_BYTE_ARRAY_LENGTH_FOR_DESERIALIZATION: usize = 65536; // 64 KB #[derive(Serialize, Deserialize, Debug, PartialEq, Eq, Clone)] pub enum ArrayItemType { @@ -251,6 +261,187 @@ impl ArrayItemType { } } } + + /// Encodes an array element value for use as an index tree key. + /// This encoding is compatible with the scalar type encoding used for tree keys. + /// Unlike `encode_value_ref_with_size`, this does NOT prepend a length prefix, + /// making it suitable for index key comparisons and tree traversal. 
+ pub fn encode_element_for_tree_keys(&self, value: &Value) -> Result, ProtocolError> { + if value.is_null() { + return Ok(vec![]); + } + match self { + ArrayItemType::String(_, _) => { + let value_as_text = value.as_text().ok_or_else(get_field_type_matching_error)?; + let vec = value_as_text.as_bytes().to_vec(); + if vec.is_empty() { + // we don't want to collide with the definition of an empty string + Ok(vec![0]) + } else { + Ok(vec) + } + } + ArrayItemType::Date => { + let value_as_i64: i64 = value.to_integer().map_err(ProtocolError::ValueError)?; + if value_as_i64 < 0 { + return Err(ProtocolError::DataContractError( + DataContractError::ValueWrongType( + "date timestamp cannot be negative".to_string(), + ), + )); + } + // Use the same encoding as DocumentPropertyType::encode_date_timestamp + // which uses encode_u64 with sign-bit flip for proper lexicographic ordering + Ok(DocumentPropertyType::encode_date_timestamp( + value_as_i64 as u64, + )) + } + ArrayItemType::Integer => { + let value_as_i64: i64 = value.to_integer().map_err(ProtocolError::ValueError)?; + // Use encode_i64 which flips sign bit for proper lexicographic ordering + Ok(DocumentPropertyType::encode_i64(value_as_i64)) + } + ArrayItemType::Number => { + let value_as_f64 = value.to_float().map_err(ProtocolError::ValueError)?; + // Use encode_float which handles sign bit and negative value ordering + Ok(DocumentPropertyType::encode_float(value_as_f64)) + } + ArrayItemType::ByteArray(_, _) => { + let bytes = value.to_binary_bytes()?; + if bytes.is_empty() { + // we don't want to collide with the definition of null + Ok(vec![0]) + } else { + Ok(bytes) + } + } + ArrayItemType::Identifier => { + let bytes = value.to_identifier_bytes()?; + Ok(bytes) + } + ArrayItemType::Boolean => { + let value_as_boolean = value.as_bool().ok_or_else(get_field_type_matching_error)?; + if value_as_boolean { + Ok(vec![1]) + } else { + Ok(vec![0]) + } + } + } + } + + /// Reads a single array element value from a buffer. 
+ /// This is the inverse of `encode_value_ref_with_size`. + pub fn read_from(&self, buf: &mut R) -> Result { + match self { + ArrayItemType::String(_, _) => { + let string_len: usize = buf.read_varint().map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading varint for string length in array".to_string(), + ) + })?; + // Validate string length to prevent DoS via huge allocations + if string_len > MAX_STRING_LENGTH_FOR_DESERIALIZATION { + return Err(DataContractError::CorruptedSerialization(format!( + "string length {} exceeds maximum allowed {}", + string_len, MAX_STRING_LENGTH_FOR_DESERIALIZATION + ))); + } + let mut string_bytes = vec![0u8; string_len]; + buf.read_exact(&mut string_bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading string bytes in array".to_string(), + ) + })?; + let string_value = String::from_utf8(string_bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "invalid UTF-8 in array string".to_string(), + ) + })?; + Ok(Value::Text(string_value)) + } + ArrayItemType::Date => { + let mut date_bytes = [0u8; 8]; + buf.read_exact(&mut date_bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading date bytes in array".to_string(), + ) + })?; + let date_value = f64::from_be_bytes(date_bytes); + Ok(Value::Float(date_value)) + } + ArrayItemType::Integer => { + let mut int_bytes = [0u8; 8]; + buf.read_exact(&mut int_bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading integer bytes in array".to_string(), + ) + })?; + let int_value = i64::from_be_bytes(int_bytes); + Ok(Value::I64(int_value)) + } + ArrayItemType::Number => { + let mut num_bytes = [0u8; 8]; + buf.read_exact(&mut num_bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading number bytes in array".to_string(), + ) + })?; + let num_value = f64::from_be_bytes(num_bytes); + Ok(Value::Float(num_value)) + } + ArrayItemType::ByteArray(_, _) => { + let 
bytes_len: usize = buf.read_varint().map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading varint for byte array length in array".to_string(), + ) + })?; + // Validate byte array length to prevent DoS via huge allocations + if bytes_len > MAX_BYTE_ARRAY_LENGTH_FOR_DESERIALIZATION { + return Err(DataContractError::CorruptedSerialization(format!( + "byte array length {} exceeds maximum allowed {}", + bytes_len, MAX_BYTE_ARRAY_LENGTH_FOR_DESERIALIZATION + ))); + } + let mut bytes = vec![0u8; bytes_len]; + buf.read_exact(&mut bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading byte array bytes in array".to_string(), + ) + })?; + Ok(Value::Bytes(bytes)) + } + ArrayItemType::Identifier => { + let id_len: usize = buf.read_varint().map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading varint for identifier length in array".to_string(), + ) + })?; + if id_len != 32 { + return Err(DataContractError::CorruptedSerialization(format!( + "expected 32 bytes for identifier in array, got {}", + id_len + ))); + } + let mut id_bytes = [0u8; 32]; + buf.read_exact(&mut id_bytes).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading identifier bytes in array".to_string(), + ) + })?; + Ok(Value::Identifier(id_bytes)) + } + ArrayItemType::Boolean => { + let mut bool_byte = [0u8; 1]; + buf.read_exact(&mut bool_byte).map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading boolean byte in array".to_string(), + ) + })?; + Ok(Value::Bool(bool_byte[0] != 0)) + } + } + } } fn get_field_type_matching_error() -> ProtocolError { diff --git a/packages/rs-dpp/src/data_contract/document_type/property/mod.rs b/packages/rs-dpp/src/data_contract/document_type/property/mod.rs index d295ee7b385..d7338fac7e2 100644 --- a/packages/rs-dpp/src/data_contract/document_type/property/mod.rs +++ b/packages/rs-dpp/src/data_contract/document_type/property/mod.rs @@ -11,7 +11,7 @@ use 
crate::data_contract::config::DataContractConfig; use crate::data_contract::document_type::property_names; use crate::prelude::TimestampMillis; use crate::ProtocolError; -use array::ArrayItemType; +pub use array::ArrayItemType; use byteorder::{BigEndian, ReadBytesExt, WriteBytesExt}; use indexmap::IndexMap; use integer_encoding::{VarInt, VarIntReader}; @@ -26,6 +26,11 @@ use serde::Serialize; pub mod array; +/// Maximum number of array items allowed during deserialization. +/// This prevents DoS attacks via huge array lengths in corrupted/malicious data. +/// Indexed arrays in contracts must have maxItems ≤ 255, so 256 provides a safe upper bound. +pub const MAX_ARRAY_ITEMS_FOR_DESERIALIZATION: usize = 256; + // This struct will be changed in future to support more validation logic and serialization // It will become versioned and it will be introduced by a new document type version // @append_only @@ -754,11 +759,33 @@ impl DocumentPropertyType { Ok((Some(Value::Map(values)), false)) } } - DocumentPropertyType::Array(_array_field_type) => Err(DataContractError::Unsupported( - "serialization of arrays not yet supported".to_string(), - )), + DocumentPropertyType::Array(array_item_type) => { + // Read the array length + let array_len: usize = buf.read_varint().map_err(|_| { + DataContractError::CorruptedSerialization( + "error reading varint for array length".to_string(), + ) + })?; + + // Validate array length to prevent DoS via huge allocations + if array_len > MAX_ARRAY_ITEMS_FOR_DESERIALIZATION { + return Err(DataContractError::CorruptedSerialization(format!( + "array length {} exceeds maximum allowed {}", + array_len, MAX_ARRAY_ITEMS_FOR_DESERIALIZATION + ))); + } + + // Read each element + let mut elements = Vec::with_capacity(array_len); + for _ in 0..array_len { + let element = array_item_type.read_from(buf)?; + elements.push(element); + } + + Ok((Some(Value::Array(elements)), false)) + } DocumentPropertyType::VariableTypeArray(_) => 
Err(DataContractError::Unsupported( - "serialization of variable type arrays not yet supported".to_string(), + "deserialization of variable type arrays not yet supported".to_string(), )), } } @@ -1224,13 +1251,16 @@ impl DocumentPropertyType { "we should never try encoding an object".to_string(), ), )), - DocumentPropertyType::Array(_) | DocumentPropertyType::VariableTypeArray(_) => { - Err(ProtocolError::DataContractError( - DataContractError::EncodingDataStructureNotSupported( - "we should never try encoding an array".to_string(), - ), - )) + DocumentPropertyType::Array(item_type) => { + // For Contains queries, we encode a single element (not the whole array) + // using the array's item type + item_type.encode_element_for_tree_keys(value) } + DocumentPropertyType::VariableTypeArray(_) => Err(ProtocolError::DataContractError( + DataContractError::EncodingDataStructureNotSupported( + "we should never try encoding a variable type array".to_string(), + ), + )), } } @@ -2323,28 +2353,81 @@ impl DocumentPropertyType { max_length: value_map.get_optional_integer(property_names::MAX_LENGTH)?, }), "array" => { - // Only handling bytearrays for v1 - // Return an error if it is not a byte array - let Some(is_byte_array) = - value_map.get_optional_bool(property_names::BYTE_ARRAY)? - else { - return Err(DataContractError::InvalidContractStructure( - "only byte arrays are supported now".to_string(), - )); - }; + // Check if this is an array with items (indexed array type) + let items_map: Option> = + value_map.get_optional_str_value_map(property_names::ITEMS)?; + + if let Some(items_map) = items_map { + // This is an array with typed items (like string[], integer[], etc.) 
+ let item_type_str = items_map.get_str(property_names::TYPE)?; + let array_item_type = match item_type_str { + "string" => { + let min_length = + items_map.get_optional_integer(property_names::MIN_LENGTH)?; + let max_length = + items_map.get_optional_integer(property_names::MAX_LENGTH)?; + ArrayItemType::String(min_length, max_length) + } + "integer" => ArrayItemType::Integer, + "number" => ArrayItemType::Number, + "boolean" => ArrayItemType::Boolean, + "date" => ArrayItemType::Date, + "array" => { + // Nested byte array + let is_byte_array = + items_map.get_optional_bool(property_names::BYTE_ARRAY)?; + if is_byte_array == Some(true) { + let min_size = + items_map.get_optional_integer(property_names::MIN_ITEMS)?; + let max_size = + items_map.get_optional_integer(property_names::MAX_ITEMS)?; + match items_map + .get_optional_str(property_names::CONTENT_MEDIA_TYPE)? + { + Some("application/x.dash.dpp.identifier") => { + ArrayItemType::Identifier + } + Some(_) | None => ArrayItemType::ByteArray(min_size, max_size), + } + } else { + return Err(DataContractError::InvalidContractStructure( + "nested arrays must be byte arrays".to_string(), + )); + } + } + _ => { + return Err(DataContractError::InvalidContractStructure(format!( + "unsupported array item type: {}", + item_type_str + ))); + } + }; + DocumentPropertyType::Array(array_item_type) + } else { + // Fallback to byte array handling + let Some(is_byte_array) = + value_map.get_optional_bool(property_names::BYTE_ARRAY)? 
+ else { + return Err(DataContractError::InvalidContractStructure( + "array must have either 'items' or 'byteArray' defined".to_string(), + )); + }; - if !is_byte_array { - return Err(DataContractError::InvalidContractStructure( - "byteArray should always be true if defined".to_string(), - )); - } + if !is_byte_array { + return Err(DataContractError::InvalidContractStructure( + "byteArray should always be true if defined".to_string(), + )); + } - match value_map.get_optional_str(property_names::CONTENT_MEDIA_TYPE)? { - Some("application/x.dash.dpp.identifier") => DocumentPropertyType::Identifier, - Some(_) | None => DocumentPropertyType::ByteArray(ByteArrayPropertySizes { - min_size: value_map.get_optional_integer(property_names::MIN_ITEMS)?, - max_size: value_map.get_optional_integer(property_names::MAX_ITEMS)?, - }), + match value_map.get_optional_str(property_names::CONTENT_MEDIA_TYPE)? { + Some("application/x.dash.dpp.identifier") => { + DocumentPropertyType::Identifier + } + Some(_) | None => DocumentPropertyType::ByteArray(ByteArrayPropertySizes { + min_size: value_map.get_optional_integer(property_names::MIN_ITEMS)?, + max_size: value_map.get_optional_integer(property_names::MAX_ITEMS)?, + }), + } } } "object" => Self::Object(Default::default()), diff --git a/packages/rs-dpp/src/document/document_methods/get_raw_array_elements_for_document_type/mod.rs b/packages/rs-dpp/src/document/document_methods/get_raw_array_elements_for_document_type/mod.rs new file mode 100644 index 00000000000..d603a0e861c --- /dev/null +++ b/packages/rs-dpp/src/document/document_methods/get_raw_array_elements_for_document_type/mod.rs @@ -0,0 +1,3 @@ +mod v0; + +pub(in crate::document) use v0::DocumentGetRawArrayElementsForDocumentTypeV0; diff --git a/packages/rs-dpp/src/document/document_methods/get_raw_array_elements_for_document_type/v0/mod.rs b/packages/rs-dpp/src/document/document_methods/get_raw_array_elements_for_document_type/v0/mod.rs new file mode 100644 index 
00000000000..ed21e29d7f1 --- /dev/null +++ b/packages/rs-dpp/src/document/document_methods/get_raw_array_elements_for_document_type/v0/mod.rs @@ -0,0 +1,71 @@ +use crate::data_contract::document_type::accessors::DocumentTypeV0Getters; +use crate::data_contract::document_type::DocumentPropertyType; +use crate::data_contract::document_type::DocumentTypeRef; +use crate::document::DocumentV0Getters; +use crate::version::PlatformVersion; +use crate::ProtocolError; +use platform_value::btreemap_extensions::BTreeValueMapPathHelper; +use platform_value::Value; +use std::collections::HashSet; + +pub trait DocumentGetRawArrayElementsForDocumentTypeV0: DocumentV0Getters { + /// Return array element values for an indexed array property. + /// Each element is encoded for use as an index tree key. + /// Returns an empty Vec if the field is not an array, is missing, or is empty. + /// Duplicate elements are deduplicated. + fn get_raw_array_elements_for_document_type_v0( + &self, + key_path: &str, + document_type: DocumentTypeRef, + _platform_version: &PlatformVersion, + ) -> Result>, ProtocolError> { + // Get the property definition to determine the array item type + let property = document_type.flattened_properties().get(key_path); + + let array_item_type = match property { + Some(prop) => match &prop.property_type { + DocumentPropertyType::Array(item_type) => item_type, + _ => { + // Not an array property - return empty + return Ok(vec![]); + } + }, + None => { + // Property not found - return empty + return Ok(vec![]); + } + }; + + // Get the array value from the document + let array_value = self.properties().get_optional_at_path(key_path)?; + + match array_value { + None => Ok(vec![]), // Field not present + Some(Value::Array(elements)) => { + // Encode each element and deduplicate + let mut seen: HashSet> = HashSet::new(); + let mut result: Vec> = Vec::with_capacity(elements.len()); + + for element in elements { + // Skip null elements + if element.is_null() { + continue; + 
} + + let encoded = array_item_type.encode_element_for_tree_keys(element)?; + + // Deduplicate + if seen.insert(encoded.clone()) { + result.push(encoded); + } + } + + Ok(result) + } + Some(_) => { + // Field exists but is not an array - this shouldn't happen if schema is valid + Ok(vec![]) + } + } + } +} diff --git a/packages/rs-dpp/src/document/document_methods/mod.rs b/packages/rs-dpp/src/document/document_methods/mod.rs index 6b3f779dde3..977e42e6686 100644 --- a/packages/rs-dpp/src/document/document_methods/mod.rs +++ b/packages/rs-dpp/src/document/document_methods/mod.rs @@ -3,11 +3,13 @@ use crate::data_contract::DataContract; use crate::version::PlatformVersion; use crate::ProtocolError; +mod get_raw_array_elements_for_document_type; mod get_raw_for_contract; mod get_raw_for_document_type; mod hash; mod is_equal_ignoring_timestamps; +pub(in crate::document) use get_raw_array_elements_for_document_type::*; pub(in crate::document) use get_raw_for_contract::*; pub(in crate::document) use get_raw_for_document_type::*; pub(in crate::document) use hash::*; @@ -33,6 +35,16 @@ pub trait DocumentMethodsV0 { platform_version: &PlatformVersion, ) -> Result>, ProtocolError>; + /// Return array element values for an indexed array property. + /// Each element is encoded for use as an index tree key. + /// Returns an empty Vec if the field is not an array, is missing, or is empty. 
+ fn get_raw_array_elements_for_document_type( + &self, + key_path: &str, + document_type: DocumentTypeRef, + platform_version: &PlatformVersion, + ) -> Result>, ProtocolError>; + fn hash( &self, contract: &DataContract, diff --git a/packages/rs-dpp/src/document/mod.rs b/packages/rs-dpp/src/document/mod.rs index 4e2155c5e76..631bd403643 100644 --- a/packages/rs-dpp/src/document/mod.rs +++ b/packages/rs-dpp/src/document/mod.rs @@ -34,8 +34,9 @@ pub const INITIAL_REVISION: u64 = 1; use crate::data_contract::document_type::DocumentTypeRef; use crate::data_contract::DataContract; use crate::document::document_methods::{ - DocumentGetRawForContractV0, DocumentGetRawForDocumentTypeV0, DocumentHashV0Method, - DocumentIsEqualIgnoringTimestampsV0, DocumentMethodsV0, + DocumentGetRawArrayElementsForDocumentTypeV0, DocumentGetRawForContractV0, + DocumentGetRawForDocumentTypeV0, DocumentHashV0Method, DocumentIsEqualIgnoringTimestampsV0, + DocumentMethodsV0, }; use crate::document::errors::DocumentError; use crate::version::PlatformVersion; @@ -143,6 +144,36 @@ impl DocumentMethodsV0 for Document { } } + fn get_raw_array_elements_for_document_type( + &self, + key_path: &str, + document_type: DocumentTypeRef, + platform_version: &PlatformVersion, + ) -> Result>, ProtocolError> { + match self { + Document::V0(document_v0) => { + match platform_version + .dpp + .document_versions + .document_method_versions + .get_raw_array_elements_for_document_type + { + 0 => document_v0.get_raw_array_elements_for_document_type_v0( + key_path, + document_type, + platform_version, + ), + version => Err(ProtocolError::UnknownVersionMismatch { + method: "DocumentMethodV0::get_raw_array_elements_for_document_type" + .to_string(), + known_versions: vec![0], + received: version, + }), + } + } + } + } + fn hash( &self, contract: &DataContract, diff --git a/packages/rs-dpp/src/document/v0/mod.rs b/packages/rs-dpp/src/document/v0/mod.rs index 2bb65198182..0b14a7a43e0 100644 --- 
a/packages/rs-dpp/src/document/v0/mod.rs +++ b/packages/rs-dpp/src/document/v0/mod.rs @@ -19,8 +19,8 @@ use std::fmt; use platform_value::Value; use crate::document::document_methods::{ - DocumentGetRawForContractV0, DocumentGetRawForDocumentTypeV0, DocumentHashV0Method, - DocumentIsEqualIgnoringTimestampsV0, + DocumentGetRawArrayElementsForDocumentTypeV0, DocumentGetRawForContractV0, + DocumentGetRawForDocumentTypeV0, DocumentHashV0Method, DocumentIsEqualIgnoringTimestampsV0, }; use crate::identity::TimestampMillis; @@ -177,6 +177,10 @@ impl DocumentGetRawForDocumentTypeV0 for DocumentV0 { //automatically done } +impl DocumentGetRawArrayElementsForDocumentTypeV0 for DocumentV0 { + //automatically done +} + impl DocumentHashV0Method for DocumentV0 { //automatically done } diff --git a/packages/rs-drive/src/drive/document/delete/remove_indices_for_index_level_for_contract_operations/v0/mod.rs b/packages/rs-drive/src/drive/document/delete/remove_indices_for_index_level_for_contract_operations/v0/mod.rs index f92a34005d3..881f68155a0 100644 --- a/packages/rs-drive/src/drive/document/delete/remove_indices_for_index_level_for_contract_operations/v0/mod.rs +++ b/packages/rs-drive/src/drive/document/delete/remove_indices_for_index_level_for_contract_operations/v0/mod.rs @@ -4,6 +4,8 @@ use grovedb::EstimatedLayerCount::{ApproximateElements, PotentiallyAtMaxElements use grovedb::EstimatedLayerSizes::AllSubtrees; use grovedb::{EstimatedLayerInformation, TransactionArg, TreeType}; +use dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; +use dpp::data_contract::document_type::DocumentPropertyType; use dpp::data_contract::document_type::IndexLevel; use grovedb::EstimatedSumTrees::NoSumTrees; @@ -13,7 +15,7 @@ use crate::util::type_constants::DEFAULT_HASH_SIZE_U8; use crate::util::storage_flags::StorageFlags; -use crate::util::object_size_info::DriveKeyInfo::KeyRef; +use crate::util::object_size_info::DriveKeyInfo::{Key, KeyRef}; use crate::drive::Drive; use 
crate::util::object_size_info::{DocumentAndContractInfo, DocumentInfoV0Methods, PathInfo}; @@ -87,17 +89,17 @@ impl Drive { let mut sub_level_index_path_info = index_path_info.clone(); let index_property_key = KeyRef(name.as_bytes()); - let document_index_field = document_and_contract_info - .owned_document_info - .document_info - .get_raw_for_document_type( - name, - document_type, - document_and_contract_info.owned_document_info.owner_id, - Some((sub_level, event_id)), - platform_version, - )? - .unwrap_or_default(); + // Check if this property is an array type + let is_array_property = document_type + .flattened_properties() + .get(name) + .map(|prop| { + matches!( + prop.property_type, + DocumentPropertyType::Array(_) | DocumentPropertyType::VariableTypeArray(_) + ) + }) + .unwrap_or(false); sub_level_index_path_info.push(index_property_key)?; @@ -128,30 +130,82 @@ impl Drive { ); } - // Iteration 1. the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId - // Iteration 2. the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId//accountReference - - any_fields_null |= document_index_field.is_empty(); - all_fields_null &= document_index_field.is_empty(); + if is_array_property { + // Handle array property - remove index entries for each element + let array_elements = document_and_contract_info + .owned_document_info + .document_info + .get_raw_array_elements_for_document_type( + name, + document_type, + platform_version, + )?; + + if array_elements.is_empty() { + // Empty array has zero elements to remove, so nothing to do. + // This differs from null scalars which have an entry with an empty key. + // No recursion needed since there are no elements to pair with sub-level fields. + continue; + } - // we push the actual value of the index path - sub_level_index_path_info.push(document_index_field)?; - // Iteration 1. 
the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId// - // Iteration 2. the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId//accountReference/ - self.remove_indices_for_index_level_for_contract_operations_v0( - document_and_contract_info, - sub_level_index_path_info, - sub_level, - any_fields_null, - all_fields_null, - storage_flags, - previous_batch_operations, - estimated_costs_only_with_layer_info, - event_id, - transaction, - batch_operations, - platform_version, - )?; + // For each array element, remove the index entry + for element_value in array_elements { + let element_key = Key(element_value); + let mut element_path_info = sub_level_index_path_info.clone(); + + // Push element value to path and recurse + element_path_info.push(element_key)?; + + self.remove_indices_for_index_level_for_contract_operations_v0( + document_and_contract_info, + element_path_info, + sub_level, + any_fields_null, + false, // Not all fields null since we have an element + storage_flags, + previous_batch_operations, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } + } else { + // Handle scalar property - existing logic + let document_index_field = document_and_contract_info + .owned_document_info + .document_info + .get_raw_for_document_type( + name, + document_type, + document_and_contract_info.owned_document_info.owner_id, + Some((sub_level, event_id)), + platform_version, + )? 
+ .unwrap_or_default(); + + any_fields_null |= document_index_field.is_empty(); + all_fields_null &= document_index_field.is_empty(); + + // Push the actual value of the index path + sub_level_index_path_info.push(document_index_field)?; + + self.remove_indices_for_index_level_for_contract_operations_v0( + document_and_contract_info, + sub_level_index_path_info, + sub_level, + any_fields_null, + all_fields_null, + storage_flags, + previous_batch_operations, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } } Ok(()) } diff --git a/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/mod.rs b/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/mod.rs index c22ab61d614..e7688952bbe 100644 --- a/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/mod.rs +++ b/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/mod.rs @@ -29,7 +29,7 @@ impl Drive { /// # Returns /// * `Ok(())` if the operation was successful. /// * `Err(DriveError::UnknownVersionMismatch)` if the drive version does not match known versions. 
- pub(super) fn remove_indices_for_top_index_level_for_contract_operations( + pub(crate) fn remove_indices_for_top_index_level_for_contract_operations( &self, document_and_contract_info: &DocumentAndContractInfo, previous_batch_operations: &Option<&mut Vec>, diff --git a/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/v0/mod.rs b/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/v0/mod.rs index 050982c266f..34340b4890f 100644 --- a/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/v0/mod.rs +++ b/packages/rs-drive/src/drive/document/delete/remove_indices_for_top_index_level_for_contract_operations/v0/mod.rs @@ -11,6 +11,7 @@ use crate::drive::document::unique_event_id; use crate::util::type_constants::DEFAULT_HASH_SIZE_U8; use crate::drive::Drive; +use crate::util::object_size_info::DriveKeyInfo::Key; use crate::util::object_size_info::{DocumentAndContractInfo, DocumentInfoV0Methods, PathInfo}; use crate::error::fee::FeeError; @@ -20,6 +21,7 @@ use crate::fees::op::LowLevelDriveOperation; use dpp::data_contract::accessors::v0::DataContractV0Getters; use dpp::data_contract::config::v0::DataContractConfigGettersV0; use dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; +use dpp::data_contract::document_type::DocumentPropertyType; use crate::drive::document::paths::contract_document_type_path_vec; use dpp::version::PlatformVersion; @@ -88,80 +90,185 @@ impl Drive { let mut index_path: Vec> = contract_document_type_path.clone(); index_path.push(Vec::from(name.as_bytes())); - // with the example of the dashpay contract's first index - // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId - let document_top_field = document_and_contract_info - .owned_document_info - .document_info - .get_raw_for_document_type( - name, - document_type, - 
document_and_contract_info.owned_document_info.owner_id, - Some((sub_level, event_id)), - platform_version, - )? - .unwrap_or_default(); + // Check if this property is an array type + let is_array_property = document_type + .flattened_properties() + .get(name) + .map(|prop| { + matches!( + prop.property_type, + DocumentPropertyType::Array(_) | DocumentPropertyType::VariableTypeArray(_) + ) + }) + .unwrap_or(false); - if let Some(estimated_costs_only_with_layer_info) = estimated_costs_only_with_layer_info - { - let document_top_field_estimated_size = document_and_contract_info + if is_array_property { + // Handle array property - remove index entries for each element + let array_elements = document_and_contract_info .owned_document_info .document_info - .get_estimated_size_for_document_type(name, document_type, platform_version)?; + .get_raw_array_elements_for_document_type( + name, + document_type, + platform_version, + )?; - if document_top_field_estimated_size > u8::MAX as u16 { - return Err(Error::Fee(FeeError::Overflow( - "document top field is too big for being an index", - ))); + if array_elements.is_empty() { + // Empty array - nothing to remove at top level + continue; } - // On this level we will have all the user defined values for the paths - estimated_costs_only_with_layer_info.insert( - KeyInfoPath::from_known_owned_path(index_path.clone()), - EstimatedLayerInformation { - tree_type: TreeType::NormalTree, - estimated_layer_count: PotentiallyAtMaxElements, - estimated_layer_sizes: AllSubtrees( - document_top_field_estimated_size as u8, - NoSumTrees, - storage_flags.map(|s| s.serialized_size()), - ), - }, - ); - } + // For each array element, remove the index entry + for element_value in array_elements { + let element_key = Key(element_value); + if let Some(estimated_costs_only_with_layer_info) = + estimated_costs_only_with_layer_info + { + let document_top_field_estimated_size = document_and_contract_info + .owned_document_info + .document_info + 
.get_estimated_size_for_document_type( + name, + document_type, + platform_version, + )?; + + if document_top_field_estimated_size > u8::MAX as u16 { + return Err(Error::Fee(FeeError::Overflow( + "document top field is too big for being an index", + ))); + } + + estimated_costs_only_with_layer_info.insert( + KeyInfoPath::from_known_owned_path(index_path.clone()), + EstimatedLayerInformation { + tree_type: TreeType::NormalTree, + estimated_layer_count: PotentiallyAtMaxElements, + estimated_layer_sizes: AllSubtrees( + document_top_field_estimated_size as u8, + NoSumTrees, + storage_flags.map(|s| s.serialized_size()), + ), + }, + ); + } + + let any_fields_null = false; // Not null since we have an element + let all_fields_null = false; + + let mut index_path_info = if document_and_contract_info + .owned_document_info + .document_info + .is_document_size() + { + // This is a stateless operation + PathInfo::PathWithSizes(KeyInfoPath::from_known_owned_path( + index_path.clone(), + )) + } else { + PathInfo::PathAsVec::<0>(index_path.clone()) + }; - let any_fields_null = document_top_field.is_empty(); - let all_fields_null = document_top_field.is_empty(); + // Push the element value to the path + index_path_info.push(element_key)?; - let mut index_path_info = if document_and_contract_info - .owned_document_info - .document_info - .is_document_size() - { - // This is a stateless operation - PathInfo::PathWithSizes(KeyInfoPath::from_known_owned_path(index_path)) + self.remove_indices_for_index_level_for_contract_operations( + document_and_contract_info, + index_path_info, + sub_level, + any_fields_null, + all_fields_null, + &storage_flags, + previous_batch_operations, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } } else { - PathInfo::PathAsVec::<0>(index_path) - }; + // Handle scalar property - existing logic + // with the example of the dashpay contract's first index + // the index path is now something 
likeDataContracts/ContractID/Documents(1)/$ownerId + let document_top_field = document_and_contract_info + .owned_document_info + .document_info + .get_raw_for_document_type( + name, + document_type, + document_and_contract_info.owned_document_info.owner_id, + Some((sub_level, event_id)), + platform_version, + )? + .unwrap_or_default(); + + if let Some(estimated_costs_only_with_layer_info) = + estimated_costs_only_with_layer_info + { + let document_top_field_estimated_size = document_and_contract_info + .owned_document_info + .document_info + .get_estimated_size_for_document_type( + name, + document_type, + platform_version, + )?; - // we push the actual value of the index path - index_path_info.push(document_top_field)?; - // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId/ - - self.remove_indices_for_index_level_for_contract_operations( - document_and_contract_info, - index_path_info, - sub_level, - any_fields_null, - all_fields_null, - &storage_flags, - previous_batch_operations, - estimated_costs_only_with_layer_info, - event_id, - transaction, - batch_operations, - platform_version, - )?; + if document_top_field_estimated_size > u8::MAX as u16 { + return Err(Error::Fee(FeeError::Overflow( + "document top field is too big for being an index", + ))); + } + + // On this level we will have all the user defined values for the paths + estimated_costs_only_with_layer_info.insert( + KeyInfoPath::from_known_owned_path(index_path.clone()), + EstimatedLayerInformation { + tree_type: TreeType::NormalTree, + estimated_layer_count: PotentiallyAtMaxElements, + estimated_layer_sizes: AllSubtrees( + document_top_field_estimated_size as u8, + NoSumTrees, + storage_flags.map(|s| s.serialized_size()), + ), + }, + ); + } + + let any_fields_null = document_top_field.is_empty(); + let all_fields_null = document_top_field.is_empty(); + + let mut index_path_info = if document_and_contract_info + .owned_document_info + .document_info + 
.is_document_size() + { + // This is a stateless operation + PathInfo::PathWithSizes(KeyInfoPath::from_known_owned_path(index_path)) + } else { + PathInfo::PathAsVec::<0>(index_path) + }; + + // we push the actual value of the index path + index_path_info.push(document_top_field)?; + // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId/ + + self.remove_indices_for_index_level_for_contract_operations( + document_and_contract_info, + index_path_info, + sub_level, + any_fields_null, + all_fields_null, + &storage_flags, + previous_batch_operations, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } } Ok(()) } diff --git a/packages/rs-drive/src/drive/document/insert/add_indices_for_index_level_for_contract_operations/v0/mod.rs b/packages/rs-drive/src/drive/document/insert/add_indices_for_index_level_for_contract_operations/v0/mod.rs index 94662fd923a..d06a570d14c 100644 --- a/packages/rs-drive/src/drive/document/insert/add_indices_for_index_level_for_contract_operations/v0/mod.rs +++ b/packages/rs-drive/src/drive/document/insert/add_indices_for_index_level_for_contract_operations/v0/mod.rs @@ -3,10 +3,12 @@ use crate::error::fee::FeeError; use crate::error::Error; use crate::fees::op::LowLevelDriveOperation; use crate::util::grove_operations::BatchInsertTreeApplyType; -use crate::util::object_size_info::DriveKeyInfo::KeyRef; +use crate::util::object_size_info::DriveKeyInfo::{Key, KeyRef}; use crate::util::object_size_info::{DocumentAndContractInfo, DocumentInfoV0Methods, PathInfo}; use crate::util::storage_flags::StorageFlags; use crate::util::type_constants::DEFAULT_HASH_SIZE_U8; +use dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; +use dpp::data_contract::document_type::DocumentPropertyType; use dpp::data_contract::document_type::IndexLevel; use dpp::version::PlatformVersion; @@ -91,23 +93,23 @@ impl Drive { let mut sub_level_index_path_info = 
index_path_info.clone(); let index_property_key = KeyRef(name.as_bytes()); - let document_index_field = document_and_contract_info - .owned_document_info - .document_info - .get_raw_for_document_type( - name, - document_type, - document_and_contract_info.owned_document_info.owner_id, - Some((sub_level, event_id)), - platform_version, - )? - .unwrap_or_default(); + // Check if this property is an array type + let is_array_property = document_type + .flattened_properties() + .get(name) + .map(|prop| { + matches!( + prop.property_type, + DocumentPropertyType::Array(_) | DocumentPropertyType::VariableTypeArray(_) + ) + }) + .unwrap_or(false); + // Insert the property name tree (e.g., "hashtags") let path_key_info = index_property_key .clone() .add_path_info(sub_level_index_path_info.clone()); - // here we are inserting an empty tree that will have a subtree of all other index properties self.batch_insert_empty_tree_if_not_exists( path_key_info.clone(), TreeType::NormalTree, @@ -130,7 +132,7 @@ impl Drive { if document_top_field_estimated_size > u8::MAX as u16 { return Err(Error::Fee(FeeError::Overflow( - "document top field is too big for being an index on delete", + "document top field is too big for being an index", ))); } @@ -148,46 +150,113 @@ impl Drive { ); } - // Iteration 1. the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId - // Iteration 2. 
the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId//accountReference + if is_array_property { + // Handle array property - create index entries for each element + let array_elements = document_and_contract_info + .owned_document_info + .document_info + .get_raw_array_elements_for_document_type( + name, + document_type, + platform_version, + )?; - let path_key_info = document_index_field - .clone() - .add_path_info(sub_level_index_path_info.clone()); + if array_elements.is_empty() { + // Empty array has zero elements to index, so we don't create any entries. + // This differs from null scalars which create an entry with an empty key. + // No recursion needed since there are no elements to pair with sub-level fields. + continue; + } - // here we are inserting an empty tree that will have a subtree of all other index properties - self.batch_insert_empty_tree_if_not_exists( - path_key_info.clone(), - TreeType::NormalTree, - *storage_flags, - apply_type, - transaction, - previous_batch_operations, - batch_operations, - &platform_version.drive, - )?; + // For each array element, create an index entry + for element_value in array_elements { + let element_key = Key(element_value); + let mut element_path_info = sub_level_index_path_info.clone(); - any_fields_null |= document_index_field.is_empty(); - all_fields_null &= document_index_field.is_empty(); + // Insert tree for this element value + let element_path_key_info = + element_key.clone().add_path_info(element_path_info.clone()); - // we push the actual value of the index path - sub_level_index_path_info.push(document_index_field)?; - // Iteration 1. the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId// - // Iteration 2. 
the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId//toUserId//accountReference/ - self.add_indices_for_index_level_for_contract_operations_v0( - document_and_contract_info, - sub_level_index_path_info, - sub_level, - any_fields_null, - all_fields_null, - previous_batch_operations, - storage_flags, - estimated_costs_only_with_layer_info, - event_id, - transaction, - batch_operations, - platform_version, - )?; + self.batch_insert_empty_tree_if_not_exists( + element_path_key_info, + TreeType::NormalTree, + *storage_flags, + apply_type, + transaction, + previous_batch_operations, + batch_operations, + &platform_version.drive, + )?; + + // Push element value to path and recurse + element_path_info.push(element_key)?; + + self.add_indices_for_index_level_for_contract_operations_v0( + document_and_contract_info, + element_path_info, + sub_level, + any_fields_null, + false, // Not all fields null since we have an element + previous_batch_operations, + storage_flags, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } + } else { + // Handle scalar property - existing logic + let document_index_field = document_and_contract_info + .owned_document_info + .document_info + .get_raw_for_document_type( + name, + document_type, + document_and_contract_info.owned_document_info.owner_id, + Some((sub_level, event_id)), + platform_version, + )? 
+ .unwrap_or_default(); + + let path_key_info = document_index_field + .clone() + .add_path_info(sub_level_index_path_info.clone()); + + // Insert tree for this field value + self.batch_insert_empty_tree_if_not_exists( + path_key_info.clone(), + TreeType::NormalTree, + *storage_flags, + apply_type, + transaction, + previous_batch_operations, + batch_operations, + &platform_version.drive, + )?; + + any_fields_null |= document_index_field.is_empty(); + all_fields_null &= document_index_field.is_empty(); + + // Push the actual value of the index path + sub_level_index_path_info.push(document_index_field)?; + + self.add_indices_for_index_level_for_contract_operations_v0( + document_and_contract_info, + sub_level_index_path_info, + sub_level, + any_fields_null, + all_fields_null, + previous_batch_operations, + storage_flags, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } } Ok(()) } diff --git a/packages/rs-drive/src/drive/document/insert/add_indices_for_top_index_level_for_contract_operations/v0/mod.rs b/packages/rs-drive/src/drive/document/insert/add_indices_for_top_index_level_for_contract_operations/v0/mod.rs index e7d56f81d7e..1688c851739 100644 --- a/packages/rs-drive/src/drive/document/insert/add_indices_for_top_index_level_for_contract_operations/v0/mod.rs +++ b/packages/rs-drive/src/drive/document/insert/add_indices_for_top_index_level_for_contract_operations/v0/mod.rs @@ -4,6 +4,7 @@ use crate::util::type_constants::DEFAULT_HASH_SIZE_U8; use crate::util::grove_operations::BatchInsertTreeApplyType; use crate::drive::Drive; +use crate::util::object_size_info::DriveKeyInfo::Key; use crate::util::object_size_info::{DocumentAndContractInfo, DocumentInfoV0Methods, PathInfo}; use crate::error::fee::FeeError; @@ -12,6 +13,7 @@ use crate::fees::op::LowLevelDriveOperation; use dpp::data_contract::accessors::v0::DataContractV0Getters; use dpp::data_contract::config::v0::DataContractConfigGettersV0; use 
dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; +use dpp::data_contract::document_type::DocumentPropertyType; use dpp::version::PlatformVersion; @@ -101,94 +103,213 @@ impl Drive { let mut index_path: Vec> = contract_document_type_path.clone(); index_path.push(Vec::from(name.as_bytes())); - // with the example of the dashpay contract's first index - // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId - let document_top_field = document_and_contract_info - .owned_document_info - .document_info - .get_raw_for_document_type( - name, - document_type, - document_and_contract_info.owned_document_info.owner_id, - Some((sub_level, event_id)), - platform_version, - )? - .unwrap_or_default(); - - // The zero will not matter here, because the PathKeyInfo is variable - let path_key_info = document_top_field.clone().add_path::<0>(index_path.clone()); - // here we are inserting an empty tree that will have a subtree of all other index properties - self.batch_insert_empty_tree_if_not_exists( - path_key_info.clone(), - TreeType::NormalTree, - storage_flags, - apply_type, - transaction, - previous_batch_operations, - batch_operations, - drive_version, - )?; - - if let Some(estimated_costs_only_with_layer_info) = estimated_costs_only_with_layer_info - { - let document_top_field_estimated_size = document_and_contract_info + // Check if this property is an array type + let is_array_property = document_type + .flattened_properties() + .get(name) + .map(|prop| { + matches!( + prop.property_type, + DocumentPropertyType::Array(_) | DocumentPropertyType::VariableTypeArray(_) + ) + }) + .unwrap_or(false); + + if is_array_property { + // Handle array property - create index entries for each element + let array_elements = document_and_contract_info .owned_document_info .document_info - .get_estimated_size_for_document_type(name, document_type, platform_version)?; + .get_raw_array_elements_for_document_type( + name, + document_type, + 
platform_version, + )?; - if document_top_field_estimated_size > u8::MAX as u16 { - return Err(Error::Fee(FeeError::Overflow( - "document field is too big for being an index on delete", - ))); + if array_elements.is_empty() { + // Empty array - nothing to index at top level + continue; } - // On this level we will have all the user defined values for the paths - estimated_costs_only_with_layer_info.insert( - KeyInfoPath::from_known_owned_path(index_path.clone()), - EstimatedLayerInformation { - tree_type: TreeType::NormalTree, - estimated_layer_count: PotentiallyAtMaxElements, - estimated_layer_sizes: AllSubtrees( - document_top_field_estimated_size as u8, - NoSumTrees, - storage_flags.map(|s| s.serialized_size()), - ), - }, - ); - } + // For each array element, create an index entry + for element_value in array_elements { + let element_key = Key(element_value); + let path_key_info = element_key.clone().add_path::<0>(index_path.clone()); + + // Insert tree for this element value + self.batch_insert_empty_tree_if_not_exists( + path_key_info.clone(), + TreeType::NormalTree, + storage_flags, + apply_type, + transaction, + previous_batch_operations, + batch_operations, + drive_version, + )?; + + if let Some(estimated_costs_only_with_layer_info) = + estimated_costs_only_with_layer_info + { + let document_top_field_estimated_size = document_and_contract_info + .owned_document_info + .document_info + .get_estimated_size_for_document_type( + name, + document_type, + platform_version, + )?; + + if document_top_field_estimated_size > u8::MAX as u16 { + return Err(Error::Fee(FeeError::Overflow( + "document field is too big for being an index", + ))); + } + + estimated_costs_only_with_layer_info.insert( + KeyInfoPath::from_known_owned_path(index_path.clone()), + EstimatedLayerInformation { + tree_type: TreeType::NormalTree, + estimated_layer_count: PotentiallyAtMaxElements, + estimated_layer_sizes: AllSubtrees( + document_top_field_estimated_size as u8, + NoSumTrees, + 
storage_flags.map(|s| s.serialized_size()), + ), + }, + ); + } - let any_fields_null = document_top_field.is_empty(); - let all_fields_null = document_top_field.is_empty(); + let any_fields_null = false; // Not null since we have an element + let all_fields_null = false; - let mut index_path_info = if document_and_contract_info - .owned_document_info - .document_info - .is_document_size() - { - // This is a stateless operation - PathInfo::PathWithSizes(KeyInfoPath::from_known_owned_path(index_path)) + let mut index_path_info = if document_and_contract_info + .owned_document_info + .document_info + .is_document_size() + { + // This is a stateless operation + PathInfo::PathWithSizes(KeyInfoPath::from_known_owned_path( + index_path.clone(), + )) + } else { + PathInfo::PathAsVec::<0>(index_path.clone()) + }; + + // Push the element value to the path + index_path_info.push(element_key)?; + + self.add_indices_for_index_level_for_contract_operations( + document_and_contract_info, + index_path_info, + sub_level, + any_fields_null, + all_fields_null, + previous_batch_operations, + &storage_flags, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } } else { - PathInfo::PathAsVec::<0>(index_path) - }; + // Handle scalar property - existing logic + // with the example of the dashpay contract's first index + // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId + let document_top_field = document_and_contract_info + .owned_document_info + .document_info + .get_raw_for_document_type( + name, + document_type, + document_and_contract_info.owned_document_info.owner_id, + Some((sub_level, event_id)), + platform_version, + )? 
+ .unwrap_or_default(); - // we push the actual value of the index path - index_path_info.push(document_top_field)?; - // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId/ - - self.add_indices_for_index_level_for_contract_operations( - document_and_contract_info, - index_path_info, - sub_level, - any_fields_null, - all_fields_null, - previous_batch_operations, - &storage_flags, - estimated_costs_only_with_layer_info, - event_id, - transaction, - batch_operations, - platform_version, - )?; + // The zero will not matter here, because the PathKeyInfo is variable + let path_key_info = document_top_field.clone().add_path::<0>(index_path.clone()); + // here we are inserting an empty tree that will have a subtree of all other index properties + self.batch_insert_empty_tree_if_not_exists( + path_key_info.clone(), + TreeType::NormalTree, + storage_flags, + apply_type, + transaction, + previous_batch_operations, + batch_operations, + drive_version, + )?; + + if let Some(estimated_costs_only_with_layer_info) = + estimated_costs_only_with_layer_info + { + let document_top_field_estimated_size = document_and_contract_info + .owned_document_info + .document_info + .get_estimated_size_for_document_type( + name, + document_type, + platform_version, + )?; + + if document_top_field_estimated_size > u8::MAX as u16 { + return Err(Error::Fee(FeeError::Overflow( + "document field is too big for being an index", + ))); + } + + // On this level we will have all the user defined values for the paths + estimated_costs_only_with_layer_info.insert( + KeyInfoPath::from_known_owned_path(index_path.clone()), + EstimatedLayerInformation { + tree_type: TreeType::NormalTree, + estimated_layer_count: PotentiallyAtMaxElements, + estimated_layer_sizes: AllSubtrees( + document_top_field_estimated_size as u8, + NoSumTrees, + storage_flags.map(|s| s.serialized_size()), + ), + }, + ); + } + + let any_fields_null = document_top_field.is_empty(); + let all_fields_null = 
document_top_field.is_empty(); + + let mut index_path_info = if document_and_contract_info + .owned_document_info + .document_info + .is_document_size() + { + // This is a stateless operation + PathInfo::PathWithSizes(KeyInfoPath::from_known_owned_path(index_path)) + } else { + PathInfo::PathAsVec::<0>(index_path) + }; + + // we push the actual value of the index path + index_path_info.push(document_top_field)?; + // the index path is now something likeDataContracts/ContractID/Documents(1)/$ownerId/ + + self.add_indices_for_index_level_for_contract_operations( + document_and_contract_info, + index_path_info, + sub_level, + any_fields_null, + all_fields_null, + previous_batch_operations, + &storage_flags, + estimated_costs_only_with_layer_info, + event_id, + transaction, + batch_operations, + platform_version, + )?; + } } Ok(()) } diff --git a/packages/rs-drive/src/drive/document/insert_contested/add_contested_indices_for_contract_operations/v0/mod.rs b/packages/rs-drive/src/drive/document/insert_contested/add_contested_indices_for_contract_operations/v0/mod.rs index 9eed9dee870..caafa1a7b69 100644 --- a/packages/rs-drive/src/drive/document/insert_contested/add_contested_indices_for_contract_operations/v0/mod.rs +++ b/packages/rs-drive/src/drive/document/insert_contested/add_contested_indices_for_contract_operations/v0/mod.rs @@ -10,6 +10,7 @@ use crate::error::Error; use crate::fees::op::LowLevelDriveOperation; use dpp::data_contract::accessors::v0::DataContractV0Getters; use dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; +use dpp::data_contract::document_type::DocumentPropertyType; use crate::drive::votes::paths::{ vote_contested_resource_contract_documents_indexes_path_vec, @@ -106,6 +107,24 @@ impl Drive { // This is different from document secondary indexes // The reason is that there is only one index so we already know the structure + // Check if this property is an array type - arrays are not supported in contested indexes + let 
is_array_property = document_type + .flattened_properties() + .get(name) + .map(|prop| { + matches!( + prop.property_type, + DocumentPropertyType::Array(_) | DocumentPropertyType::VariableTypeArray(_) + ) + }) + .unwrap_or(false); + + if is_array_property { + return Err(Error::Drive(DriveError::NotSupported( + "Array properties are not supported in contested indexes", + ))); + } + if let Some(estimated_costs_only_with_layer_info) = estimated_costs_only_with_layer_info { let document_top_field_estimated_size = document_and_contract_info diff --git a/packages/rs-drive/src/drive/document/update/internal/update_document_for_contract_operations/v0/mod.rs b/packages/rs-drive/src/drive/document/update/internal/update_document_for_contract_operations/v0/mod.rs index b903ca123c1..96f7214b417 100644 --- a/packages/rs-drive/src/drive/document/update/internal/update_document_for_contract_operations/v0/mod.rs +++ b/packages/rs-drive/src/drive/document/update/internal/update_document_for_contract_operations/v0/mod.rs @@ -19,6 +19,7 @@ use crate::util::storage_flags::StorageFlags; use dpp::block::block_info::BlockInfo; use dpp::data_contract::accessors::v0::DataContractV0Getters; use dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; +use dpp::data_contract::document_type::DocumentPropertyType; use dpp::document::document_methods::DocumentMethodsV0; use dpp::document::serialization_traits::DocumentPlatformConversionMethodsV0; @@ -169,8 +170,77 @@ impl Drive { ))); }; + // Check upfront if any index has array properties. + // If so, we use remove+add for ALL indexes to avoid duplicate processing. + // If not, we use the optimized inline update approach. 
+ let any_index_has_array = document_type.indexes().values().any(|index| { + index.properties.iter().any(|prop| { + document_type + .flattened_properties() + .get(&prop.name) + .map(|p| { + matches!( + p.property_type, + DocumentPropertyType::Array(_) + | DocumentPropertyType::VariableTypeArray(_) + ) + }) + .unwrap_or(false) + }) + }); + + // If any index contains arrays, use remove+add approach for all indexes. + // This avoids the complexity of mixing inline updates with array handling. + if any_index_has_array { + // Create old document info for removal + let old_document_and_contract_info = DocumentAndContractInfo { + owned_document_info: crate::util::object_size_info::OwnedDocumentInfo { + document_info: old_document_info, + owner_id: None, // Use original owner id from old document + }, + contract, + document_type, + }; + + // We need to apply remove operations first in a separate batch, + // then apply add operations. This avoids GroveDB conflicts when + // the same path is both removed and added (for unchanged array elements). 
+ let mut remove_operations: Vec = vec![]; + + // Remove old index entries into a separate operations vec + self.remove_indices_for_top_index_level_for_contract_operations( + &old_document_and_contract_info, + &None, + estimated_costs_only_with_layer_info, + transaction, + &mut remove_operations, + platform_version, + )?; + + // Apply remove operations first + self.apply_batch_low_level_drive_operations( + None, + transaction, + remove_operations, + &mut batch_operations, + &platform_version.drive, + )?; + + // Add new index entries + self.add_indices_for_top_index_level_for_contract_operations( + &document_and_contract_info, + previous_batch_operations, + estimated_costs_only_with_layer_info, + transaction, + &mut batch_operations, + platform_version, + )?; + + return Ok(batch_operations); + } + + // No array indexes - use the optimized inline update approach for scalar-only indexes let mut batch_insertion_cache: HashSet>> = HashSet::new(); - // fourth we need to store a reference to the document for each index for index in document_type.indexes().values() { // at this point the contract path is to the contract documents // for each index the top index component will already have been added @@ -477,6 +547,7 @@ impl Drive { } } } + Ok(batch_operations) } } diff --git a/packages/rs-drive/src/query/conditions.rs b/packages/rs-drive/src/query/conditions.rs index ef2b6244d86..3d6a18207b8 100644 --- a/packages/rs-drive/src/query/conditions.rs +++ b/packages/rs-drive/src/query/conditions.rs @@ -19,8 +19,8 @@ use std::cmp::Ordering; use std::collections::{BTreeMap, BTreeSet}; use std::fmt::Display; use WhereOperator::{ - Between, BetweenExcludeBounds, BetweenExcludeLeft, BetweenExcludeRight, Equal, GreaterThan, - GreaterThanOrEquals, In, LessThan, LessThanOrEquals, StartsWith, + Between, BetweenExcludeBounds, BetweenExcludeLeft, BetweenExcludeRight, Contains, Equal, + GreaterThan, GreaterThanOrEquals, In, LessThan, LessThanOrEquals, StartsWith, }; /// Converts SQL values 
to CBOR. @@ -73,6 +73,8 @@ pub enum WhereOperator { In, /// Starts with StartsWith, + /// Contains - checks if an array field contains a specific value + Contains, } impl WhereOperator { @@ -90,6 +92,7 @@ impl WhereOperator { BetweenExcludeRight => false, In => false, StartsWith => false, + Contains => false, } } @@ -119,6 +122,9 @@ impl WhereOperator { StartsWith => Err(Error::Query(QuerySyntaxError::InvalidWhereClauseOrder( "Startswith clause order invalid", ))), + Contains => Err(Error::Query(QuerySyntaxError::InvalidWhereClauseOrder( + "Contains clause order invalid", + ))), } } } @@ -129,9 +135,8 @@ impl WhereOperator { match self { Equal => false, GreaterThan | GreaterThanOrEquals | LessThan | LessThanOrEquals | Between - | BetweenExcludeBounds | BetweenExcludeLeft | BetweenExcludeRight | In | StartsWith => { - true - } + | BetweenExcludeBounds | BetweenExcludeLeft | BetweenExcludeRight | In | StartsWith + | Contains => true, } } @@ -158,6 +163,7 @@ impl WhereOperator { | "between_exclude_right" => Some(BetweenExcludeRight), "In" | "in" => Some(In), "StartsWith" | "startsWith" | "startswith" | "starts_with" => Some(StartsWith), + "Contains" | "contains" => Some(Contains), &_ => None, } } @@ -232,6 +238,10 @@ impl WhereOperator { (Value::Text(text), Value::Text(prefix)) => text.starts_with(prefix.as_str()), _ => false, }, + Contains => match left_value { + Value::Array(array) => array.contains(right_value), + _ => false, + }, } } @@ -242,6 +252,24 @@ impl WhereOperator { Equal => true, In => matches!(value, Value::Array(_) | Value::Bytes(_)), StartsWith => matches!(value, Value::Text(_)), + Contains => { + // For Contains, the value should match the array's element type + // The property_type here is the Array type, so we check the element type + if let DocumentPropertyType::Array(item_type) = property_type { + use dpp::data_contract::document_type::ArrayItemType; + match item_type { + ArrayItemType::String(_, _) => matches!(value, Value::Text(_)), + 
ArrayItemType::Date => is_numeric_value(value), + ArrayItemType::Integer => is_numeric_value(value), + ArrayItemType::Number => is_numeric_value(value), + ArrayItemType::ByteArray(_, _) => matches!(value, Value::Bytes(_)), + ArrayItemType::Identifier => matches!(value, Value::Identifier(_)), + ArrayItemType::Boolean => matches!(value, Value::Bool(_)), + } + } else { + false + } + } GreaterThan | GreaterThanOrEquals | LessThan | LessThanOrEquals => { match property_type { DocumentPropertyType::F64 => is_numeric_value(value), @@ -307,6 +335,7 @@ impl Display for WhereOperator { BetweenExcludeRight => "BetweenExcludeRight", In => "In", StartsWith => "StartsWith", + Contains => "Contains", }; write!(f, "{}", s) @@ -636,6 +665,7 @@ impl<'a> WhereClause { BetweenExcludeBounds => false, BetweenExcludeRight => false, BetweenExcludeLeft => false, + Contains => false, }) .collect(); @@ -653,6 +683,7 @@ impl<'a> WhereClause { BetweenExcludeBounds => true, BetweenExcludeRight => true, BetweenExcludeLeft => true, + Contains => true, }) .collect(); @@ -1168,6 +1199,29 @@ impl<'a> WhereClause { } } } + Contains => { + // For Contains, we serialize the element value we're searching for + // and insert it as a key. The query path will be adjusted elsewhere + // to navigate to the array element subtree. 
+ let key = document_type.serialize_value_for_key( + self.field.as_str(), + &self.value, + platform_version, + )?; + match starts_at_key_option { + None => { + query.insert_key(key); + } + Some((starts_at_key, included)) => { + if (left_to_right && starts_at_key < key) + || (!left_to_right && starts_at_key > key) + || (included && starts_at_key == key) + { + query.insert_key(key); + } + } + } + } } Ok(query) } @@ -1645,9 +1699,8 @@ pub fn allowed_ops_for_type(property_type: &DocumentPropertyType) -> &'static [W DocumentPropertyType::Identifier => &[Equal, In], DocumentPropertyType::ByteArray(_) => &[Equal, In], DocumentPropertyType::Boolean => &[Equal], - DocumentPropertyType::Object(_) - | DocumentPropertyType::Array(_) - | DocumentPropertyType::VariableTypeArray(_) => &[], + DocumentPropertyType::Object(_) | DocumentPropertyType::VariableTypeArray(_) => &[], + DocumentPropertyType::Array(_) => &[Contains], } } diff --git a/packages/rs-drive/src/query/filter.rs b/packages/rs-drive/src/query/filter.rs index f4bf4be33ec..b7bb7cbdb1e 100644 --- a/packages/rs-drive/src/query/filter.rs +++ b/packages/rs-drive/src/query/filter.rs @@ -535,6 +535,7 @@ impl DriveDocumentQueryFilter<'_> { _ => false, }, WhereOperator::StartsWith => false, + WhereOperator::Contains => false, }; if ok { QuerySyntaxSimpleValidationResult::new() diff --git a/packages/rs-drive/src/util/object_size_info/document_info.rs b/packages/rs-drive/src/util/object_size_info/document_info.rs index 875565e9926..5706b5c3ae0 100644 --- a/packages/rs-drive/src/util/object_size_info/document_info.rs +++ b/packages/rs-drive/src/util/object_size_info/document_info.rs @@ -59,6 +59,15 @@ pub trait DocumentInfoV0Methods { size_info_with_base_event: Option<(&IndexLevel, [u8; 32])>, platform_version: &PlatformVersion, ) -> Result>, Error>; + /// Gets array element values for an indexed array property. + /// Each element is encoded for use as an index tree key. 
+ /// Returns an empty Vec if the field is not an array, is missing, or is empty. + fn get_raw_array_elements_for_document_type( + &self, + key_path: &str, + document_type: DocumentTypeRef, + platform_version: &PlatformVersion, + ) -> Result>, Error>; /// Gets the borrowed document fn get_borrowed_document_and_storage_flags(&self) -> Option<(&Document, Option<&StorageFlags>)>; @@ -248,6 +257,30 @@ impl DocumentInfoV0Methods for DocumentInfo<'_> { } } + /// Gets array element values for an indexed array property. + fn get_raw_array_elements_for_document_type( + &self, + key_path: &str, + document_type: DocumentTypeRef, + platform_version: &PlatformVersion, + ) -> Result>, Error> { + match self { + DocumentInfo::DocumentRefAndSerialization((document, _, _)) + | DocumentInfo::DocumentRefInfo((document, _)) => document + .get_raw_array_elements_for_document_type(key_path, document_type, platform_version) + .map_err(|e| Error::Protocol(Box::new(e))), + DocumentInfo::DocumentOwnedInfo((document, _)) + | DocumentInfo::DocumentAndSerialization((document, _, _)) => document + .get_raw_array_elements_for_document_type(key_path, document_type, platform_version) + .map_err(|e| Error::Protocol(Box::new(e))), + DocumentInfo::DocumentEstimatedAverageSize(_) => { + // For estimated sizes, we can't know the actual array elements + // Return empty - caller should handle this case + Ok(vec![]) + } + } + } + /// Gets the borrowed document fn get_borrowed_document_and_storage_flags( &self, diff --git a/packages/rs-drive/tests/query_tests.rs b/packages/rs-drive/tests/query_tests.rs index aa0e4f22771..157885c5e09 100644 --- a/packages/rs-drive/tests/query_tests.rs +++ b/packages/rs-drive/tests/query_tests.rs @@ -6999,3 +6999,1290 @@ mod tests { assert_eq!(query_result.documents().len(), 1); } } + +#[cfg(feature = "server")] +mod array_index_tests { + use super::*; + use dpp::data_contract::document_type::accessors::DocumentTypeV0Getters; + use 
dpp::document::document_methods::DocumentMethodsV0; + + /// Sets up a contract with indexed array fields and adds test documents + fn setup_array_index_tests(platform_version: &PlatformVersion) -> (Drive, DataContract) { + let drive_config = DriveConfig::default(); + let drive = setup_drive(Some(drive_config)); + let db_transaction = drive.grove.start_transaction(); + + // Create contracts tree + let mut batch = GroveDbOpBatch::new(); + add_init_contracts_structure_operations(&mut batch); + + drive + .grove_apply_batch(batch, false, Some(&db_transaction), &platform_version.drive) + .expect("expected to create contracts tree successfully"); + + // Setup the contract with indexed array fields + let contract = test_helpers::setup_contract( + &drive, + "tests/supporting_files/contract/array-index/array-index-contract.json", + None, + None, + None::, + Some(&db_transaction), + Some(platform_version), + ); + + // Create test documents with array values + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Post 1: hashtags ["dash", "crypto", "blockchain"] + let post1_value = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Hello world from Dash Platform!", + "hashtags": ["dash", "crypto", "blockchain"] + }); + let post1_cbor = cbor_serializer::serializable_value_to_cbor(&post1_value, Some(0)) + .expect("expected to serialize to cbor"); + let post1 = Document::from_cbor(post1_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + let storage_flags = Some(Cow::Owned(StorageFlags::SingleEpoch(0))); + drive + .add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&post1, storage_flags.clone())), + owner_id: None, + }, + contract: &contract, + document_type: post_type, + }, + true, + 
BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document should be inserted"); + + // Post 2: hashtags ["tech", "crypto"] + let post2_value = json!({ + "$id": "BZ5tKL2QDsJhDzCxvSQtn99NkafM6aMUTrr8aQXgQuS", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk6", + "content": "Another post about tech", + "hashtags": ["tech", "crypto"] + }); + let post2_cbor = cbor_serializer::serializable_value_to_cbor(&post2_value, Some(0)) + .expect("expected to serialize to cbor"); + let post2 = Document::from_cbor(post2_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + drive + .add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&post2, storage_flags.clone())), + owner_id: None, + }, + contract: &contract, + document_type: post_type, + }, + true, + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document should be inserted"); + + // Post 3: hashtags ["dash", "defi"] + let post3_value = json!({ + "$id": "CZ6uML3REtJiEzDyvTRuoaaPm1gN7pNVUss9bRYhRvT", + "$ownerId": "BdZVCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk7", + "content": "Post about DeFi on Dash", + "hashtags": ["dash", "defi"] + }); + let post3_cbor = cbor_serializer::serializable_value_to_cbor(&post3_value, Some(0)) + .expect("expected to serialize to cbor"); + let post3 = Document::from_cbor(post3_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + drive + .add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&post3, storage_flags)), + owner_id: None, + }, + contract: &contract, + document_type: post_type, + }, + true, + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document should be inserted"); + + drive 
+ .grove + .commit_transaction(db_transaction) + .unwrap() + .expect("transaction should be committed"); + + (drive, contract) + } + + #[test] + fn should_have_array_index_contract_types() { + // Test that a contract with indexed array fields has correct property types + use dpp::tests::json_document::json_document_to_contract; + + let platform_version = PlatformVersion::latest(); + + // Just load the contract without applying to drive + let contract = json_document_to_contract( + "tests/supporting_files/contract/array-index/array-index-contract.json", + false, + platform_version, + ) + .expect("expected to load contract"); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Check flattened_properties + let hashtags_flat = post_type + .flattened_properties() + .get("hashtags") + .expect("expected hashtags in flattened_properties"); + + // Check properties + let hashtags_nested = post_type + .properties() + .get("hashtags") + .expect("expected hashtags in properties"); + + // Both should be Array type + assert!( + matches!( + hashtags_flat.property_type, + dpp::data_contract::document_type::DocumentPropertyType::Array(_) + ), + "flattened_properties hashtags should be Array, got: {:?}", + hashtags_flat.property_type + ); + + assert!( + matches!( + hashtags_nested.property_type, + dpp::data_contract::document_type::DocumentPropertyType::Array(_) + ), + "properties hashtags should be Array, got: {:?}", + hashtags_nested.property_type + ); + } + + #[test] + fn should_preserve_array_document_values_after_cbor_deserialize() { + // Test that document values are correctly typed after CBOR deserialization + use dpp::document::DocumentV0Getters; + + let platform_version = PlatformVersion::latest(); + + // Create document from JSON -> CBOR -> Document + let post1_value = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": 
"Hello world from Dash Platform!", + "hashtags": ["dash", "crypto", "blockchain"] + }); + let post1_cbor = cbor_serializer::serializable_value_to_cbor(&post1_value, Some(0)) + .expect("expected to serialize to cbor"); + let post1 = Document::from_cbor(post1_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + // Check the hashtags value + let hashtags_value = post1.properties().get("hashtags"); + assert!(hashtags_value.is_some(), "hashtags should exist"); + + if let Some(platform_value::Value::Array(elements)) = hashtags_value { + assert_eq!(elements.len(), 3, "should have 3 hashtags"); + for element in elements.iter() { + assert!(element.is_text(), "each element should be Text"); + } + } else { + panic!("hashtags should be an array, got: {:?}", hashtags_value); + } + } + + #[test] + fn should_serialize_document_with_array_field() { + // Test serializing a document with array field + use dpp::document::serialization_traits::DocumentPlatformConversionMethodsV0; + use dpp::tests::json_document::json_document_to_contract; + + let platform_version = PlatformVersion::latest(); + + // Load the contract + let contract = json_document_to_contract( + "tests/supporting_files/contract/array-index/array-index-contract.json", + false, + platform_version, + ) + .expect("expected to load contract"); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Create document from JSON -> CBOR -> Document + let post1_value = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Hello world from Dash Platform!", + "hashtags": ["dash", "crypto", "blockchain"] + }); + let post1_cbor = cbor_serializer::serializable_value_to_cbor(&post1_value, Some(0)) + .expect("expected to serialize to cbor"); + let post1 = Document::from_cbor(post1_cbor.as_slice(), None, None, platform_version) + 
.expect("document should be properly deserialized"); + + // Serialize using the trait method + let serialized: Result, _> = DocumentPlatformConversionMethodsV0::serialize( + &post1, + post_type, + &contract, + platform_version, + ); + assert!( + serialized.is_ok(), + "Serialization should succeed, got: {:?}", + serialized.as_ref().err() + ); + assert!( + !serialized.unwrap().is_empty(), + "Serialized bytes should not be empty" + ); + } + + #[test] + fn should_serialize_with_drive_contract() { + // Test serializing a document using contract loaded via test_helpers::setup_contract + use dpp::document::serialization_traits::DocumentPlatformConversionMethodsV0; + + let platform_version = PlatformVersion::latest(); + let drive_config = DriveConfig::default(); + let drive = setup_drive(Some(drive_config)); + + { + let db_transaction = drive.grove.start_transaction(); + + // Create contracts tree + let mut batch = GroveDbOpBatch::new(); + add_init_contracts_structure_operations(&mut batch); + drive + .grove_apply_batch(batch, false, Some(&db_transaction), &platform_version.drive) + .expect("expected to create contracts tree successfully"); + + // Load contract using setup_contract (same as failing test) + let contract = test_helpers::setup_contract( + &drive, + "tests/supporting_files/contract/array-index/array-index-contract.json", + None, + None, + None::, + Some(&db_transaction), + Some(platform_version), + ); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Create document from JSON -> CBOR -> Document + let post1_value = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Hello world from Dash Platform!", + "hashtags": ["dash", "crypto", "blockchain"] + }); + let post1_cbor = cbor_serializer::serializable_value_to_cbor(&post1_value, Some(0)) + .expect("expected to serialize to cbor"); + let post1 = 
Document::from_cbor(post1_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + // Serialize using the contract from setup_contract + let serialized: Result, _> = DocumentPlatformConversionMethodsV0::serialize( + &post1, + post_type, + &contract, + platform_version, + ); + assert!( + serialized.is_ok(), + "Serialization should succeed, got: {:?}", + serialized.err() + ); + } + + drop(drive); + } + + #[test] + fn should_create_contract_with_array_index() { + // Test that a contract with indexed array fields can be created + let platform_version = PlatformVersion::latest(); + let (drive, contract) = setup_array_index_tests(platform_version); + + // Verify contract was created successfully + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Verify the hashtags property exists and is an array in flattened_properties + let hashtags_prop = post_type + .flattened_properties() + .get("hashtags") + .expect("expected hashtags property in flattened_properties"); + + assert!( + matches!( + hashtags_prop.property_type, + dpp::data_contract::document_type::DocumentPropertyType::Array(_) + ), + "flattened_properties hashtags should be Array, got: {:?}", + hashtags_prop.property_type + ); + + // Also verify in properties() which is used during serialization + let hashtags_prop_nested = post_type + .properties() + .get("hashtags") + .expect("expected hashtags property in properties"); + + assert!( + matches!( + hashtags_prop_nested.property_type, + dpp::data_contract::document_type::DocumentPropertyType::Array(_) + ), + "properties hashtags should be Array, got: {:?}", + hashtags_prop_nested.property_type + ); + + // Verify indexes exist + assert!( + post_type.indexes().len() >= 2, + "expected at least 2 indexes including the array index" + ); + + drop(drive); + } + + #[test] + fn should_query_documents_with_contains_operator() { + // Test querying documents using 
the contains operator + let platform_version = PlatformVersion::latest(); + let (drive, contract) = setup_array_index_tests(platform_version); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Query for posts containing "dash" hashtag + // This should return 2 posts (post1 and post3) + let query_value = json!({ + "where": [ + ["hashtags", "contains", "dash"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor = cbor_serializer::serializable_value_to_cbor(&query_value, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results.len(), + 2, + "expected 2 posts containing 'dash' hashtag" + ); + + // Query for posts containing "crypto" hashtag + // This should return 2 posts (post1 and post2) + let query_value = json!({ + "where": [ + ["hashtags", "contains", "crypto"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor = cbor_serializer::serializable_value_to_cbor(&query_value, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results.len(), + 2, + "expected 2 posts containing 'crypto' hashtag" + ); + + // Query for posts containing "defi" hashtag + // This should return 1 post (post3) + let query_value = json!({ + "where": [ + ["hashtags", "contains", "defi"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + 
}); + + let where_cbor = cbor_serializer::serializable_value_to_cbor(&query_value, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results.len(), + 1, + "expected 1 post containing 'defi' hashtag" + ); + + drop(drive); + } + + #[test] + fn should_get_raw_array_elements_for_document_type() { + // Test the get_raw_array_elements_for_document_type method + let platform_version = PlatformVersion::latest(); + + // Create a simple document with array field + let post_value = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Test content", + "hashtags": ["alpha", "beta", "gamma"] + }); + + let post_cbor = cbor_serializer::serializable_value_to_cbor(&post_value, Some(0)) + .expect("expected to serialize to cbor"); + let document = Document::from_cbor(post_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + let contract = json_document_to_contract( + "tests/supporting_files/contract/array-index/array-index-contract.json", + false, + platform_version, + ) + .expect("expected to get contract"); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Test getting array elements + let elements = document + .get_raw_array_elements_for_document_type("hashtags", post_type, platform_version) + .expect("should get array elements"); + + assert_eq!(elements.len(), 3, "expected 3 unique elements"); + + // Verify the elements are encoded correctly (as bytes) + assert!(elements.iter().any(|e| e == b"alpha")); + assert!(elements.iter().any(|e| e == b"beta")); + 
assert!(elements.iter().any(|e| e == b"gamma")); + } + + #[test] + fn should_deduplicate_array_elements() { + // Test that duplicate array elements are deduplicated + let platform_version = PlatformVersion::latest(); + + // Create a document with duplicate hashtags + let post_value = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Test content", + "hashtags": ["dash", "crypto", "dash", "crypto", "dash"] + }); + + let post_cbor = cbor_serializer::serializable_value_to_cbor(&post_value, Some(0)) + .expect("expected to serialize to cbor"); + let document = Document::from_cbor(post_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + let contract = json_document_to_contract( + "tests/supporting_files/contract/array-index/array-index-contract.json", + false, + platform_version, + ) + .expect("expected to get contract"); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // Test getting array elements - duplicates should be removed + let elements = document + .get_raw_array_elements_for_document_type("hashtags", post_type, platform_version) + .expect("should get array elements"); + + assert_eq!( + elements.len(), + 2, + "expected 2 unique elements after deduplication" + ); + + // Verify the actual deduplicated element byte values + assert!( + elements.iter().any(|e| e == b"dash"), + "elements should contain 'dash'" + ); + assert!( + elements.iter().any(|e| e == b"crypto"), + "elements should contain 'crypto'" + ); + } + + #[test] + fn should_remove_array_index_entries_on_delete() { + // Test that deleting a document with array fields removes all index entries + let platform_version = PlatformVersion::latest(); + let (drive, contract) = setup_array_index_tests(platform_version); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to 
get post document type"); + + // First verify we can find posts with "dash" hashtag (should be 2: post1 and post3) + let query_value = json!({ + "where": [ + ["hashtags", "contains", "dash"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor = cbor_serializer::serializable_value_to_cbor(&query_value, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results.len(), + 2, + "expected 2 posts with 'dash' before delete" + ); + + // Delete post1 (which has hashtags ["dash", "crypto", "blockchain"]) + let db_transaction = drive.grove.start_transaction(); + + // Post1 ID from setup_array_index_tests + let post1_id: [u8; 32] = bs58::decode("AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ") + .into_vec() + .expect("expected to decode") + .try_into() + .expect("expected 32 bytes"); + + drive + .delete_document_for_contract( + post1_id.into(), + &contract, + "post", + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("expected to delete document"); + + drive + .grove + .commit_transaction(db_transaction) + .unwrap() + .expect("expected to commit"); + + // Now query again - should only find 1 post with "dash" (post3) + let (results_after_delete, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_after_delete.len(), + 1, + "expected 1 post with 'dash' after deleting post1" + ); + + // Also verify "crypto" query - should now only return post2 + let query_crypto = json!({ + "where": [ + ["hashtags", "contains", "crypto"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let 
where_cbor_crypto = cbor_serializer::serializable_value_to_cbor(&query_crypto, None) + .expect("expected to serialize to cbor"); + + let query_crypto = DriveDocumentQuery::from_cbor( + where_cbor_crypto.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_crypto, _, _) = query_crypto + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_crypto.len(), + 1, + "expected 1 post with 'crypto' after deleting post1" + ); + + // And "blockchain" should return 0 (only post1 had it) + let query_blockchain = json!({ + "where": [ + ["hashtags", "contains", "blockchain"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor_blockchain = + cbor_serializer::serializable_value_to_cbor(&query_blockchain, None) + .expect("expected to serialize to cbor"); + + let query_blockchain = DriveDocumentQuery::from_cbor( + where_cbor_blockchain.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_blockchain, _, _) = query_blockchain + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_blockchain.len(), + 0, + "expected 0 posts with 'blockchain' after deleting post1" + ); + + drop(drive); + } + + #[test] + fn should_update_array_index_entries_on_update() { + // Test that updating array fields correctly updates index entries + let platform_version = PlatformVersion::latest(); + let (drive, contract) = setup_array_index_tests(platform_version); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // First verify initial state - "defi" should return 1 post (post3) + let query_defi = json!({ + "where": [ + ["hashtags", "contains", "defi"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor_defi = 
cbor_serializer::serializable_value_to_cbor(&query_defi, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor_defi.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_before, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_before.len(), + 1, + "expected 1 post with 'defi' before update" + ); + + // Update post3 to change hashtags from ["dash", "defi"] to ["dash", "nft"] + let db_transaction = drive.grove.start_transaction(); + + let updated_post3_value = json!({ + "$id": "CZ6uML3REtJiEzDyvTRuoaaPm1gN7pNVUss9bRYhRvT", + "$ownerId": "BdZVCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk7", + "content": "Post about NFTs on Dash", + "hashtags": ["dash", "nft"] + }); + let updated_post3_cbor = + cbor_serializer::serializable_value_to_cbor(&updated_post3_value, Some(0)) + .expect("expected to serialize to cbor"); + let updated_post3 = + Document::from_cbor(updated_post3_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + let storage_flags = Some(Cow::Owned(StorageFlags::SingleEpoch(0))); + + drive + .update_document_for_contract( + &updated_post3, + &contract, + post_type, + None, + BlockInfo::genesis(), + true, + storage_flags, + Some(&db_transaction), + platform_version, + None, + ) + .expect("expected to update document"); + + drive + .grove + .commit_transaction(db_transaction) + .unwrap() + .expect("expected to commit"); + + // "defi" should now return 0 posts + let (results_defi_after, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_defi_after.len(), + 0, + "expected 0 posts with 'defi' after update" + ); + + // "nft" should now return 1 post + let query_nft = json!({ + "where": [ + ["hashtags", "contains", "nft"] + 
], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor_nft = cbor_serializer::serializable_value_to_cbor(&query_nft, None) + .expect("expected to serialize to cbor"); + + let query_nft = DriveDocumentQuery::from_cbor( + where_cbor_nft.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_nft, _, _) = query_nft + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_nft.len(), + 1, + "expected 1 post with 'nft' after update" + ); + + // "dash" should still return 2 posts (post1 and updated post3) + let query_dash = json!({ + "where": [ + ["hashtags", "contains", "dash"] + ], + "limit": 100, + "orderBy": [ + ["hashtags", "asc"] + ] + }); + + let where_cbor_dash = cbor_serializer::serializable_value_to_cbor(&query_dash, None) + .expect("expected to serialize to cbor"); + + let query_dash = DriveDocumentQuery::from_cbor( + where_cbor_dash.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_dash, _, _) = query_dash + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_dash.len(), + 2, + "expected 2 posts with 'dash' after update (unchanged)" + ); + + drop(drive); + } + + #[test] + fn should_not_index_empty_arrays() { + // Test that documents with empty arrays don't create index entries + let platform_version = PlatformVersion::latest(); + let drive_config = DriveConfig::default(); + let drive = setup_drive(Some(drive_config)); + let db_transaction = drive.grove.start_transaction(); + + // Create contracts tree + let mut batch = GroveDbOpBatch::new(); + add_init_contracts_structure_operations(&mut batch); + + drive + .grove_apply_batch(batch, false, Some(&db_transaction), &platform_version.drive) + .expect("expected to create contracts tree successfully"); + + // Load the 
contract with optional hashtags (not required) + let contract = test_helpers::setup_contract( + &drive, + "tests/supporting_files/contract/array-index/array-index-with-types-contract.json", + None, + None, + None::, + Some(&db_transaction), + Some(platform_version), + ); + + let item_type = contract + .document_type_for_name("item") + .expect("expected to get item document type"); + + // Create document with empty tags array + let item_with_empty_array = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "name": "Item with empty array", + "tags": [] + }); + let item_cbor = + cbor_serializer::serializable_value_to_cbor(&item_with_empty_array, Some(0)) + .expect("expected to serialize to cbor"); + let document = Document::from_cbor(item_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + let storage_flags = Some(Cow::Owned(StorageFlags::SingleEpoch(0))); + drive + .add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&document, storage_flags.clone())), + owner_id: None, + }, + contract: &contract, + document_type: item_type, + }, + true, + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document with empty array should be inserted"); + + // Create a document with non-empty tags for comparison + let item_with_tags = json!({ + "$id": "BZ5tKL2QDsJhDzCxvSQtn99NkafM6aMUTrr8aQXgQuS", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk6", + "name": "Item with tags", + "tags": ["test", "example"] + }); + let item2_cbor = cbor_serializer::serializable_value_to_cbor(&item_with_tags, Some(0)) + .expect("expected to serialize to cbor"); + let document2 = Document::from_cbor(item2_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + drive + 
.add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&document2, storage_flags)), + owner_id: None, + }, + contract: &contract, + document_type: item_type, + }, + true, + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document with tags should be inserted"); + + drive + .grove + .commit_transaction(db_transaction) + .unwrap() + .expect("transaction should be committed"); + + // Query for "test" should return 1 document (item_with_tags only) + let query_test = json!({ + "where": [ + ["tags", "contains", "test"] + ], + "limit": 100, + "orderBy": [ + ["tags", "asc"] + ] + }); + + let where_cbor_test = cbor_serializer::serializable_value_to_cbor(&query_test, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor_test.as_slice(), + &contract, + item_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results.len(), + 1, + "expected 1 item with 'test' tag (empty array item not included)" + ); + + drop(drive); + } + + #[test] + fn should_index_integer_array_elements() { + // Test array indexing with integer element type + let platform_version = PlatformVersion::latest(); + let drive_config = DriveConfig::default(); + let drive = setup_drive(Some(drive_config)); + let db_transaction = drive.grove.start_transaction(); + + // Create contracts tree + let mut batch = GroveDbOpBatch::new(); + add_init_contracts_structure_operations(&mut batch); + + drive + .grove_apply_batch(batch, false, Some(&db_transaction), &platform_version.drive) + .expect("expected to create contracts tree successfully"); + + let contract = test_helpers::setup_contract( + &drive, + 
"tests/supporting_files/contract/array-index/array-index-with-types-contract.json", + None, + None, + None::, + Some(&db_transaction), + Some(platform_version), + ); + + let item_type = contract + .document_type_for_name("item") + .expect("expected to get item document type"); + + // Create document with integer scores array including negative values + let item1 = json!({ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "name": "Item 1", + "scores": [100, -50, 200] + }); + let item1_cbor = cbor_serializer::serializable_value_to_cbor(&item1, Some(0)) + .expect("expected to serialize to cbor"); + let document1 = Document::from_cbor(item1_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + let storage_flags = Some(Cow::Owned(StorageFlags::SingleEpoch(0))); + drive + .add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&document1, storage_flags.clone())), + owner_id: None, + }, + contract: &contract, + document_type: item_type, + }, + true, + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document should be inserted"); + + // Create another document with overlapping scores + let item2 = json!({ + "$id": "BZ5tKL2QDsJhDzCxvSQtn99NkafM6aMUTrr8aQXgQuS", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk6", + "name": "Item 2", + "scores": [-50, 300] + }); + let item2_cbor = cbor_serializer::serializable_value_to_cbor(&item2, Some(0)) + .expect("expected to serialize to cbor"); + let document2 = Document::from_cbor(item2_cbor.as_slice(), None, None, platform_version) + .expect("document should be properly deserialized"); + + drive + .add_document_for_contract( + DocumentAndContractInfo { + owned_document_info: OwnedDocumentInfo { + document_info: DocumentRefInfo((&document2, storage_flags)), + owner_id: None, + }, 
+ contract: &contract, + document_type: item_type, + }, + true, + BlockInfo::genesis(), + true, + Some(&db_transaction), + platform_version, + None, + ) + .expect("document should be inserted"); + + drive + .grove + .commit_transaction(db_transaction) + .unwrap() + .expect("transaction should be committed"); + + // Query for items with score -50 (should return 2) + let query_negative = json!({ + "where": [ + ["scores", "contains", -50] + ], + "limit": 100, + "orderBy": [ + ["scores", "asc"] + ] + }); + + let where_cbor = cbor_serializer::serializable_value_to_cbor(&query_negative, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor.as_slice(), + &contract, + item_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!(results.len(), 2, "expected 2 items with score -50"); + + // Query for items with score 100 (should return 1) + let query_positive = json!({ + "where": [ + ["scores", "contains", 100] + ], + "limit": 100, + "orderBy": [ + ["scores", "asc"] + ] + }); + + let where_cbor_positive = + cbor_serializer::serializable_value_to_cbor(&query_positive, None) + .expect("expected to serialize to cbor"); + + let query_positive = DriveDocumentQuery::from_cbor( + where_cbor_positive.as_slice(), + &contract, + item_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_positive, _, _) = query_positive + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!(results_positive.len(), 1, "expected 1 item with score 100"); + + drop(drive); + } + + #[test] + fn should_query_compound_index_with_array() { + // Test compound index queries with array field (using existing contract) + let platform_version = PlatformVersion::latest(); + let (drive, contract) = 
setup_array_index_tests(platform_version); + + let post_type = contract + .document_type_for_name("post") + .expect("expected to get post document type"); + + // The contract has a compound index on [$ownerId, hashtags] + // Post1 owner: AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6, hashtags: ["dash", "crypto", "blockchain"] + // Post2 owner: AcYUCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk6, hashtags: ["tech", "crypto"] + // Post3 owner: BdZVCSvAmUwryNsQqkqqD1a3BnFuzepGtR3Mhh2swLk7, hashtags: ["dash", "defi"] + + // Query using compound index: owner + contains + // Owner1 ID as string (base58 format used in queries) + let owner1_id = "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6"; + + let query_compound = json!({ + "where": [ + ["$ownerId", "==", owner1_id], + ["hashtags", "contains", "dash"] + ], + "limit": 100, + "orderBy": [ + ["$ownerId", "asc"], + ["hashtags", "asc"] + ] + }); + + let where_cbor = cbor_serializer::serializable_value_to_cbor(&query_compound, None) + .expect("expected to serialize to cbor"); + + let query = DriveDocumentQuery::from_cbor( + where_cbor.as_slice(), + &contract, + post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results, _, _) = query + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + // Only post1 belongs to owner1 and has "dash" hashtag + assert_eq!( + results.len(), + 1, + "expected 1 post from owner1 with 'dash' hashtag" + ); + + // Query for owner1 with "crypto" - should also return 1 (post1) + let query_owner1_crypto = json!({ + "where": [ + ["$ownerId", "==", owner1_id], + ["hashtags", "contains", "crypto"] + ], + "limit": 100, + "orderBy": [ + ["$ownerId", "asc"], + ["hashtags", "asc"] + ] + }); + + let where_cbor_crypto = + cbor_serializer::serializable_value_to_cbor(&query_owner1_crypto, None) + .expect("expected to serialize to cbor"); + + let query_crypto = DriveDocumentQuery::from_cbor( + where_cbor_crypto.as_slice(), + &contract, + 
post_type, + &drive.config, + ) + .expect("query should be built"); + + let (results_crypto, _, _) = query_crypto + .execute_raw_results_no_proof(&drive, None, None, platform_version) + .expect("query should execute"); + + assert_eq!( + results_crypto.len(), + 1, + "expected 1 post from owner1 with 'crypto' hashtag" + ); + + drop(drive); + } +} diff --git a/packages/rs-drive/tests/supporting_files/contract/array-index/array-index-contract.json b/packages/rs-drive/tests/supporting_files/contract/array-index/array-index-contract.json new file mode 100644 index 00000000000..eb352d253f3 --- /dev/null +++ b/packages/rs-drive/tests/supporting_files/contract/array-index/array-index-contract.json @@ -0,0 +1,58 @@ +{ + "$format_version": "0", + "id": "8NRsVP68gRuYYwE6djM5jD3GwCGvUqUxRKTdaQRnqsPe", + "ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "version": 1, + "documentSchemas": { + "post": { + "type": "object", + "indices": [ + { + "properties": [ + { + "$ownerId": "asc" + } + ] + }, + { + "properties": [ + { + "hashtags": "asc" + } + ] + }, + { + "properties": [ + { + "$ownerId": "asc" + }, + { + "hashtags": "asc" + } + ] + } + ], + "properties": { + "content": { + "type": "string", + "maxLength": 280, + "position": 0 + }, + "hashtags": { + "type": "array", + "items": { + "type": "string", + "maxLength": 50 + }, + "maxItems": 10, + "position": 1 + } + }, + "required": [ + "content", + "hashtags" + ], + "additionalProperties": false + } + } +} diff --git a/packages/rs-drive/tests/supporting_files/contract/array-index/array-index-with-types-contract.json b/packages/rs-drive/tests/supporting_files/contract/array-index/array-index-with-types-contract.json new file mode 100644 index 00000000000..f136c00fcd4 --- /dev/null +++ b/packages/rs-drive/tests/supporting_files/contract/array-index/array-index-with-types-contract.json @@ -0,0 +1,82 @@ +{ + "$format_version": "0", + "id": "9NRsVP68gRuYYwE6djM5jD3GwCGvUqUxRKTdaQRnqsPf", + "ownerId": 
"AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "version": 1, + "documentSchemas": { + "item": { + "type": "object", + "indices": [ + { + "properties": [ + { + "$ownerId": "asc" + } + ] + }, + { + "properties": [ + { + "tags": "asc" + } + ] + }, + { + "properties": [ + { + "scores": "asc" + } + ] + }, + { + "properties": [ + { + "identifiers": "asc" + } + ] + } + ], + "properties": { + "name": { + "type": "string", + "maxLength": 100, + "position": 0 + }, + "tags": { + "type": "array", + "items": { + "type": "string", + "maxLength": 50 + }, + "maxItems": 10, + "position": 1 + }, + "scores": { + "type": "array", + "items": { + "type": "integer", + "minimum": -1000000, + "maximum": 1000000 + }, + "maxItems": 10, + "position": 2 + }, + "identifiers": { + "type": "array", + "items": { + "type": "array", + "byteArray": true, + "minItems": 32, + "maxItems": 32 + }, + "maxItems": 10, + "position": 3 + } + }, + "required": [ + "name" + ], + "additionalProperties": false + } + } +} diff --git a/packages/rs-drive/tests/supporting_files/contract/array-index/post0.json b/packages/rs-drive/tests/supporting_files/contract/array-index/post0.json new file mode 100644 index 00000000000..c8ae73ef546 --- /dev/null +++ b/packages/rs-drive/tests/supporting_files/contract/array-index/post0.json @@ -0,0 +1,6 @@ +{ + "$id": "AZ4sJK1PCrHgCyBwvSPsm98Nj9eL5LmTLSqp7ZPWfPtQ", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Hello world from Dash Platform! 
#dash #crypto #blockchain", + "hashtags": ["dash", "crypto", "blockchain"] +} diff --git a/packages/rs-drive/tests/supporting_files/contract/array-index/post1.json b/packages/rs-drive/tests/supporting_files/contract/array-index/post1.json new file mode 100644 index 00000000000..251b5e20103 --- /dev/null +++ b/packages/rs-drive/tests/supporting_files/contract/array-index/post1.json @@ -0,0 +1,6 @@ +{ + "$id": "BZ5tKL2QDsihDzCxvSQtn99okofM6oMUTrr8aQXgQuS", + "$ownerId": "AcYUCSvAmUwryNsQqkqqD1o3BnFuzepGtR3Mhh2swLk6", + "content": "Another post about tech and crypto", + "hashtags": ["tech", "crypto"] +} diff --git a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/mod.rs b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/mod.rs index 22148d7eeaf..cc6d7eeb803 100644 --- a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/mod.rs +++ b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/mod.rs @@ -21,5 +21,6 @@ pub struct DocumentMethodVersions { pub hash: FeatureVersion, pub get_raw_for_contract: FeatureVersion, pub get_raw_for_document_type: FeatureVersion, + pub get_raw_array_elements_for_document_type: FeatureVersion, pub try_into_asset_unlock_base_transaction_info: FeatureVersion, } diff --git a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v1.rs b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v1.rs index 8911129ab66..568949f5e99 100644 --- a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v1.rs +++ b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v1.rs @@ -26,6 +26,7 @@ pub const DOCUMENT_VERSIONS_V1: DPPDocumentVersions = DPPDocumentVersions { hash: 0, get_raw_for_contract: 0, get_raw_for_document_type: 0, + get_raw_array_elements_for_document_type: 0, try_into_asset_unlock_base_transaction_info: 0, }, }; diff --git 
a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v2.rs b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v2.rs index 76116755df8..3a51fc49e4a 100644 --- a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v2.rs +++ b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v2.rs @@ -26,6 +26,7 @@ pub const DOCUMENT_VERSIONS_V2: DPPDocumentVersions = DPPDocumentVersions { hash: 0, get_raw_for_contract: 0, get_raw_for_document_type: 0, + get_raw_array_elements_for_document_type: 0, try_into_asset_unlock_base_transaction_info: 0, }, }; diff --git a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v3.rs b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v3.rs index 67837019e8b..920fc536e6c 100644 --- a/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v3.rs +++ b/packages/rs-platform-version/src/version/dpp_versions/dpp_document_versions/v3.rs @@ -26,6 +26,7 @@ pub const DOCUMENT_VERSIONS_V3: DPPDocumentVersions = DPPDocumentVersions { hash: 0, get_raw_for_contract: 0, get_raw_for_document_type: 0, + get_raw_array_elements_for_document_type: 0, try_into_asset_unlock_base_transaction_info: 0, }, };