diff --git a/docs/configuration/index.md b/docs/configuration/index.md index ece6293e5c5e..76bac1e5905e 100644 --- a/docs/configuration/index.md +++ b/docs/configuration/index.md @@ -1431,6 +1431,7 @@ Additional Peon configs include: |`druid.indexer.task.tmpStorageBytesPerTask`|Maximum number of bytes per task to be used to store temporary files on disk. This config is generally intended for internal usage. Attempts to set it are very likely to be overwritten by the TaskRunner that executes the task, so be sure of what you expect to happen before directly adjusting this configuration parameter. The config is documented here primarily to provide an understanding of what it means if/when someone sees that it has been set. A value of -1 disables this limit. |-1| |`druid.indexer.task.allowHadoopTaskExecution`|Conditional dictating if the cluster allows `index_hadoop` tasks to be executed. `index_hadoop` is deprecated, and defaulting to false will force cluster operators to acknowledge the deprecation and consciously opt in to using index_hadoop with the understanding that it will be removed in the future.|false| |`druid.indexer.server.maxChatRequests`|Maximum number of concurrent requests served by a task's chat handler. Set to 0 to disable limiting.|0| +|`druid.indexing.formats.maxStringLength`|Maximum number of characters to store per string dimension value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. Leave unset to disable truncation; an explicitly configured value must be a positive integer (0 and negative values are rejected as invalid). Can be overridden per-dimension using `maxStringLength` in the [dimension object](../ingestion/ingestion-spec.md#dimension-objects).|0 (no truncation)| If the Peon is running in remote mode, there must be an Overlord up and running. 
Peons in remote mode can set the following configurations: diff --git a/docs/ingestion/ingestion-spec.md b/docs/ingestion/ingestion-spec.md index 496c687bec13..0e1df8e7c995 100644 --- a/docs/ingestion/ingestion-spec.md +++ b/docs/ingestion/ingestion-spec.md @@ -247,6 +247,7 @@ Dimension objects can have the following components: | name | The name of the dimension. This will be used as the field name to read from input records, as well as the column name stored in generated segments.

Note that you can use a [`transformSpec`](#transformspec) if you want to rename columns during ingestion time. | none (required) | | createBitmapIndex | For `string` typed dimensions, whether or not bitmap indexes should be created for the column in generated segments. Creating a bitmap index requires more storage, but speeds up certain kinds of filtering (especially equality and prefix filtering). Only supported for `string` typed dimensions. | `true` | | multiValueHandling | For `string` typed dimensions, specifies the type of handling for [multi-value fields](../querying/multi-value-dimensions.md). Possible values are `array` (ingest string arrays as-is), `sorted_array` (sort string arrays during ingestion), and `sorted_set` (sort and de-duplicate string arrays during ingestion). This parameter is ignored for types other than `string`. | `sorted_array` | +| maxStringLength | For `string` typed dimensions, the maximum number of characters to store per value. Longer values are truncated during ingestion. Does not apply to multi-value string dimensions. A positive value overrides the global [`druid.indexing.formats.maxStringLength`](../configuration/index.md#additional-peon-configuration) property; if omitted or set to 0, the global value applies (no truncation unless the global property is set). 
| `0` (no truncation) | #### Inclusions and exclusions diff --git a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java index bd5314c636b9..ab00952e867a 100644 --- a/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java +++ b/processing/src/main/java/org/apache/druid/data/input/impl/StringDimensionSchema.java @@ -21,15 +21,26 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonInclude; import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.druid.guice.BuiltInTypesModule; import org.apache.druid.segment.DimensionHandler; import org.apache.druid.segment.StringDimensionHandler; import org.apache.druid.segment.column.ColumnType; +import javax.annotation.Nullable; + public class StringDimensionSchema extends DimensionSchema { private static final boolean DEFAULT_CREATE_BITMAP_INDEX = true; + public static int getDefaultMaxStringLength() + { + return BuiltInTypesModule.getMaxStringLength(); + } + + private final int maxStringLength; + @JsonCreator public static StringDimensionSchema create(String name) { @@ -40,15 +51,33 @@ public static StringDimensionSchema create(String name) public StringDimensionSchema( @JsonProperty("name") String name, @JsonProperty("multiValueHandling") MultiValueHandling multiValueHandling, - @JsonProperty("createBitmapIndex") Boolean createBitmapIndex + @JsonProperty("createBitmapIndex") Boolean createBitmapIndex, + @JsonProperty("maxStringLength") @Nullable Integer maxStringLength ) { super(name, multiValueHandling, createBitmapIndex == null ? DEFAULT_CREATE_BITMAP_INDEX : createBitmapIndex); + this.maxStringLength = maxStringLength != null && maxStringLength > 0 ? 
maxStringLength : getDefaultMaxStringLength(); + } + + public StringDimensionSchema( + String name, + MultiValueHandling multiValueHandling, + Boolean createBitmapIndex + ) + { + this(name, multiValueHandling, createBitmapIndex, getDefaultMaxStringLength()); } public StringDimensionSchema(String name) { - this(name, null, DEFAULT_CREATE_BITMAP_INDEX); + this(name, null, DEFAULT_CREATE_BITMAP_INDEX, getDefaultMaxStringLength()); + } + + @JsonProperty + @JsonInclude(JsonInclude.Include.NON_DEFAULT) + public int getMaxStringLength() + { + return maxStringLength; } @Override @@ -73,6 +102,6 @@ public boolean canBeMultiValued() @Override public DimensionHandler getDimensionHandler() { - return new StringDimensionHandler(getName(), getMultiValueHandling(), hasBitmapIndex(), false); + return new StringDimensionHandler(getName(), getMultiValueHandling(), hasBitmapIndex(), false, maxStringLength); } } diff --git a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java index 71433b5cce48..e260a4bd8b66 100644 --- a/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java +++ b/processing/src/main/java/org/apache/druid/guice/BuiltInTypesModule.java @@ -53,6 +53,7 @@ public class BuiltInTypesModule implements DruidModule */ private static DimensionSchema.MultiValueHandling STRING_MV_MODE = DimensionSchema.MultiValueHandling.SORTED_ARRAY; private static IndexSpec DEFAULT_INDEX_SPEC = IndexSpec.builder().build(); + private static int MAX_STRING_LENGTH = 0; /** * @return the configured string multi value handling mode from the system config if set; otherwise, returns @@ -89,6 +90,7 @@ public void configure(Binder binder) public SideEffectRegisterer initDimensionHandlerAndMvHandlingMode(DefaultColumnFormatConfig formatsConfig) { setStringMultiValueHandlingModeIfConfigured(formatsConfig.getStringMultiValueHandlingMode()); + 
setMaxStringLengthIfConfigured(formatsConfig.getMaxStringLength()); setIndexSpecDefaults(formatsConfig.getIndexSpec()); setNestedColumnDefaults(formatsConfig); @@ -128,6 +130,24 @@ private static void registerSerde() } } + private static void setMaxStringLengthIfConfigured(@Nullable Integer maxStringLength) + { + if (maxStringLength != null) { + MAX_STRING_LENGTH = maxStringLength; + } + } + + @VisibleForTesting + public static void setMaxStringLength(int maxStringLength) + { + MAX_STRING_LENGTH = maxStringLength; + } + + public static int getMaxStringLength() + { + return MAX_STRING_LENGTH; + } + private static void setStringMultiValueHandlingModeIfConfigured(@Nullable String stringMultiValueHandlingMode) { if (stringMultiValueHandlingMode != null) { diff --git a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java index 210ec5c686b0..19b875b5f6cb 100644 --- a/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java +++ b/processing/src/main/java/org/apache/druid/segment/DefaultColumnFormatConfig.java @@ -68,6 +68,21 @@ private static String validateMultiValueHandlingMode( return stringMultiValueHandlingMode; } + @Nullable + private static Integer validateMaxStringLength(@Nullable Integer maxStringLength) + { + if (maxStringLength != null && maxStringLength <= 0) { + throw DruidException.forPersona(DruidException.Persona.OPERATOR) + .ofCategory(DruidException.Category.INVALID_INPUT) + .build( + "Invalid value[%s] specified for 'druid.indexing.formats.maxStringLength'." 
+ + " Value must be a positive integer.", + maxStringLength + ); + } + return maxStringLength; + } + @JsonProperty("stringMultiValueHandlingMode") @Nullable private final Integer nestedColumnFormatVersion; @@ -80,11 +95,16 @@ private static String validateMultiValueHandlingMode( @Nullable private final IndexSpec indexSpec; + @JsonProperty("maxStringLength") + @Nullable + private final Integer maxStringLength; + @JsonCreator public DefaultColumnFormatConfig( @JsonProperty("stringMultiValueHandlingMode") @Nullable String stringMultiValueHandlingMode, @JsonProperty("nestedColumnFormatVersion") @Nullable Integer nestedColumnFormatVersion, - @JsonProperty("indexSpec") @Nullable IndexSpec indexSpec + @JsonProperty("indexSpec") @Nullable IndexSpec indexSpec, + @JsonProperty("maxStringLength") @Nullable Integer maxStringLength ) { validateMultiValueHandlingMode(stringMultiValueHandlingMode); @@ -93,6 +113,7 @@ public DefaultColumnFormatConfig( this.stringMultiValueHandlingMode = validateMultiValueHandlingMode(stringMultiValueHandlingMode); this.nestedColumnFormatVersion = nestedColumnFormatVersion; this.indexSpec = indexSpec; + this.maxStringLength = validateMaxStringLength(maxStringLength); } @Nullable @@ -116,6 +137,13 @@ public IndexSpec getIndexSpec() return indexSpec; } + @Nullable + @JsonProperty("maxStringLength") + public Integer getMaxStringLength() + { + return maxStringLength; + } + @Override public boolean equals(Object o) { @@ -128,13 +156,14 @@ public boolean equals(Object o) DefaultColumnFormatConfig that = (DefaultColumnFormatConfig) o; return Objects.equals(nestedColumnFormatVersion, that.nestedColumnFormatVersion) && Objects.equals(stringMultiValueHandlingMode, that.stringMultiValueHandlingMode) - && Objects.equals(indexSpec, that.indexSpec); + && Objects.equals(indexSpec, that.indexSpec) + && Objects.equals(maxStringLength, that.maxStringLength); } @Override public int hashCode() { - return Objects.hash(nestedColumnFormatVersion, 
stringMultiValueHandlingMode, indexSpec); + return Objects.hash(nestedColumnFormatVersion, stringMultiValueHandlingMode, indexSpec, maxStringLength); } @Override @@ -144,6 +173,7 @@ public String toString() "stringMultiValueHandlingMode=" + stringMultiValueHandlingMode + ", nestedColumnFormatVersion=" + nestedColumnFormatVersion + ", indexSpec=" + indexSpec + + ", maxStringLength=" + maxStringLength + '}'; } } diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java index f20ed3bbc1ac..d2b41ab7a4ba 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionHandler.java @@ -104,6 +104,7 @@ private static IndexedInts getRow(ColumnValueSelector s) private final MultiValueHandling multiValueHandling; private final boolean hasBitmapIndexes; private final boolean hasSpatialIndexes; + private final int maxStringLength; public StringDimensionHandler( String dimensionName, @@ -111,11 +112,23 @@ public StringDimensionHandler( boolean hasBitmapIndexes, boolean hasSpatialIndexes ) + { + this(dimensionName, multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, StringDimensionSchema.getDefaultMaxStringLength()); + } + + public StringDimensionHandler( + String dimensionName, + MultiValueHandling multiValueHandling, + boolean hasBitmapIndexes, + boolean hasSpatialIndexes, + int maxStringLength + ) { this.dimensionName = dimensionName; this.multiValueHandling = multiValueHandling; this.hasBitmapIndexes = hasBitmapIndexes; this.hasSpatialIndexes = hasSpatialIndexes; + this.maxStringLength = maxStringLength; } @Override @@ -160,7 +173,7 @@ public SettableColumnValueSelector makeNewSettableEncodedValueSelector() @Override public DimensionIndexer makeIndexer() { - return new StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes); + return new 
StringDimensionIndexer(multiValueHandling, hasBitmapIndexes, hasSpatialIndexes, maxStringLength); } @Override diff --git a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java index d1ce3cf48d03..d41fe6fea980 100644 --- a/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java +++ b/processing/src/main/java/org/apache/druid/segment/StringDimensionIndexer.java @@ -24,6 +24,7 @@ import org.apache.druid.collections.bitmap.BitmapFactory; import org.apache.druid.collections.bitmap.MutableBitmap; import org.apache.druid.data.input.impl.DimensionSchema.MultiValueHandling; +import org.apache.druid.data.input.impl.StringDimensionSchema; import org.apache.druid.error.DruidException; import org.apache.druid.java.util.common.ISE; import org.apache.druid.java.util.common.StringUtils; @@ -57,6 +58,7 @@ public class StringDimensionIndexer extends DictionaryEncodedColumnIndexer 0 && value != null && value.length() > maxStringLength) { + return value.substring(0, maxStringLength); + } + return value; } @Override @@ -92,7 +113,7 @@ public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent(@N dimLookup.add(null); encodedDimensionValues = IntArrays.EMPTY_ARRAY; } else if (dimValuesList.size() == 1) { - encodedDimensionValues = new int[]{dimLookup.add(Evals.asString(dimValuesList.get(0)))}; + encodedDimensionValues = new int[]{dimLookup.add(truncateIfNeeded(Evals.asString(dimValuesList.get(0))))}; } else { hasMultipleValues = true; final String[] dimensionValues = new String[dimValuesList.size()]; @@ -125,7 +146,7 @@ public EncodedKeyComponent processRowValsToUnsortedEncodedKeyComponent(@N encodedDimensionValues = new int[]{dimLookup.add(Evals.asString(StringUtils.encodeBase64String((byte[]) dimValues)))}; } else { - encodedDimensionValues = new int[]{dimLookup.add(Evals.asString(dimValues))}; + encodedDimensionValues = new 
int[]{dimLookup.add(truncateIfNeeded(Evals.asString(dimValues)))}; } // If dictionary size has changed, the sorted lookup is no longer valid. diff --git a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java index cfc9006fe57e..3354ac8b82a1 100644 --- a/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/data/input/impl/StringDimensionSchemaTest.java @@ -54,9 +54,11 @@ public void testDeserializeFromJson() throws JsonProcessingException final String json = "{\n" + " \"name\" : \"dim\",\n" + " \"multiValueHandling\" : \"SORTED_SET\",\n" - + " \"createBitmapIndex\" : false\n" + + " \"createBitmapIndex\" : false,\n" + + " \"maxStringLength\" : 200\n" + "}"; final StringDimensionSchema schema = (StringDimensionSchema) jsonMapper.readValue(json, DimensionSchema.class); Assert.assertEquals(new StringDimensionSchema("dim", MultiValueHandling.SORTED_SET, false), schema); + Assert.assertEquals(200, schema.getMaxStringLength()); } } diff --git a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java index fc43eb2c5a66..189a8a2bdf38 100644 --- a/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java +++ b/processing/src/test/java/org/apache/druid/guice/BuiltInTypesModuleTest.java @@ -33,9 +33,9 @@ import org.apache.druid.segment.data.ConciseBitmapSerdeFactory; import org.apache.druid.segment.nested.NestedCommonFormatColumnFormatSpec; import org.apache.druid.segment.nested.NestedDataComplexTypeSerde; +import org.junit.After; import org.junit.AfterClass; import org.junit.Test; -import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeAll; @@ -56,10 +56,11 @@ public static void setup() 
DimensionHandlerUtils.DIMENSION_HANDLER_PROVIDERS.remove(NestedDataComplexTypeSerde.TYPE_NAME); } - @AfterEach - public void beforeEach() + @After + public void teardownEach() { BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build()); + BuiltInTypesModule.setMaxStringLength(0); } @AfterClass @@ -74,6 +75,7 @@ public static void teardown() ); } BuiltInTypesModule.setIndexSpecDefaults(IndexSpec.builder().build()); + BuiltInTypesModule.setMaxStringLength(0); } @Test @@ -95,6 +97,8 @@ public void testDefaults() DimensionSchema.MultiValueHandling.SORTED_ARRAY, BuiltInTypesModule.getStringMultiValueHandlingMode() ); + + Assertions.assertEquals(0, BuiltInTypesModule.getMaxStringLength()); } @Test @@ -174,6 +178,34 @@ public void testInvalidMultiValueHandlingMode() )); } + @Test + public void testMaxStringLengthOverride() + { + final Properties props = new Properties(); + props.setProperty("druid.indexing.formats.maxStringLength", "500"); + final Injector gadget = makeInjector(props); + + gadget.getInstance(BuiltInTypesModule.SideEffectRegisterer.class); + + Assertions.assertEquals(500, BuiltInTypesModule.getMaxStringLength()); + } + + @Test + public void testInvalidMaxStringLength() + { + final Properties props = new Properties(); + props.setProperty("druid.indexing.formats.maxStringLength", "-1"); + final Injector gadget = makeInjector(props); + + final Exception exception = Assertions.assertThrows( + Exception.class, + () -> gadget.getInstance(BuiltInTypesModule.SideEffectRegisterer.class) + ); + Assertions.assertTrue(exception.getMessage().contains( + "Invalid value[-1] specified for 'druid.indexing.formats.maxStringLength'" + )); + } + private Injector makeInjector(Properties props) { diff --git a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java index 8d5e66589c3c..cd6cd073ac7d 100644 --- 
a/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java +++ b/processing/src/test/java/org/apache/druid/query/scan/NestedDataScanQueryTest.java @@ -76,7 +76,7 @@ public class NestedDataScanQueryTest extends InitializedNullHandlingTest { private static final Logger LOG = new Logger(NestedDataScanQueryTest.class); - DefaultColumnFormatConfig DEFAULT_FORMAT = new DefaultColumnFormatConfig(null, null, null); + DefaultColumnFormatConfig DEFAULT_FORMAT = new DefaultColumnFormatConfig(null, null, null, null); @Rule public final TemporaryFolder tempFolder = new TemporaryFolder(); diff --git a/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java b/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java index 41af9742bfa9..5c787e7666df 100644 --- a/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java +++ b/processing/src/test/java/org/apache/druid/segment/DefaultColumnFormatsConfigTest.java @@ -34,7 +34,7 @@ public class DefaultColumnFormatsConfigTest @Test public void testDefaultsSerde() throws JsonProcessingException { - DefaultColumnFormatConfig defaultColumnFormatConfig = new DefaultColumnFormatConfig(null, null, null); + DefaultColumnFormatConfig defaultColumnFormatConfig = new DefaultColumnFormatConfig(null, null, null, null); String there = MAPPER.writeValueAsString(defaultColumnFormatConfig); DefaultColumnFormatConfig andBack = MAPPER.readValue(there, DefaultColumnFormatConfig.class); Assert.assertEquals(defaultColumnFormatConfig, andBack); @@ -45,12 +45,13 @@ public void testDefaultsSerde() throws JsonProcessingException @Test public void testDefaultsSerdeOverride() throws JsonProcessingException { - DefaultColumnFormatConfig defaultColumnFormatConfig = new DefaultColumnFormatConfig("ARRAY", 5, null); + DefaultColumnFormatConfig defaultColumnFormatConfig = new DefaultColumnFormatConfig("ARRAY", 5, null, null); String there = 
MAPPER.writeValueAsString(defaultColumnFormatConfig); DefaultColumnFormatConfig andBack = MAPPER.readValue(there, DefaultColumnFormatConfig.class); Assert.assertEquals(defaultColumnFormatConfig, andBack); Assert.assertEquals(5, (int) andBack.getNestedColumnFormatVersion()); Assert.assertEquals(DimensionSchema.MultiValueHandling.ARRAY.toString(), andBack.getStringMultiValueHandlingMode()); + Assert.assertNull(andBack.getMaxStringLength()); } @Test diff --git a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java index e627827960b1..2be76c4e1f58 100644 --- a/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java +++ b/processing/src/test/java/org/apache/druid/segment/NestedDataColumnSchemaTest.java @@ -33,7 +33,7 @@ public class NestedDataColumnSchemaTest { - private static final DefaultColumnFormatConfig DEFAULT_CONFIG = new DefaultColumnFormatConfig(null, null, null); + private static final DefaultColumnFormatConfig DEFAULT_CONFIG = new DefaultColumnFormatConfig(null, null, null, null); private static final NestedCommonFormatColumnFormatSpec DEFAULT_NESTED_SPEC = NestedCommonFormatColumnFormatSpec.builder() .setObjectFieldsDictionaryEncoding( @@ -47,7 +47,8 @@ public class NestedDataColumnSchemaTest private static final DefaultColumnFormatConfig DEFAULT_NESTED_SPEC_CONFIG = new DefaultColumnFormatConfig( null, null, - IndexSpec.builder().withAutoColumnFormatSpec(DEFAULT_NESTED_SPEC).build() + IndexSpec.builder().withAutoColumnFormatSpec(DEFAULT_NESTED_SPEC).build(), + null ); private static final ObjectMapper MAPPER; diff --git a/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java b/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java index 7e8adc577b0f..da3361c155b8 100644 --- a/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java +++ 
b/processing/src/test/java/org/apache/druid/segment/StringDimensionIndexerTest.java @@ -26,6 +26,7 @@ import org.junit.Test; import java.util.Arrays; +import java.util.Collections; /** * Unit tests for {@link StringDimensionIndexer}. @@ -140,6 +141,63 @@ public void testBinaryInputs() ); } + @Test + public void testTruncation() + { + final StringDimensionIndexer indexer = new StringDimensionIndexer( + DimensionSchema.MultiValueHandling.SORTED_ARRAY, + true, + false, + 5 + ); + + EncodedKeyComponent keyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent("abcdefghij", false); + Assert.assertEquals( + "abcde", + indexer.convertUnsortedEncodedKeyComponentToActualList(keyComponent.getComponent()) + ); + } + + @Test + public void testSingleValueMvdTruncated() + { + final StringDimensionIndexer indexer = new StringDimensionIndexer( + DimensionSchema.MultiValueHandling.SORTED_ARRAY, + true, + false, + 5 + ); + + EncodedKeyComponent keyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent( + Collections.singletonList("abcdefghij"), + false + ); + Assert.assertEquals( + "abcde", + indexer.convertUnsortedEncodedKeyComponentToActualList(keyComponent.getComponent()) + ); + } + + @Test + public void testMultiValueNotTruncated() + { + final StringDimensionIndexer indexer = new StringDimensionIndexer( + DimensionSchema.MultiValueHandling.SORTED_ARRAY, + true, + false, + 5 + ); + + EncodedKeyComponent keyComponent = indexer.processRowValsToUnsortedEncodedKeyComponent( + Arrays.asList("abcdefghij", "klmnopqrst"), + false + ); + Assert.assertEquals( + Arrays.asList("abcdefghij", "klmnopqrst"), + indexer.convertUnsortedEncodedKeyComponentToActualList(keyComponent.getComponent()) + ); + } + private long verifyEncodedValues( StringDimensionIndexer indexer, Object dimensionValues, diff --git a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java index 186d109b1f53..e713531d3591 100644 --- 
a/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java +++ b/services/src/test/java/org/apache/druid/cli/DumpSegmentTest.java @@ -132,10 +132,10 @@ public void testDumpRows() throws Exception new InjectableValues.Std() .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) .addValue(ObjectMapper.class.getName(), mapper) - .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null)) + .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null, null)) ); Mockito.when(injector.getInstance(Key.get(ObjectMapper.class, Json.class))).thenReturn(mapper); - Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null)); + Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null, null)); List segments = createSegments(tempFolder, closer); QueryableIndex queryableIndex = segments.get(0).as(QueryableIndex.class); @@ -206,10 +206,10 @@ public void testDumpNestedColumn() throws Exception new InjectableValues.Std() .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) .addValue(ObjectMapper.class.getName(), mapper) - .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null)) + .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null, null)) ); Mockito.when(injector.getInstance(Key.get(ObjectMapper.class, Json.class))).thenReturn(mapper); - Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null)); + Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null, null)); List segments = createSegments(tempFolder, closer); QueryableIndex queryableIndex = segments.get(0).as(QueryableIndex.class); @@ -239,10 +239,10 @@ public void 
testDumpNestedColumnPath() throws Exception new InjectableValues.Std() .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) .addValue(ObjectMapper.class.getName(), mapper) - .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null)) + .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null, null)) ); Mockito.when(injector.getInstance(Key.get(ObjectMapper.class, Json.class))).thenReturn(mapper); - Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null)); + Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null, null)); List segments = createSegments(tempFolder, closer); QueryableIndex queryableIndex = segments.get(0).as(QueryableIndex.class); @@ -288,10 +288,10 @@ public void testDumpV10Metadata() throws IOException new InjectableValues.Std() .addValue(ExprMacroTable.class.getName(), TestExprMacroTable.INSTANCE) .addValue(ObjectMapper.class.getName(), mapper) - .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null)) + .addValue(DefaultColumnFormatConfig.class, new DefaultColumnFormatConfig(null, null, null, null)) ); Mockito.when(injector.getInstance(Key.get(ObjectMapper.class, Json.class))).thenReturn(mapper); - Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null)); + Mockito.when(injector.getInstance(DefaultColumnFormatConfig.class)).thenReturn(new DefaultColumnFormatConfig(null, null, null, null)); File f = buildV10Segment(); diff --git a/website/.spelling b/website/.spelling index f2b87496eb2b..aff868c497f3 100644 --- a/website/.spelling +++ b/website/.spelling @@ -426,6 +426,7 @@ maxBytes maxNumericInFilters maxNumFiles maxNumSegments +maxStringLength max_map_count memcached mergeable