diff --git a/CHANGELOG.md b/CHANGELOG.md index 254c35c66998b..da0724b8c2937 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Refactor the RefreshStats class to use the Builder pattern instead of constructors ([#19835](https://github.com/opensearch-project/OpenSearch/pull/19835)) - Refactor the DocStats and StoreStats class to use the Builder pattern instead of constructors ([#19863](https://github.com/opensearch-project/OpenSearch/pull/19863)) - Pass registry of headers from ActionPlugin.getRestHeaders to ActionPlugin.getRestHandlerWrapper ([#19875](https://github.com/opensearch-project/OpenSearch/pull/19875)) +- Only store single reference to mapping for indices that share the same mappings in the cluster state ([#19929](https://github.com/opensearch-project/OpenSearch/pull/19929)) - Refactor the RemoteTranslogTransferTracker.Stats and RemoteSegmentTransferTracker.Stats class to use the Builder pattern instead of constructors ([#19837](https://github.com/opensearch-project/OpenSearch/pull/19837)) ### Fixed diff --git a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java index 03b343e94b5ab..3ff3105d14a32 100644 --- a/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java +++ b/server/src/main/java/org/opensearch/cluster/metadata/Metadata.java @@ -52,6 +52,7 @@ import org.opensearch.common.Nullable; import org.opensearch.common.UUIDs; import org.opensearch.common.annotation.PublicApi; +import org.opensearch.common.compress.CompressedXContent; import org.opensearch.common.regex.Regex; import org.opensearch.common.settings.Setting; import org.opensearch.common.settings.Setting.Property; @@ -1607,6 +1608,28 @@ public Builder generateClusterUuidIfNeeded() { return this; } + private void canonicalizeMappingsInPlace() { + final Map pool = new HashMap<>(); + + for (Map.Entry e : indices.entrySet()) { + final IndexMetadata im = e.getValue(); + final MappingMetadata mm = im.mapping(); + if (mm == null) continue; + + final CompressedXContent src = mm.source(); + if (!pool.containsKey(src)) { + // first time we've seen this mapping, do not deduplicate + pool.put(src, mm); + continue; + } + + // Seen this mapping in another index. Perform de-duplication. + final MappingMetadata newMm = pool.get(src); + final IndexMetadata newIm = IndexMetadata.builder(im).putMapping(newMm).build(); + e.setValue(newIm); + } + } + public Metadata build() { DataStreamMetadata dataStreamMetadata = (DataStreamMetadata) this.customs.get(DataStreamMetadata.TYPE); DataStreamMetadata previousDataStreamMetadata = (previousMetadata != null) @@ -1615,6 +1638,8 @@ public Metadata build() { buildSystemTemplatesLookup(); + canonicalizeMappingsInPlace(); + boolean recomputeRequiredforIndicesLookups = (previousMetadata == null) || (indices.equals(previousMetadata.indices) == false) || (previousDataStreamMetadata != null && previousDataStreamMetadata.equals(dataStreamMetadata) == false) diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataMappingTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataMappingTests.java new file mode 100644 index 0000000000000..8cb5f0970bdc8 --- /dev/null +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataMappingTests.java @@ -0,0 +1,114 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.cluster.metadata; + +import org.opensearch.Version; +import org.opensearch.common.compress.CompressedXContent; +import org.opensearch.common.io.stream.BytesStreamOutput; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.io.stream.NamedWriteableAwareStreamInput; +import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.test.OpenSearchTestCase; + +public class MetadataMappingTests extends OpenSearchTestCase { + + private static final String MAPPING_JSON = """ + { + "properties": { + "title": { "type": "keyword" }, + "year": { "type": "integer" } + } + } + """; + + private static IndexMetadata newIndexWithMapping(String name, String mappingJson) throws Exception { + MappingMetadata mm = new MappingMetadata(new CompressedXContent(mappingJson)); + return IndexMetadata.builder(name) + .settings( + Settings.builder() + .put(IndexMetadata.SETTING_VERSION_CREATED, Version.CURRENT.id) // or your current + .put(IndexMetadata.SETTING_NUMBER_OF_SHARDS, 1) + .put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, 0) + ) + .putMapping(mm) + .build(); + } + + private static CompressedXContent cx(String json) { + try { + return new CompressedXContent(json); + } catch (Exception e) { + throw new RuntimeException(e); + } + } + + public void testDedupsIdenticalMappingsByIdentity() throws Exception { + Metadata.Builder mb = Metadata.builder(); + IndexMetadata idx1 = newIndexWithMapping("idx-000001", MAPPING_JSON); + IndexMetadata idx2 = newIndexWithMapping("idx-000002", MAPPING_JSON + ""); // intentional cloning + mb.put(idx1, false); + mb.put(idx2, false); + + Metadata md = mb.build(); + + CompressedXContent aAfter = md.index("idx-000001").mapping().source(); + CompressedXContent bAfter = md.index("idx-000002").mapping().source(); + + // Identity must be the same now + assertSame("De-duplicating should make both point to same instance", aAfter, bAfter); + } + + public void testPartialSharingOnlyDedupsMatchingOnes() throws Exception { + String different = """ + {"properties":{"title":{"type":"keyword"},"score":{"type":"float"}}} + """; + + Metadata md = Metadata.builder() + .put(newIndexWithMapping("idx-A", MAPPING_JSON), false) + .put(newIndexWithMapping("idx-B", MAPPING_JSON + ""), false) // intentional cloning + .put(newIndexWithMapping("idx-C", different), false) + .build(); + + var aSrc = md.index("idx-A").mapping().source(); + var bSrc = md.index("idx-B").mapping().source(); + var cSrc = md.index("idx-C").mapping().source(); + + assertSame("A and B should share the same canonical instance", aSrc, bSrc); + assertNotSame("C should not share with A/B", aSrc, cSrc); + } + + public void testWriteReadRoundTripDedupsMappings() throws Exception { + // Build once + var builder = Metadata.builder() + .put(newIndexWithMapping("i1", MAPPING_JSON), false) + .put(newIndexWithMapping("i2", MAPPING_JSON + ""), false); // intentional cloning + + Metadata md1 = builder.build(); + var s1 = md1.index("i1").mapping().source(); + var s2 = md1.index("i2").mapping().source(); + assertSame(s1, s2); + + BytesStreamOutput out = new BytesStreamOutput(); + md1.writeTo(out); + + // Minimal registry: only IndexGraveyard (the default Custom in Builder) + NamedWriteableRegistry registry = new NamedWriteableRegistry( + java.util.List.of(new NamedWriteableRegistry.Entry(Metadata.Custom.class, IndexGraveyard.TYPE, IndexGraveyard::new)) + ); + + StreamInput in = new NamedWriteableAwareStreamInput(out.bytes().streamInput(), registry); + Metadata md2 = Metadata.readFrom(in); + + var t1 = md2.index("i1").mapping().source(); + var t2 = md2.index("i2").mapping().source(); + // After readFrom(), builder.build() was called internally; de-duplicating should apply again. + assertSame("Round-trip should still end up deduplicated", t1, t2); + } +} diff --git a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java index c6cde72ab60b1..18069c07c2d5b 100644 --- a/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java +++ b/server/src/test/java/org/opensearch/cluster/metadata/MetadataTests.java @@ -988,78 +988,79 @@ public static void assertMultiField(Map properties, String field assertLeafs(subFieldsDef, subFields); } - private static final String FIND_MAPPINGS_TEST_ITEM = "{\n" - + " \"_doc\": {\n" - + " \"_routing\": {\n" - + " \"required\":true\n" - + " }," - + " \"_source\": {\n" - + " \"enabled\":false\n" - + " }," - + " \"properties\": {\n" - + " \"name\": {\n" - + " \"properties\": {\n" - + " \"first\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"last\": {\n" - + " \"type\": \"keyword\"\n" - + " }\n" - + " }\n" - + " },\n" - + " \"birth\": {\n" - + " \"type\": \"date\"\n" - + " },\n" - + " \"age\": {\n" - + " \"type\": \"integer\"\n" - + " },\n" - + " \"ip\": {\n" - + " \"type\": \"ip\"\n" - + " },\n" - + " \"suggest\" : {\n" - + " \"type\": \"completion\"\n" - + " },\n" - + " \"address\": {\n" - + " \"type\": \"object\",\n" - + " \"properties\": {\n" - + " \"street\": {\n" - + " \"type\": \"keyword\"\n" - + " },\n" - + " \"location\": {\n" - + " \"type\": \"geo_point\"\n" - + " },\n" - + " \"area\": {\n" - + " \"type\": \"geo_shape\", \n" - + " \"tree\": \"quadtree\",\n" - + " \"precision\": \"1m\"\n" - + " }\n" - + " }\n" - + " },\n" - + " \"properties\": {\n" - + " \"type\": \"nested\",\n" - + " \"properties\": {\n" - + " \"key\" : {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\" : {\n" - + " \"type\" : \"keyword\"\n" - + " }\n" - + " }\n" - + " },\n" - + " \"value\" : {\n" - + " \"type\": \"text\",\n" - + " \"fields\": {\n" - + " \"keyword\" : {\n" - + " \"type\" : \"keyword\"\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + " }\n" - + "}"; + private static final String FIND_MAPPINGS_TEST_ITEM = """ + { + "_doc": { + "_routing": { + "required":true + }, + "_source": { + "enabled":false + }, + "properties": { + "name": { + "properties": { + "first": { + "type": "keyword" + }, + "last": { + "type": "keyword" + } + } + }, + "birth": { + "type": "date" + }, + "age": { + "type": "integer" + }, + "ip": { + "type": "ip" + }, + "suggest" : { + "type": "completion" + }, + "address": { + "type": "object", + "properties": { + "street": { + "type": "keyword" + }, + "location": { + "type": "geo_point" + }, + "area": { + "type": "geo_shape", + "tree": "quadtree", + "precision": "1m" + } + } + }, + "properties": { + "type": "nested", + "properties": { + "key" : { + "type": "text", + "fields": { + "keyword" : { + "type" : "keyword" + } + } + }, + "value" : { + "type": "text", + "fields": { + "keyword" : { + "type" : "keyword" + } + } + } + } + } + } + } + } + }"""; public void testTransientSettingsOverridePersistentSettings() { final Setting setting = Setting.simpleString("key");