Skip to content

Commit e641d40

Browse files
authored
Apply source excludes early when retrieving the _inference_fields (#135897) (#135954)
The `_inference_fields` metadata field is automatically retrieved when `exclude_vectors` is set to false. In this change, we apply the source excludes early so that, when `_inference_fields` is filtered out by the excludes, we avoid loading it entirely. We also protect against an immutable map when adding `_inference_fields` to `_source`, since we cannot ensure that the map is always mutable.
1 parent ffaf8bd commit e641d40

File tree

4 files changed

+56
-0
lines changed

4 files changed

+56
-0
lines changed

docs/changelog/135897.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 135897
2+
summary: Apply source excludes early when retrieving the `_inference_fields`
3+
area: Search
4+
type: bug
5+
issues: []

server/src/main/java/org/elasticsearch/index/get/ShardGetService.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -426,6 +426,14 @@ private static Boolean shouldExcludeVectorsFromSourceExplicit(FetchSourceContext
426426

427427
/**
 * Decides whether the field named by {@link InferenceMetadataFieldsMapper#NAME} should be excluded from the source.
 * <p>
 * The source filter of the fetch context, when present, is consulted first: the method returns {@code true} if the
 * filter reports the field's path as filtered, and {@code false} if the filter explicitly includes it. When the
 * filter gives no answer, the value from {@code shouldExcludeInferenceFieldsFromSourceExplicit} is used if it is
 * non-null; otherwise the index-level {@code INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING} decides.
 *
 * @param indexSettings      index settings read for the fallback setting value
 * @param fetchSourceContext the fetch source context of the request; may be {@code null}
 * @return {@code true} if the inference metadata field should be excluded from the source
 */
public static boolean shouldExcludeInferenceFieldsFromSource(IndexSettings indexSettings, FetchSourceContext fetchSourceContext) {
428428
var explicit = shouldExcludeInferenceFieldsFromSourceExplicit(fetchSourceContext);
429+
// A null context means there is no source filter to consult.
var filter = fetchSourceContext != null ? fetchSourceContext.filter() : null;
430+
if (filter != null) {
431+
// Checked before the explicit flag so that a source filter that removes the field wins and the field is
// not loaded at all. NOTE(review): the meaning of the second argument (true) is not visible here - confirm.
if (filter.isPathFiltered(InferenceMetadataFieldsMapper.NAME, true)) {
432+
return true;
433+
} else if (filter.isExplicitlyIncluded(InferenceMetadataFieldsMapper.NAME)) {
434+
return false;
435+
}
436+
}
429437
// No decision from the filter: use the explicit value when set, else fall back to the index setting.
return explicit != null ? explicit : INDEX_MAPPING_EXCLUDE_SOURCE_VECTORS_SETTING.get(indexSettings.getSettings());
430438
}
431439

server/src/main/java/org/elasticsearch/search/fetch/subphase/FetchSourcePhase.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import org.elasticsearch.search.lookup.Source;
2020
import org.elasticsearch.search.lookup.SourceFilter;
2121

22+
import java.util.HashMap;
2223
import java.util.Map;
2324

2425
public final class FetchSourcePhase implements FetchSubPhase {
@@ -99,6 +100,10 @@ private Source replaceInferenceMetadataFields(SearchHit hit, Source source) {
99100
return source;
100101
}
101102
var newSource = source.source();
103+
if (newSource instanceof HashMap == false) {
104+
// the map is not mutable
105+
newSource = new HashMap<>(newSource);
106+
}
102107
newSource.put(InferenceMetadataFieldsMapper.NAME, field.getValues().get(0));
103108
return Source.fromMap(newSource, source.sourceContentType());
104109
}

x-pack/plugin/inference/src/yamlRestTest/resources/rest-api-spec/test/inference/30_semantic_text_inference.yml

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1385,4 +1385,42 @@ setup:
13851385
- match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.start_offset: 0 }
13861386
- match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.end_offset: 22 }
13871387

1388+
- do:
1389+
search:
1390+
index: test-index
1391+
body:
1392+
_source:
1393+
exclude_vectors: false
1394+
excludes: ["*"]
1395+
query:
1396+
term:
1397+
_id: doc_1
1398+
1399+
- match: { hits.total.value: 1 }
1400+
- length: { hits.hits.0._source: 0}
1401+
1402+
- do:
1403+
search:
1404+
index: test-index
1405+
body:
1406+
_source:
1407+
exclude_vectors: false
1408+
excludes: ["*_field"]
1409+
query:
1410+
term:
1411+
_id: doc_1
1412+
1413+
- match: { hits.total.value: 1 }
1414+
- length: { hits.hits.0._source: 1}
1415+
- length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks: 1 }
1416+
- length: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field: 1 }
1417+
- exists: hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.embeddings
1418+
- match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.start_offset: 0 }
1419+
- match: { hits.hits.0._source._inference_fields.sparse_field.inference.chunks.sparse_field.0.end_offset: 14 }
1420+
- length: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field: 1 }
1421+
- exists: hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.embeddings
1422+
- match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.start_offset: 0 }
1423+
- match: { hits.hits.0._source._inference_fields.dense_field.inference.chunks.dense_field.0.end_offset: 22 }
1424+
1425+
13881426

0 commit comments

Comments
 (0)