From 5af6d91ba281c3657d20d94464037190fb8f06d7 Mon Sep 17 00:00:00 2001 From: Pete Gillin Date: Mon, 29 Sep 2025 17:13:17 +0100 Subject: [PATCH 1/4] Add convenience API key param to remote reindex This adds a `source.remote.api_key` parameter to the reindex API. This is equivalent to using `source.remote.headers` with `Authorization` set to `ApiKey <API_KEY_VALUE>`. It is a convenience for the user. Note on docs changes: The example request using an API key now uses an `applies-switch` to show both the pre-9.3 and post-9.3 / serverless versions. There are a few drive-by improvements to the docs, including: - Adding some subsections, as suggested by a tech writer. - Fixing the bug which meant that `<OTHER_HOST_URL>` was previously not being rendered, because `<` is a control character. - Repeating the note about using HTTPS for basic auth in the API key section, as we wouldn't recommend sending API keys over plain HTTP either. - A nit, but writing `<OTHER_HOST_URL>:9200` is strange, since we have a placeholder URL, and the port is part of the URL, so that's fixed. 
--- docs/changelog/135949.yaml | 5 ++ .../rest-apis/reindex-indices.md | 68 ++++++++++++++++--- .../index/reindex/ReindexRequest.java | 20 ++++++ .../index/reindex/ReindexRequestTests.java | 39 +++++++++++ 4 files changed, 122 insertions(+), 10 deletions(-) create mode 100644 docs/changelog/135949.yaml diff --git a/docs/changelog/135949.yaml b/docs/changelog/135949.yaml new file mode 100644 index 0000000000000..43cc1df6f016b --- /dev/null +++ b/docs/changelog/135949.yaml @@ -0,0 +1,5 @@ +pr: 135949 +summary: Add convenience API key param to remote reindex +area: Indices APIs +type: enhancement +issues: [] diff --git a/docs/reference/elasticsearch/rest-apis/reindex-indices.md b/docs/reference/elasticsearch/rest-apis/reindex-indices.md index 2bacf0ad88020..ae7feb78f2a8a 100644 --- a/docs/reference/elasticsearch/rest-apis/reindex-indices.md +++ b/docs/reference/elasticsearch/rest-apis/reindex-indices.md @@ -597,7 +597,7 @@ POST _reindex { "source": { "remote": { - "host": "<OTHER_HOST_URL>:9200", + "host": "<OTHER_HOST_URL>", "username": "user", "password": "pass" }, @@ -620,19 +620,54 @@ POST _reindex % TEST[s/"password": "pass"/"password": "x-pack-test-password"/] The `host` parameter must contain a scheme, host, port (for example, `https://otherhost:9200`), and optional path (for example, `https://otherhost:9200/proxy`). -The `username` and `password` parameters are optional, and when they are present the reindex API will connect to the remote {{es}} node using basic auth. -Be sure to use `https` when using basic auth or the password will be sent in plain text. There are a range of settings available to configure the behaviour of the `https` connection. -When using {{ecloud}}, it is also possible to authenticate against the remote cluster through the use of a valid API key: +### Using basic auth [reindex-basic-auth] +To authenticate with the remote cluster using basic auth, set the `username` and `password` parameters, as in the example above. 
+Be sure to use `https` when using basic auth, or the password will be sent in plain text. There are a range of settings available to configure the behaviour of the `https` connection. + +### Using an API key [reindex-api-key] + +When using {{ecloud}}, it is also possible (and encouraged) to authenticate with the remote cluster through the use of a valid API key: + +::::{applies-switch} + +:::{applies-item} { "stack": "ga 9.3", "serverless": } ```console POST _reindex { "source": { "remote": { - "host": "<OTHER_HOST_URL>:9200", + "host": "<OTHER_HOST_URL>", + "api_key": "<API_KEY_VALUE>" + }, + "index": "my-index-000001", + "query": { + "match": { + "test": "data" + } + } + }, + "dest": { + "index": "my-new-index-000001" + } +} +``` +% TEST[setup:host] +% TEST[s/^/PUT my-index-000001\n/] +% TEST[s/otherhost:9200",/\${host}",/] +% TEST[s/"headers": \{[^}]*\}/"username": "test_admin", "password": "x-pack-test-password"/] +::: + +:::{applies-item} { "stack": "ga 9.0" } +```console +POST _reindex +{ + "source": { + "remote": { + "host": "<OTHER_HOST_URL>", "headers": { - "Authorization": "ApiKey API_KEY_VALUE" + "Authorization": "ApiKey <API_KEY_VALUE>" } }, "index": "my-index-000001", @@ -651,15 +686,26 @@ POST _reindex % TEST[s/^/PUT my-index-000001\n/] % TEST[s/otherhost:9200",/\${host}",/] % TEST[s/"headers": \{[^}]*\}/"username": "test_admin", "password": "x-pack-test-password"/] +::: + +:::: + + +Be sure to use `https` when using an API key, or it will be sent in plain text. There are a range of settings available to configure the behaviour of the `https` connection. + +### Whitelisting remote hosts [reindex-remote-whitelist] Remote hosts have to be explicitly allowed in `elasticsearch.yml` using the `reindex.remote.whitelist` property. -It can be set to a comma delimited list of allowed remote `host` and `port` combinations. +It can be set to a comma-delimited list of allowed remote `host` and `port` combinations. Scheme is ignored, only the host and port are used. 
For example: ```yaml reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"] ``` The list of allowed hosts must be configured on any nodes that will coordinate the reindex. + +### Compatibility [reindex-remote-compatibility] + This feature should work with remote clusters of any version of {{es}} you are likely to find. This should allow you to upgrade from any version of {{es}} to the current version by reindexing from a cluster of the old version. ::::{warning} {{es}} does not support forward compatibility across major versions. For example, you cannot reindex from a 7.x cluster into a 6.x cluster. @@ -670,8 +716,10 @@ To enable queries sent to older versions of {{es}} the `query` parameter is sent Reindexing from remote clusters does not support manual or automatic slicing. :::: +### Tuning parameters [reindex-remote-tuning] + Reindexing from a remote server uses an on-heap buffer that defaults to a maximum size of 100mb. -If the remote index includes very large documents you'll need to use a smaller batch size. +If the remote index includes very large documents you'll need to use a smaller batch size. The example below sets the batch size to `10` which is very, very small. ```console @@ -679,7 +727,7 @@ POST _reindex { "source": { "remote": { - "host": "<OTHER_HOST_URL>:9200", + "host": "<OTHER_HOST_URL>", ... 
}, "index": "source", @@ -709,7 +757,7 @@ POST _reindex { "source": { "remote": { - "host": "<OTHER_HOST_URL>:9200", + "host": "<OTHER_HOST_URL>", ..., "socket_timeout": "1m", "connect_timeout": "10s" diff --git a/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java b/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java index c3e429af08a4e..f1730378a58c8 100644 --- a/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java +++ b/server/src/main/java/org/elasticsearch/index/reindex/ReindexRequest.java @@ -38,6 +38,7 @@ import java.io.IOException; import java.net.URI; import java.net.URISyntaxException; +import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.function.Predicate; @@ -417,6 +418,11 @@ static RemoteInfo buildRemoteInfo(Map source) throws IOException } Map headers = extractStringStringMap(remote, "headers"); + String apiKey = extractString(remote, "api_key"); + if (apiKey != null) { + headers = headersWithApiKey(headers, apiKey); + } + TimeValue socketTimeout = extractTimeValue(remote, "socket_timeout", RemoteInfo.DEFAULT_SOCKET_TIMEOUT); TimeValue connectTimeout = extractTimeValue(remote, "connect_timeout", RemoteInfo.DEFAULT_CONNECT_TIMEOUT); if (false == remote.isEmpty()) { @@ -493,4 +499,18 @@ static void setMaxDocsValidateIdentical(AbstractBulkByScrollRequest request, request.setMaxDocs(maxDocs); } } + + /** + * Returns a headers map with the {@code Authorization} key set to the value {@code "ApiKey <apiKey>"}. If the original map is a + * {@link HashMap}, it is mutated; if not (e.g. it is {@link java.util.Collections#EMPTY_MAP}), it is copied. If the headers already + * include an {@code Authorization} key, an {@link IllegalArgumentException} is thrown. 
+ */ + private static Map headersWithApiKey(Map original, String apiKey) { + if (original.keySet().stream().anyMatch(key -> key.equalsIgnoreCase("Authorization"))) { + throw new IllegalArgumentException("Cannot specify both [api_key] and [headers] including [Authorization] key"); + } + Map updated = (original instanceof HashMap) ? original : new HashMap<>(original); + updated.put("Authorization", "ApiKey " + apiKey); + return updated; + } } diff --git a/server/src/test/java/org/elasticsearch/index/reindex/ReindexRequestTests.java b/server/src/test/java/org/elasticsearch/index/reindex/ReindexRequestTests.java index a279e3fb55deb..c09e78f3d55d4 100644 --- a/server/src/test/java/org/elasticsearch/index/reindex/ReindexRequestTests.java +++ b/server/src/test/java/org/elasticsearch/index/reindex/ReindexRequestTests.java @@ -325,6 +325,45 @@ public void testBuildRemoteInfoWithAllHostParts() throws IOException { assertEquals("[host] must be of the form [scheme]://[host]:[port](/[pathPrefix])? but was [https]", exception.getMessage()); } + public void testBuildRemoteInfoWithApiKey() throws IOException { + Map remote = new HashMap<>(); + remote.put("host", "https://example.com:9200"); + remote.put("api_key", "l3t-m3-1n"); + Map source = new HashMap<>(); + source.put("remote", remote); + RemoteInfo remoteInfo = ReindexRequest.buildRemoteInfo(source); + assertEquals(remoteInfo.getHeaders(), Map.of("Authorization", "ApiKey l3t-m3-1n")); + } + + public void testBuildRemoteInfoWithApiKeyAndOtherHeaders() throws IOException { + Map originalHeaders = new HashMap<>(); + originalHeaders.put("X-Routing-Magic", "Abracadabra"); + originalHeaders.put("X-Tracing-Magic", "12345"); + Map remote = new HashMap<>(); + remote.put("host", "https://example.com:9200"); + remote.put("api_key", "l3t-m3-1n"); + remote.put("headers", originalHeaders); + Map source = new HashMap<>(); + source.put("remote", remote); + RemoteInfo remoteInfo = ReindexRequest.buildRemoteInfo(source); + assertEquals( + 
remoteInfo.getHeaders(), + Map.of("X-Routing-Magic", "Abracadabra", "X-Tracing-Magic", "12345", "Authorization", "ApiKey l3t-m3-1n") + ); + } + + public void testBuildRemoteInfoWithConflictingApiKeyAndAuthorizationHeader() throws IOException { + Map originalHeaders = new HashMap<>(); + originalHeaders.put("aUtHoRiZaTiOn", "op3n-s3s4m3"); // non-standard capitalization, but HTTP headers are not case-sensitive + Map remote = new HashMap<>(); + remote.put("host", "https://example.com:9200"); + remote.put("api_key", "l3t-m3-1n"); + remote.put("headers", originalHeaders); + Map source = new HashMap<>(); + source.put("remote", remote); + assertThrows(IllegalArgumentException.class, () -> ReindexRequest.buildRemoteInfo(source)); + } + public void testReindexFromRemoteRequestParsing() throws IOException { BytesReference request; try (XContentBuilder b = JsonXContent.contentBuilder()) { From 2b67821718b3c5b97cd021ffcb73f98bc1da578f Mon Sep 17 00:00:00 2001 From: Pete Gillin Date: Mon, 6 Oct 2025 19:13:06 +0100 Subject: [PATCH 2/4] Update docs/reference/elasticsearch/rest-apis/reindex-indices.md Co-authored-by: shainaraskas <58563081+shainaraskas@users.noreply.github.com> --- docs/reference/elasticsearch/rest-apis/reindex-indices.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/reference/elasticsearch/rest-apis/reindex-indices.md b/docs/reference/elasticsearch/rest-apis/reindex-indices.md index ae7feb78f2a8a..230501c7b5a02 100644 --- a/docs/reference/elasticsearch/rest-apis/reindex-indices.md +++ b/docs/reference/elasticsearch/rest-apis/reindex-indices.md @@ -619,7 +619,7 @@ POST _reindex % TEST[s/"username": "user",/"username": "test_admin",/] % TEST[s/"password": "pass"/"password": "x-pack-test-password"/] -The `host` parameter must contain a scheme, host, port (for example, `https://otherhost:9200`), and optional path (for example, `https://otherhost:9200/proxy`). 
+The `host` parameter must contain a scheme, host, port (for example, `https://<OTHER_HOST_URL>:9200`), and optional path (for example, `https://<OTHER_HOST_URL>:9200/proxy`). ### Using basic auth [reindex-basic-auth] From 9fcada6d151f858933055e377df049685aea5c5c Mon Sep 17 00:00:00 2001 From: Pete Gillin Date: Mon, 6 Oct 2025 19:26:44 +0100 Subject: [PATCH 3/4] add cross-reference to SSL settings in docs --- docs/reference/elasticsearch/rest-apis/reindex-indices.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/reindex-indices.md b/docs/reference/elasticsearch/rest-apis/reindex-indices.md index 230501c7b5a02..cfade4df5053e 100644 --- a/docs/reference/elasticsearch/rest-apis/reindex-indices.md +++ b/docs/reference/elasticsearch/rest-apis/reindex-indices.md @@ -624,7 +624,7 @@ The `host` parameter must contain a scheme, host, port (for example, `https:// Date: Tue, 7 Oct 2025 09:44:30 +0100 Subject: [PATCH 4/4] Remove the `When using {{escloud}}` caveat at the start of the API key section, since you can use API keys on pretty much any ES type --- docs/reference/elasticsearch/rest-apis/reindex-indices.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/reference/elasticsearch/rest-apis/reindex-indices.md b/docs/reference/elasticsearch/rest-apis/reindex-indices.md index cfade4df5053e..84e3d432f61ef 100644 --- a/docs/reference/elasticsearch/rest-apis/reindex-indices.md +++ b/docs/reference/elasticsearch/rest-apis/reindex-indices.md @@ -628,7 +628,7 @@ Be sure to use `https` when using basic auth, or the password will be sent in pl ### Using an API key [reindex-api-key] -When using {{ecloud}}, it is also possible (and encouraged) to authenticate with the remote cluster through the use of a valid API key: +It is also possible (and encouraged) to authenticate with the remote cluster through the use of a valid API key: ::::{applies-switch} @@ -702,7 +702,7 @@ Scheme is ignored, only the host and port are used. 
For example: ```yaml reindex.remote.whitelist: [otherhost:9200, another:9200, 127.0.10.*:9200, localhost:*"] ``` -The list of allowed hosts must be configured on any nodes that will coordinate the reindex. +The list of allowed hosts must be configured on any node that will coordinate the reindex. ### Compatibility [reindex-remote-compatibility]