From b27779620c5ea86898fb6fe2ada60fbcfb9c196d Mon Sep 17 00:00:00 2001 From: Jesper Van Caeter Date: Mon, 18 Dec 2023 10:59:32 +0100 Subject: [PATCH 1/2] Add setSelect(ALL_ATTRIBUTES) to retrieve snapshots By doing so, the indexes don't need to use ProjectionType ALL; KEYS_ONLY suffices. Thus, a duplicate of the payload doesn't need to be stored in the index, which is more space efficient. This makes it possible to choose between time and space efficiency by selecting the projection type for the created index. Depending on the choice, setting this flag for retrieval will make either strategy work as efficiently as intended. --- .../scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala | 9 ++++++++- .../dynamodb/snapshot/DynamoDBSnapshotRequests.scala | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala b/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala index 51a9844..7362b30 100644 --- a/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala +++ b/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala @@ -69,7 +69,14 @@ trait CreatePersistenceIdsIndex { object CreatePersistenceIdsIndex { - /** required by [[DynamoDBCurrentPersistenceIdsQuery.currentPersistenceIdsByPageQuery]] */ + /** + * required by [[DynamoDBCurrentPersistenceIdsQuery.currentPersistenceIdsByPageQuery]] + * + * When requesting snapshots by timestamp, Select ALL_ATTRIBUTES is used. Thus, a duplicate of + * the payload doesn't need to be stored in the index, which is more space efficient. + * This allows to make the choice between time and space efficiency by selecting the projection + * strategy for the created index. 
+ */ def createPersistenceIdsIndexRequest( indexName: String, tableName: String, diff --git a/src/main/scala/org/apache/pekko/persistence/dynamodb/snapshot/DynamoDBSnapshotRequests.scala b/src/main/scala/org/apache/pekko/persistence/dynamodb/snapshot/DynamoDBSnapshotRequests.scala index a6ceadb..3c4cadc 100644 --- a/src/main/scala/org/apache/pekko/persistence/dynamodb/snapshot/DynamoDBSnapshotRequests.scala +++ b/src/main/scala/org/apache/pekko/persistence/dynamodb/snapshot/DynamoDBSnapshotRequests.scala @@ -14,6 +14,7 @@ package org.apache.pekko.persistence.dynamodb.snapshot import com.amazonaws.services.dynamodbv2.model._ +import com.amazonaws.services.dynamodbv2.model.Select.ALL_ATTRIBUTES import org.apache.pekko import pekko.actor.ExtendedActorSystem import pekko.persistence.dynamodb._ @@ -107,6 +108,8 @@ trait DynamoDBSnapshotRequests extends DynamoDBRequests { .withConsistentRead(true) limit.foreach(request.setLimit(_)) + request.setSelect(ALL_ATTRIBUTES) + dynamo.query(request) } From 74054cc031f0f828c2cf74afc4f782aaa7873e90 Mon Sep 17 00:00:00 2001 From: Jean-Luc Deprez Date: Thu, 17 Oct 2024 13:10:55 +0200 Subject: [PATCH 2/2] Clarify the 1.1.0 change. 
--- .../query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala b/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala index 7362b30..2ce06b6 100644 --- a/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala +++ b/src/main/scala/org/apache/pekko/persistence/dynamodb/query/scaladsl/DynamoDBCurrentPersistenceIdsQuery.scala @@ -72,8 +72,8 @@ object CreatePersistenceIdsIndex { /** * required by [[DynamoDBCurrentPersistenceIdsQuery.currentPersistenceIdsByPageQuery]] * - * When requesting snapshots by timestamp, Select ALL_ATTRIBUTES is used. Thus, a duplicate of - * the payload doesn't need to be stored in the index, which is more space efficient. + * Since v1.1.0, when requesting snapshots by timestamp, select ALL_ATTRIBUTES is used. Thus, a + * duplicate of the payload doesn't need to be stored in the index, which is more space efficient. * This allows to make the choice between time and space efficiency by selecting the projection * strategy for the created index. */