From 17d9d5ee67d27dc98455d941c9e3bacad2ddb2d0 Mon Sep 17 00:00:00 2001 From: Xavier Bai Date: Mon, 9 Sep 2024 15:53:48 +0800 Subject: [PATCH 1/3] [AMORO-3163] Support for configuring the number of remaining snapshots (#3164) * configure number of snapshots * update document --- .../maintainer/IcebergTableMaintainer.java | 18 ++++-- .../maintainer/MixedTableMaintainer.java | 14 ++-- .../server/table/TableConfigurations.java | 16 ++++- .../maintainer/TestSnapshotExpire.java | 64 +++++++++++++++++-- .../maintainer/TestSnapshotExpireHive.java | 2 +- .../apache/amoro/config/ConfigHelpers.java | 8 ++- .../amoro/config/TableConfiguration.java | 12 ++++ .../apache/amoro/table/TableProperties.java | 18 +++++- .../amoro/utils/CompatiblePropertyUtil.java | 2 + docs/user-guides/configurations.md | 3 +- 10 files changed, 132 insertions(+), 25 deletions(-) diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/IcebergTableMaintainer.java b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/IcebergTableMaintainer.java index cee5499af1..86c846e116 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/IcebergTableMaintainer.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/IcebergTableMaintainer.java @@ -167,7 +167,9 @@ public void expireSnapshots(TableRuntime tableRuntime) { if (!expireSnapshotEnabled(tableRuntime)) { return; } - expireSnapshots(mustOlderThan(tableRuntime, System.currentTimeMillis())); + expireSnapshots( + mustOlderThan(tableRuntime, System.currentTimeMillis()), + tableRuntime.getTableConfiguration().getSnapshotMinCount()); } protected boolean expireSnapshotEnabled(TableRuntime tableRuntime) { @@ -176,18 +178,22 @@ protected boolean expireSnapshotEnabled(TableRuntime tableRuntime) { } @VisibleForTesting - void expireSnapshots(long mustOlderThan) { - expireSnapshots(mustOlderThan, expireSnapshotNeedToExcludeFiles()); + void expireSnapshots(long mustOlderThan, int minCount) { + expireSnapshots(mustOlderThan, minCount, expireSnapshotNeedToExcludeFiles()); } - private void expireSnapshots(long olderThan, Set exclude) { - LOG.debug("start expire snapshots older than {}, the exclude is {}", olderThan, exclude); + private void expireSnapshots(long olderThan, int minCount, Set exclude) { + LOG.debug( + "start expire snapshots older than {} and retain last {} snapshots, the exclude is {}", + olderThan, + minCount, + exclude); final AtomicInteger toDeleteFiles = new AtomicInteger(0); Set parentDirectories = new HashSet<>(); Set expiredFiles = new HashSet<>(); table .expireSnapshots() - .retainLast(1) + .retainLast(Math.max(minCount, 1)) .expireOlderThan(olderThan) .deleteWith( file -> { diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/MixedTableMaintainer.java b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/MixedTableMaintainer.java index a4a6445fd9..1b5c8cbd82 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/MixedTableMaintainer.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/maintainer/MixedTableMaintainer.java @@ -109,11 +109,11 @@ public void expireSnapshots(TableRuntime tableRuntime) { } @VisibleForTesting - protected void expireSnapshots(long mustOlderThan) { + protected void expireSnapshots(long mustOlderThan, int minCount) { if (changeMaintainer != null) { - changeMaintainer.expireSnapshots(mustOlderThan); + changeMaintainer.expireSnapshots(mustOlderThan, minCount); } - baseMaintainer.expireSnapshots(mustOlderThan); + baseMaintainer.expireSnapshots(mustOlderThan, minCount); } @Override @@ -291,9 +291,9 @@ public ChangeTableMaintainer(UnkeyedTable unkeyedTable) { @Override @VisibleForTesting - void expireSnapshots(long mustOlderThan) { + void expireSnapshots(long mustOlderThan, int minCount) { expireFiles(mustOlderThan); - super.expireSnapshots(mustOlderThan); + super.expireSnapshots(mustOlderThan, minCount); } @Override @@ -303,7 +303,9 @@ public void expireSnapshots(TableRuntime tableRuntime) { } long now = System.currentTimeMillis(); expireFiles(now - snapshotsKeepTime(tableRuntime)); - expireSnapshots(mustOlderThan(tableRuntime, now)); + expireSnapshots( + mustOlderThan(tableRuntime, now), + tableRuntime.getTableConfiguration().getSnapshotMinCount()); } @Override diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/table/TableConfigurations.java b/amoro-ams/src/main/java/org/apache/amoro/server/table/TableConfigurations.java index bdb258c0b2..bb7823c99e 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/table/TableConfigurations.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/table/TableConfigurations.java @@ -36,6 +36,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.time.temporal.ChronoUnit; import java.util.Locale; import java.util.Map; import java.util.Set; @@ -61,10 +62,19 @@ public static TableConfiguration parseTableConfig(Map properties TableProperties.ENABLE_TABLE_EXPIRE, TableProperties.ENABLE_TABLE_EXPIRE_DEFAULT)) .setSnapshotTTLMinutes( - CompatiblePropertyUtil.propertyAsLong( + ConfigHelpers.TimeUtils.parseDuration( + CompatiblePropertyUtil.propertyAsString( + properties, + TableProperties.SNAPSHOT_KEEP_DURATION, + TableProperties.SNAPSHOT_KEEP_DURATION_DEFAULT), + ChronoUnit.MINUTES) + .getSeconds() + / 60) + .setSnapshotMinCount( + CompatiblePropertyUtil.propertyAsInt( properties, - TableProperties.BASE_SNAPSHOT_KEEP_MINUTES, - TableProperties.BASE_SNAPSHOT_KEEP_MINUTES_DEFAULT)) + TableProperties.SNAPSHOT_MIN_COUNT, + TableProperties.SNAPSHOT_MIN_COUNT_DEFAULT)) .setChangeDataTTLMinutes( CompatiblePropertyUtil.propertyAsLong( properties, diff --git a/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpire.java b/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpire.java index 559accaa64..2796b21522 100644 --- a/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpire.java +++ b/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpire.java @@ -122,7 +122,7 @@ public void testExpireChangeTableFiles() { // In order to advance the snapshot insertChangeDataFiles(testKeyedTable, 2); - tableMaintainer.getChangeMaintainer().expireSnapshots(System.currentTimeMillis()); + tableMaintainer.getChangeMaintainer().expireSnapshots(System.currentTimeMillis(), 1); Assert.assertEquals(1, Iterables.size(testKeyedTable.changeTable().snapshots())); s1Files.forEach( @@ -333,7 +333,7 @@ public void testExpireTableFiles4All() { List newDataFiles = writeAndCommitBaseStore(table); Assert.assertEquals(3, Iterables.size(table.snapshots())); - new MixedTableMaintainer(table).expireSnapshots(System.currentTimeMillis()); + new MixedTableMaintainer(table).expireSnapshots(System.currentTimeMillis(), 1); Assert.assertEquals(1, Iterables.size(table.snapshots())); dataFiles.forEach(file -> Assert.assertFalse(table.io().exists(file.path().toString()))); @@ -383,7 +383,7 @@ public void testExpireTableFilesRepeatedly() { MixedTableMaintainer tableMaintainer = new MixedTableMaintainer(testKeyedTable); tableMaintainer.getChangeMaintainer().expireFiles(secondCommitTime + 1); - tableMaintainer.getChangeMaintainer().expireSnapshots(secondCommitTime + 1); + tableMaintainer.getChangeMaintainer().expireSnapshots(secondCommitTime + 1, 1); Set dataFiles = getDataFiles(testKeyedTable); Assert.assertEquals(last4File, dataFiles); @@ -449,7 +449,7 @@ public void testExpireStatisticsFiles() { Assert.assertTrue(baseTable.io().exists(file1.path())); Assert.assertTrue(baseTable.io().exists(file2.path())); Assert.assertTrue(baseTable.io().exists(file3.path())); - new MixedTableMaintainer(testKeyedTable).expireSnapshots(expireTime); + new MixedTableMaintainer(testKeyedTable).expireSnapshots(expireTime, 1); Assert.assertEquals(1, Iterables.size(baseTable.snapshots())); Assert.assertFalse(baseTable.io().exists(file1.path())); @@ -525,6 +525,62 @@ public void testBaseTableGcDisabled() { Assert.assertEquals(1, Iterables.size(testUnkeyedTable.snapshots())); } + @Test + public void testRetainMinSnapshot() { + UnkeyedTable table = + isKeyedTable() + ? getMixedTable().asKeyedTable().baseTable() + : getMixedTable().asUnkeyedTable(); + table.newAppend().commit(); + table.newAppend().commit(); + List expectedSnapshots = new ArrayList<>(); + expectedSnapshots.add(table.currentSnapshot()); + + table.updateProperties().set(TableProperties.SNAPSHOT_KEEP_DURATION, "0s").commit(); + table.updateProperties().set(TableProperties.SNAPSHOT_MIN_COUNT, "3").commit(); + + TableRuntime tableRuntime = Mockito.mock(TableRuntime.class); + Mockito.when(tableRuntime.getTableIdentifier()) + .thenReturn( + ServerTableIdentifier.of(AmsUtil.toTableIdentifier(table.id()), getTestFormat())); + Mockito.when(tableRuntime.getTableConfiguration()) + .thenReturn(TableConfigurations.parseTableConfig(table.properties())); + Mockito.when(tableRuntime.getOptimizingStatus()).thenReturn(OptimizingStatus.IDLE); + + new MixedTableMaintainer(table).expireSnapshots(tableRuntime); + Assert.assertEquals(2, Iterables.size(table.snapshots())); + + table.newAppend().commit(); + expectedSnapshots.add(table.currentSnapshot()); + table.newAppend().commit(); + expectedSnapshots.add(table.currentSnapshot()); + + new MixedTableMaintainer(table).expireSnapshots(tableRuntime); + Assert.assertEquals(3, Iterables.size(table.snapshots())); + Assert.assertTrue( + Iterators.elementsEqual(expectedSnapshots.iterator(), table.snapshots().iterator())); + } + + @Test + public void testSnapshotExpireConfig() { + UnkeyedTable table = + isKeyedTable() + ? getMixedTable().asKeyedTable().baseTable() + : getMixedTable().asUnkeyedTable(); + table.updateProperties().set(TableProperties.SNAPSHOT_KEEP_DURATION, "180s").commit(); + Assert.assertEquals( + 3L, TableConfigurations.parseTableConfig(table.properties()).getSnapshotTTLMinutes()); + + // using default time unit: minutes + table.updateProperties().set(TableProperties.SNAPSHOT_KEEP_DURATION, "720").commit(); + Assert.assertEquals( + 720L, TableConfigurations.parseTableConfig(table.properties()).getSnapshotTTLMinutes()); + + table.updateProperties().set(TableProperties.SNAPSHOT_MIN_COUNT, "10").commit(); + Assert.assertEquals( + 10, TableConfigurations.parseTableConfig(table.properties()).getSnapshotMinCount()); + } + private long waitUntilAfter(long timestampMillis) { long current = System.currentTimeMillis(); while (current <= timestampMillis) { diff --git a/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpireHive.java b/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpireHive.java index efad6c8834..dd9f198c86 100644 --- a/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpireHive.java +++ b/amoro-ams/src/test/java/org/apache/amoro/server/optimizing/maintainer/TestSnapshotExpireHive.java @@ -115,7 +115,7 @@ public void testExpireTableFiles() { ? getMixedTable().asKeyedTable().baseTable() : getMixedTable().asUnkeyedTable(); MixedTableMaintainer mixedTableMaintainer = new MixedTableMaintainer(getMixedTable()); - mixedTableMaintainer.getBaseMaintainer().expireSnapshots(System.currentTimeMillis()); + mixedTableMaintainer.getBaseMaintainer().expireSnapshots(System.currentTimeMillis(), 1); Assert.assertEquals(1, Iterables.size(unkeyedTable.snapshots())); hiveFiles.forEach( diff --git a/amoro-common/src/main/java/org/apache/amoro/config/ConfigHelpers.java b/amoro-common/src/main/java/org/apache/amoro/config/ConfigHelpers.java index 4873e8581c..d7a705e04a 100644 --- a/amoro-common/src/main/java/org/apache/amoro/config/ConfigHelpers.java +++ b/amoro-common/src/main/java/org/apache/amoro/config/ConfigHelpers.java @@ -299,6 +299,10 @@ public static class TimeUtils { private static final Map LABEL_TO_UNIT_MAP = Collections.unmodifiableMap(initMap()); + public static Duration parseDuration(String text) { + return parseDuration(text, ChronoUnit.MILLIS); + } + /** * Parse the given string to a java {@link Duration}. The string is in format "{length * value}{time unit label}", e.g. "123ms", "321 s". If no time unit label is specified, it will @@ -318,7 +322,7 @@ public static class TimeUtils { * * @param text string to parse. */ - public static Duration parseDuration(String text) { + public static Duration parseDuration(String text, ChronoUnit defaultUnit) { checkNotNull(text); final String trimmed = text.trim(); @@ -350,7 +354,7 @@ public static Duration parseDuration(String text) { } if (unitLabel.isEmpty()) { - return Duration.of(value, ChronoUnit.MILLIS); + return Duration.of(value, defaultUnit); } ChronoUnit unit = LABEL_TO_UNIT_MAP.get(unitLabel); diff --git a/amoro-common/src/main/java/org/apache/amoro/config/TableConfiguration.java b/amoro-common/src/main/java/org/apache/amoro/config/TableConfiguration.java index f20705b1f2..5efedfbbea 100644 --- a/amoro-common/src/main/java/org/apache/amoro/config/TableConfiguration.java +++ b/amoro-common/src/main/java/org/apache/amoro/config/TableConfiguration.java @@ -27,6 +27,7 @@ public class TableConfiguration { private boolean expireSnapshotEnabled; private long snapshotTTLMinutes; + private int snapshotMinCount; private long changeDataTTLMinutes; private boolean cleanOrphanEnabled; private long orphanExistingMinutes; @@ -45,6 +46,10 @@ public long getSnapshotTTLMinutes() { return snapshotTTLMinutes; } + public int getSnapshotMinCount() { + return snapshotMinCount; + } + public long getChangeDataTTLMinutes() { return changeDataTTLMinutes; } @@ -76,6 +81,11 @@ public TableConfiguration setSnapshotTTLMinutes(long snapshotTTLMinutes) { return this; } + public TableConfiguration setSnapshotMinCount(int snapshotMinCount) { + this.snapshotMinCount = snapshotMinCount; + return this; + } + public TableConfiguration setChangeDataTTLMinutes(long changeDataTTLMinutes) { this.changeDataTTLMinutes = changeDataTTLMinutes; return this; @@ -130,6 +140,7 @@ public boolean equals(Object o) { TableConfiguration that = (TableConfiguration) o; return expireSnapshotEnabled == that.expireSnapshotEnabled && snapshotTTLMinutes == that.snapshotTTLMinutes + && snapshotMinCount == that.snapshotMinCount && changeDataTTLMinutes == that.changeDataTTLMinutes && cleanOrphanEnabled == that.cleanOrphanEnabled && orphanExistingMinutes == that.orphanExistingMinutes @@ -144,6 +155,7 @@ public int hashCode() { return Objects.hashCode( expireSnapshotEnabled, snapshotTTLMinutes, + snapshotMinCount, changeDataTTLMinutes, cleanOrphanEnabled, orphanExistingMinutes, diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/table/TableProperties.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/table/TableProperties.java index efd9e994dd..debe3f0cae 100644 --- a/amoro-format-iceberg/src/main/java/org/apache/amoro/table/TableProperties.java +++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/table/TableProperties.java @@ -150,8 +150,22 @@ private TableProperties() {} public static final String CHANGE_DATA_TTL = "change.data.ttl.minutes"; public static final long CHANGE_DATA_TTL_DEFAULT = 10080; // 7 Days - public static final String BASE_SNAPSHOT_KEEP_MINUTES = "snapshot.base.keep.minutes"; - public static final long BASE_SNAPSHOT_KEEP_MINUTES_DEFAULT = 720; // 12 Hours + /** + * @deprecated Use {@link TableProperties#SNAPSHOT_KEEP_DURATION } instead; will be removed in + * 0.9.0 + */ + @Deprecated public static final String BASE_SNAPSHOT_KEEP_MINUTES = "snapshot.base.keep.minutes"; + /** + * @deprecated Use {@link TableProperties#SNAPSHOT_KEEP_DURATION_DEFAULT } instead; will be + * removed in 0.9.0 + */ + @Deprecated public static final long BASE_SNAPSHOT_KEEP_MINUTES_DEFAULT = 720; // 12 Hours + + public static final String SNAPSHOT_KEEP_DURATION = "snapshot.keep.duration"; + public static final String SNAPSHOT_KEEP_DURATION_DEFAULT = "720min"; // 12 Hours + + public static final String SNAPSHOT_MIN_COUNT = "snapshot.keep.min-count"; + public static final int SNAPSHOT_MIN_COUNT_DEFAULT = 1; public static final String ENABLE_ORPHAN_CLEAN = "clean-orphan-file.enabled"; public static final boolean ENABLE_ORPHAN_CLEAN_DEFAULT = false; diff --git a/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/CompatiblePropertyUtil.java b/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/CompatiblePropertyUtil.java index ab48600c58..6d68cfe7a6 100644 --- a/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/CompatiblePropertyUtil.java +++ b/amoro-format-iceberg/src/main/java/org/apache/amoro/utils/CompatiblePropertyUtil.java @@ -96,6 +96,8 @@ private static String getLegacyProperty(String property) { return TableProperties.ENABLE_ORPHAN_CLEAN_LEGACY; case TableProperties.ENABLE_LOG_STORE: return TableProperties.ENABLE_LOG_STORE_LEGACY; + case TableProperties.SNAPSHOT_KEEP_DURATION: + return TableProperties.BASE_SNAPSHOT_KEEP_MINUTES; default: return null; } diff --git a/docs/user-guides/configurations.md b/docs/user-guides/configurations.md index aa1bda9877..5bcf7afbbd 100644 --- a/docs/user-guides/configurations.md +++ b/docs/user-guides/configurations.md @@ -69,7 +69,8 @@ Data-cleaning configurations are applicable to both Iceberg Format and Mixed str |---------------------------------------------|------------------|-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | table-expire.enabled | true | Enables periodically expire table | | change.data.ttl.minutes | 10080(7 days) | Time to live in minutes for data of ChangeStore | -| snapshot.base.keep.minutes | 720(12 hours) | Table-Expiration keeps the latest snapshots of BaseStore within a specified time in minutes | +| snapshot.keep.duration | 720min(12 hours) | Table-Expiration keeps the latest snapshots within a specified duration | +| snapshot.keep.min-count | 1 | Minimum number of snapshots retained for table expiration | | clean-orphan-file.enabled | false | Enables periodically clean orphan files | | clean-orphan-file.min-existing-time-minutes | 2880(2 days) | Cleaning orphan files keeps the files modified within a specified time in minutes | | clean-dangling-delete-files.enabled | true | Whether to enable cleaning of dangling delete files | From bc2e33e99fa643a86f2578b1b7c29587e9135571 Mon Sep 17 00:00:00 2001 From: Marig_Weizhi Date: Mon, 9 Sep 2024 16:06:48 +0800 Subject: [PATCH 2/3] [AMORO-3180] Implement health score calculation for tables (#3181) implement health score calculation for tables --- .../dashboard/controller/TableController.java | 5 ++ .../plan/AbstractPartitionPlan.java | 5 ++ .../plan/CommonPartitionEvaluator.java | 53 +++++++++++++++++++ .../optimizing/plan/OptimizingEvaluator.java | 9 ++++ .../optimizing/plan/PartitionEvaluator.java | 3 ++ .../amoro/server/table/TableRuntime.java | 4 ++ .../server/table/TableSummaryMetrics.java | 17 +++++- .../server/table/TestTableSummaryMetrics.java | 5 ++ .../amoro/table/descriptor/TableSummary.java | 10 ++++ amoro-web/mock/modules/table.js | 5 +- amoro-web/src/language/en.ts | 1 + amoro-web/src/language/zh.ts | 1 + amoro-web/src/types/common.type.ts | 2 + amoro-web/src/views/tables/index.vue | 13 +++++ docs/user-guides/metrics.md | 31 +++++------ 15 files changed, 146 insertions(+), 18 deletions(-) diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/dashboard/controller/TableController.java b/amoro-ams/src/main/java/org/apache/amoro/server/dashboard/controller/TableController.java index b83be317b4..43a71e785d 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/dashboard/controller/TableController.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/dashboard/controller/TableController.java @@ -48,6 +48,7 @@ import org.apache.amoro.server.dashboard.utils.AmsUtil; import org.apache.amoro.server.dashboard.utils.CommonUtil; import org.apache.amoro.server.optimizing.OptimizingStatus; +import org.apache.amoro.server.optimizing.plan.OptimizingEvaluator; import org.apache.amoro.server.table.TableRuntime; import org.apache.amoro.server.table.TableService; import org.apache.amoro.shade.guava32.com.google.common.base.Function; @@ -149,6 +150,10 @@ public void getTableDetail(Context ctx) { if (serverTableIdentifier.isPresent()) { TableRuntime tableRuntime = tableService.getRuntime(serverTableIdentifier.get()); tableSummary.setOptimizingStatus(tableRuntime.getOptimizingStatus().name()); + OptimizingEvaluator.PendingInput tableRuntimeSummary = tableRuntime.getTableSummary(); + if (tableRuntimeSummary != null) { + tableSummary.setHealthScore(tableRuntimeSummary.getHealthScore()); + } } else { tableSummary.setOptimizingStatus(OptimizingStatus.IDLE.name()); } diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/AbstractPartitionPlan.java b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/AbstractPartitionPlan.java index 5a87cd9e27..4551decd77 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/AbstractPartitionPlan.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/AbstractPartitionPlan.java @@ -181,6 +181,11 @@ protected interface TaskSplitter { List splitTasks(int targetTaskCount); } + @Override + public int getHealthScore() { + return evaluator.getHealthScore(); + } + @Override public int getFragmentFileCount() { return evaluator().getFragmentFileCount(); diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/CommonPartitionEvaluator.java b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/CommonPartitionEvaluator.java index ffdb2660ce..013f11f87a 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/CommonPartitionEvaluator.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/CommonPartitionEvaluator.java @@ -369,6 +369,59 @@ public boolean anyDeleteExist() { return equalityDeleteFileCount > 0 || posDeleteFileCount > 0; } + @Override + public int getHealthScore() { + long dataFilesSize = getFragmentFileSize() + getSegmentFileSize(); + long dataFiles = getFragmentFileCount() + getSegmentFileCount(); + long dataRecords = getFragmentFileRecords() + getSegmentFileRecords(); + + double averageDataFileSize = getNormalizedRatio(dataFilesSize, dataFiles); + double eqDeleteRatio = getNormalizedRatio(equalityDeleteFileRecords, dataRecords); + double posDeleteRatio = getNormalizedRatio(posDeleteFileRecords, dataRecords); + + double tablePenaltyFactor = getTablePenaltyFactor(dataFiles, dataFilesSize); + return (int) + Math.ceil( + 100 + - tablePenaltyFactor + * (40 * getSmallFilePenaltyFactor(averageDataFileSize) + + 40 * getEqDeletePenaltyFactor(eqDeleteRatio) + + 20 * getPosDeletePenaltyFactor(posDeleteRatio))); + } + + private double getEqDeletePenaltyFactor(double eqDeleteRatio) { + double eqDeleteRatioThreshold = config.getMajorDuplicateRatio(); + return getNormalizedRatio(eqDeleteRatio, eqDeleteRatioThreshold); + } + + private double getPosDeletePenaltyFactor(double posDeleteRatio) { + double posDeleteRatioThreshold = config.getMajorDuplicateRatio() * 2; + return getNormalizedRatio(posDeleteRatio, posDeleteRatioThreshold); + } + + private double getSmallFilePenaltyFactor(double averageDataFileSize) { + return 1 - getNormalizedRatio(averageDataFileSize, minTargetSize); + } + + private double getTablePenaltyFactor(long dataFiles, long dataFilesSize) { + // if the number of table files is less than or equal to 1, + // there is no penalty, i.e., the table is considered to be perfectly healthy + if (dataFiles <= 1) { + return 0; + } + // The small table has very little impact on performance, + // so there is only a small penalty + return getNormalizedRatio(dataFiles, config.getMinorLeastFileCount()) + * getNormalizedRatio(dataFilesSize, config.getTargetSize()); + } + + private double getNormalizedRatio(double numerator, double denominator) { + if (denominator <= 0) { + return 0; + } + return Math.min(numerator, denominator) / denominator; + } + @Override public int getFragmentFileCount() { return fragmentFileCount; diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/OptimizingEvaluator.java b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/OptimizingEvaluator.java index 7ce4c3e2dd..765bdf3bb1 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/OptimizingEvaluator.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/OptimizingEvaluator.java @@ -200,10 +200,12 @@ public static class PendingInput { private long equalityDeleteBytes = 0L; private long equalityDeleteFileRecords = 0L; private long positionalDeleteFileRecords = 0L; + private int healthScore = -1; // -1 means not calculated public PendingInput() {} public PendingInput(Collection evaluators) { + double totalHealthScore = 0; for (PartitionEvaluator evaluator : evaluators) { partitions .computeIfAbsent(evaluator.getPartition().first(), ignore -> Sets.newHashSet()) @@ -217,7 +219,9 @@ public PendingInput(Collection evaluators) { equalityDeleteBytes += evaluator.getEqualityDeleteFileSize(); equalityDeleteFileRecords += evaluator.getEqualityDeleteFileRecords(); equalityDeleteFileCount += evaluator.getEqualityDeleteFileCount(); + totalHealthScore += evaluator.getHealthScore(); } + healthScore = (int) Math.ceil(totalHealthScore / evaluators.size()); } public Map> getPartitions() { @@ -260,6 +264,10 @@ public long getPositionalDeleteFileRecords() { return positionalDeleteFileRecords; } + public int getHealthScore() { + return healthScore; + } + @Override public String toString() { return MoreObjects.toStringHelper(this) @@ -273,6 +281,7 @@ public String toString() { .add("equalityDeleteBytes", equalityDeleteBytes) .add("equalityDeleteFileRecords", equalityDeleteFileRecords) .add("positionalDeleteFileRecords", positionalDeleteFileRecords) + .add("healthScore", healthScore) .toString(); } } diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/PartitionEvaluator.java b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/PartitionEvaluator.java index 974703bf07..cc99b2c28b 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/PartitionEvaluator.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/optimizing/plan/PartitionEvaluator.java @@ -81,6 +81,9 @@ interface Weight extends Comparable {} */ OptimizingType getOptimizingType(); + /** Get health score of this partition. */ + int getHealthScore(); + /** Get the count of fragment files involved in optimizing. */ int getFragmentFileCount(); diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/table/TableRuntime.java b/amoro-ams/src/main/java/org/apache/amoro/server/table/TableRuntime.java index 9884d2e785..73855d3a0f 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/table/TableRuntime.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/table/TableRuntime.java @@ -392,6 +392,10 @@ public OptimizingEvaluator.PendingInput getPendingInput() { return pendingInput; } + public OptimizingEvaluator.PendingInput getTableSummary() { + return tableSummary; + } + private boolean updateConfigInternal(Map properties) { TableConfiguration newTableConfig = TableConfigurations.parseTableConfig(properties); if (tableConfiguration.equals(newTableConfig)) { diff --git a/amoro-ams/src/main/java/org/apache/amoro/server/table/TableSummaryMetrics.java b/amoro-ams/src/main/java/org/apache/amoro/server/table/TableSummaryMetrics.java index 5cd16e26fb..7d4dfd8601 100644 --- a/amoro-ams/src/main/java/org/apache/amoro/server/table/TableSummaryMetrics.java +++ b/amoro-ams/src/main/java/org/apache/amoro/server/table/TableSummaryMetrics.java @@ -112,13 +112,20 @@ public class TableSummaryMetrics { .withTags("catalog", "database", "table") .build(); - // table summary snapshots number metrics + // table summary snapshots number metric public static final MetricDefine TABLE_SUMMARY_SNAPSHOTS = defineGauge("table_summary_snapshots") .withDescription("Number of snapshots in the table") .withTags("catalog", "database", "table") .build(); + // table summary health score metric + public static final MetricDefine TABLE_SUMMARY_HEALTH_SCORE = + defineGauge("table_summary_health_score") + .withDescription("Health score of the table") + .withTags("catalog", "database", "table") + .build(); + private final ServerTableIdentifier identifier; private final List registeredMetricKeys = Lists.newArrayList(); private MetricRegistry globalRegistry; @@ -136,6 +143,7 @@ public class TableSummaryMetrics { private long dataFilesRecords = 0L; private long equalityDeleteFilesRecords = 0L; private long snapshots = 0L; + private long healthScore = 0L; public TableSummaryMetrics(ServerTableIdentifier identifier) { this.identifier = identifier; @@ -191,9 +199,12 @@ public void register(MetricRegistry registry) { TABLE_SUMMARY_EQUALITY_DELETE_FILES_RECORDS, (Gauge) () -> equalityDeleteFilesRecords); - // register snapshots number metrics + // register snapshots number metric registerMetric(registry, TABLE_SUMMARY_SNAPSHOTS, (Gauge) () -> snapshots); + // register health score metric + registerMetric(registry, TABLE_SUMMARY_HEALTH_SCORE, (Gauge) () -> healthScore); + globalRegistry = registry; } } @@ -231,6 +242,8 @@ public void refresh(OptimizingEvaluator.PendingInput tableSummary) { positionDeleteFilesRecords = tableSummary.getPositionalDeleteFileRecords(); dataFilesRecords = tableSummary.getDataFileRecords(); equalityDeleteFilesRecords = tableSummary.getEqualityDeleteFileRecords(); + + healthScore = tableSummary.getHealthScore(); } public void refreshSnapshots(MixedTable table) { diff --git a/amoro-ams/src/test/java/org/apache/amoro/server/table/TestTableSummaryMetrics.java b/amoro-ams/src/test/java/org/apache/amoro/server/table/TestTableSummaryMetrics.java index 1b4632b29c..e0b992e784 100644 --- a/amoro-ams/src/test/java/org/apache/amoro/server/table/TestTableSummaryMetrics.java +++ b/amoro-ams/src/test/java/org/apache/amoro/server/table/TestTableSummaryMetrics.java @@ -24,6 +24,7 @@ import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_EQUALITY_DELETE_FILES; import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_EQUALITY_DELETE_FILES_RECORDS; import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_EQUALITY_DELETE_FILES_SIZE; +import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_HEALTH_SCORE; import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_POSITION_DELETE_FILES; import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_POSITION_DELETE_FILES_RECORDS; import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_POSITION_DELETE_FILES_SIZE; @@ -170,6 +171,8 @@ public void testTableSummaryMetrics() { Gauge snapshots = getMetric(metrics, identifier, TABLE_SUMMARY_SNAPSHOTS); + Gauge healthScore = getMetric(metrics, identifier, TABLE_SUMMARY_HEALTH_SCORE); + Assertions.assertEquals(0, totalFiles.getValue()); Assertions.assertEquals(0, dataFiles.getValue()); Assertions.assertEquals(0, posDelFiles.getValue()); @@ -184,6 +187,7 @@ public void testTableSummaryMetrics() { Assertions.assertEquals(0, dataRecords.getValue()); Assertions.assertEquals(0, posDelRecords.getValue()); Assertions.assertEquals(0, eqDelRecords.getValue()); + Assertions.assertEquals(0, healthScore.getValue()); // refresh metrics initTableWithFiles(); @@ -202,6 +206,7 @@ public void testTableSummaryMetrics() { Assertions.assertTrue(posDelRecords.getValue() > 0); Assertions.assertTrue(snapshots.getValue() > 0); + Assertions.assertTrue(healthScore.getValue() > 0); } private Gauge getMetric( diff --git a/amoro-common/src/main/java/org/apache/amoro/table/descriptor/TableSummary.java b/amoro-common/src/main/java/org/apache/amoro/table/descriptor/TableSummary.java index 95e611e7fa..641f124e2c 100644 --- a/amoro-common/src/main/java/org/apache/amoro/table/descriptor/TableSummary.java +++ b/amoro-common/src/main/java/org/apache/amoro/table/descriptor/TableSummary.java @@ -26,6 +26,7 @@ public class TableSummary { private String tableFormat; private long records; private String optimizingStatus; + private int healthScore = -1; // -1 means not calculated public TableSummary() {} @@ -71,4 +72,13 @@ public String getOptimizingStatus() { public void setOptimizingStatus(String optimizingStatus) { this.optimizingStatus = optimizingStatus; } + + /** Current table health score */ + public int getHealthScore() { + return healthScore; + } + + public void setHealthScore(int healthScore) { + this.healthScore = healthScore; + } } diff --git a/amoro-web/mock/modules/table.js b/amoro-web/mock/modules/table.js index 0e94541caf..a3e61a14d4 100644 --- a/amoro-web/mock/modules/table.js +++ b/amoro-web/mock/modules/table.js @@ -76,7 +76,10 @@ export default [ "file": 2, "size": "1.79KB", "tableFormat": "Iceberg(V1)", - "averageFile": "918.00B" + "averageFile": "918.00B", + "records":24, + "optimizingStatus":"IDLE", + "healthScore":100, }, "baseLocation": "/mnt/dfs/4/warehouse_public/db/user", "filter": null, diff --git a/amoro-web/src/language/en.ts b/amoro-web/src/language/en.ts index 60d494f0dd..06ba5513ef 100644 --- a/amoro-web/src/language/en.ts +++ b/amoro-web/src/language/en.ts @@ -17,6 +17,7 @@ */ export default { + healthScore: 'Health Score', overview: 'Overview', catalogs: 'Catalogs', catalog: 'Catalog', diff --git a/amoro-web/src/language/zh.ts b/amoro-web/src/language/zh.ts index 6bafedd9e6..0936135cc3 100644 --- a/amoro-web/src/language/zh.ts +++ b/amoro-web/src/language/zh.ts @@ -17,6 +17,7 @@ */ export default { + healthScore: '健康度', overview: '总览', catalogs: '目录', catalog: '目录', diff --git a/amoro-web/src/types/common.type.ts b/amoro-web/src/types/common.type.ts index 10b26d8879..4af04c14dc 100644 --- a/amoro-web/src/types/common.type.ts +++ b/amoro-web/src/types/common.type.ts @@ -60,6 +60,7 @@ export interface IKeyAndValue { } export interface IBaseDetailInfo { optimizingStatus: string + records: string tableType: string tableName: string createTime: string @@ -68,6 +69,7 @@ export interface IBaseDetailInfo { averageFile: string tableFormat: string hasPartition: boolean + healthScore: number } export interface DetailColumnItem { diff --git a/amoro-web/src/views/tables/index.vue b/amoro-web/src/views/tables/index.vue index 6a911e908e..18fa8fca73 100644 --- a/amoro-web/src/views/tables/index.vue +++ b/amoro-web/src/views/tables/index.vue @@ -54,11 +54,13 @@ export default defineComponent({ isSecondaryNav: false, baseInfo: { optimizingStatus: '', + records: '', tableType: '', tableName: '', createTime: '', tableFormat: '', hasPartition: false, + healthScore: -1, } as IBaseDetailInfo, detailLoaded: false, }) @@ -150,6 +152,10 @@ export default defineComponent({

{{ $t('tableFormat') }}: {{ baseInfo.tableFormat }}

+ +

+ {{ $t('healthScore') }}: {{ baseInfo.healthScore == null || baseInfo.healthScore < 0 ? 'N/A' : baseInfo.healthScore }} +

@@ -178,9 +184,11 @@ export default defineComponent({ border: 1px solid #e8e8f0; padding: 12px 0; min-height: 100%; + .create-time { margin-top: 12px; } + .tables-menu-wrap { position: fixed; width: 100%; @@ -189,6 +197,7 @@ export default defineComponent({ left: 200px; z-index: 100; } + .table-name { font-size: 24px; line-height: 1.5; @@ -196,16 +205,20 @@ export default defineComponent({ max-width: 100%; padding-left: 24px; } + .table-info { padding: 12px 24px 0 24px; + .text-color { color: #7CB305; } } + .table-edit { font-size: 18px; padding-right: 12px; } + :deep(.ant-tabs-nav) { padding-left: 12px; margin-bottom: 0; diff --git a/docs/user-guides/metrics.md b/docs/user-guides/metrics.md index cc86bcb0d9..d65ce4c9ed 100644 --- a/docs/user-guides/metrics.md +++ b/docs/user-guides/metrics.md @@ -100,18 +100,19 @@ Amoro has supported built-in metrics to measure status of table self-optimizing ## table summary metrics -| Metric Name | Type | Tags | Description | -|---------------------------------------------|---------|--------------------------|-----------------------------------------------| -| table_summary_total_files | Gauge | catalog, database, table | Total number of files in the table | -| table_summary_data_files | Gauge | catalog, database, table | Number of data files in the table | -| table_summary_equality_delete_files | Gauge | catalog, database, table | Number of equality delete files in the table | -| table_summary_position_delete_files | Gauge | catalog, database, table | Number of position delete files in the table | -| table_summary_total_files_size | Gauge | catalog, database, table | Total size of files in the table | -| table_summary_data_files_size | Gauge | catalog, database, table | Size of data files in the table | -| table_summary_equality_delete_files_size | Gauge | catalog, database, table | Size of equality delete files in the table | -| table_summary_position_delete_files_size | Gauge | catalog, database, table | Size of position delete files in the table | -| table_summary_total_records | Gauge | catalog, database, table | Total records in the table | -| table_summary_data_files_records | Gauge | catalog, database, table | Records of data files in the table | -| table_summary_equality_delete_files_records | Gauge | catalog, database, table | Records of equality delete files in the table | -| table_summary_position_delete_files_records | Gauge | catalog, database, table | Records of position delete files in the table | -| table_summary_snapshots | Gauge | catalog, database, table | Number of snapshots in the table | \ No newline at end of file +| Metric Name | Type | Tags | Description | +|-----------------------------------------------|---------|--------------------------|-----------------------------------------------| +| table_summary_total_files | Gauge | catalog, database, table | Total number of files in the table | +| table_summary_data_files | Gauge | catalog, database, table | Number of data files in the table | +| table_summary_equality_delete_files | Gauge | catalog, database, table | Number of equality delete files in the table | +| table_summary_position_delete_files | Gauge | catalog, database, table | Number of position delete files in the table | +| table_summary_total_files_size | Gauge | catalog, database, table | Total size of files in the table | +| table_summary_data_files_size | Gauge | catalog, database, table | Size of data files in the table | +| table_summary_equality_delete_files_size | Gauge | catalog, database, table | Size of equality delete files in the table | +| table_summary_position_delete_files_size | Gauge | catalog, database, table | Size of position delete files in the table | +| table_summary_total_records | Gauge | catalog, database, table | Total records in the table | +| table_summary_data_files_records | Gauge | catalog, database, table | Records of data files in the table | +| table_summary_equality_delete_files_records | Gauge | catalog, database, table | Records of equality delete files in the table | +| table_summary_position_delete_files_records | Gauge | catalog, database, table | Records of position delete files in the table | +| table_summary_snapshots | Gauge | catalog, database, table | Number of snapshots in the table | +| table_summary_health_score | Gauge | catalog, database, table | Health score of the table | \ No newline at end of file From b52f64d7784441dd2496bae6844cfe55a3403088 Mon Sep 17 00:00:00 2001 From: Congxian Qiu Date: Tue, 10 Sep 2024 15:18:19 +0800 Subject: [PATCH 3/3] [AMORO-3145] Filter out info log for CodecPool in optimizer (#3185) [AMORO-3145] Filter out info log for CodecPool Currently there will be many logs for (de)compressor from CodecPool, and seems there is no other meaningful info log from this class, so we change the log level from CodecPool from info to warn to filter out these logs This only affect the local optimizer, and user need to change the configuration if they use flink/spark optimzier. --- dist/src/main/amoro-bin/conf/optimize/log4j2.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/dist/src/main/amoro-bin/conf/optimize/log4j2.xml b/dist/src/main/amoro-bin/conf/optimize/log4j2.xml index 6e4b9a13db..8ce1769969 100644 --- a/dist/src/main/amoro-bin/conf/optimize/log4j2.xml +++ b/dist/src/main/amoro-bin/conf/optimize/log4j2.xml @@ -101,6 +101,7 @@ +