Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[AMORO-3180] Implement health score calculation for tables #3181

Merged
merged 1 commit into from
Sep 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
import org.apache.amoro.server.dashboard.utils.AmsUtil;
import org.apache.amoro.server.dashboard.utils.CommonUtil;
import org.apache.amoro.server.optimizing.OptimizingStatus;
import org.apache.amoro.server.optimizing.plan.OptimizingEvaluator;
import org.apache.amoro.server.table.TableRuntime;
import org.apache.amoro.server.table.TableService;
import org.apache.amoro.shade.guava32.com.google.common.base.Function;
Expand Down Expand Up @@ -149,6 +150,10 @@ public void getTableDetail(Context ctx) {
if (serverTableIdentifier.isPresent()) {
TableRuntime tableRuntime = tableService.getRuntime(serverTableIdentifier.get());
tableSummary.setOptimizingStatus(tableRuntime.getOptimizingStatus().name());
OptimizingEvaluator.PendingInput tableRuntimeSummary = tableRuntime.getTableSummary();
if (tableRuntimeSummary != null) {
tableSummary.setHealthScore(tableRuntimeSummary.getHealthScore());
}
} else {
tableSummary.setOptimizingStatus(OptimizingStatus.IDLE.name());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,11 @@ protected interface TaskSplitter {
List<SplitTask> splitTasks(int targetTaskCount);
}

/**
 * Returns the health score of this partition, as reported by the partition evaluator.
 *
 * <p>The evaluator is obtained through the {@code evaluator()} accessor, exactly like the
 * neighbouring getters of this class, rather than by reading the {@code evaluator} field.
 *
 * @return the evaluator's health score for this partition
 */
@Override
public int getHealthScore() {
  // NOTE(review): presumably evaluator() creates the evaluator on first use, in which case the
  // raw field could still be unset here - confirm against the accessor's definition.
  return evaluator().getHealthScore();
}

@Override
public int getFragmentFileCount() {
return evaluator().getFragmentFileCount();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,59 @@ public boolean anyDeleteExist() {
return equalityDeleteFileCount > 0 || posDeleteFileCount > 0;
}

/**
 * Computes the health score of this partition.
 *
 * <p>The score starts at 100 and is reduced by a weighted penalty: up to 40 points for small
 * data files, up to 40 points for equality-delete records and up to 20 points for
 * position-delete records. The summed penalty is scaled by the table penalty factor, so a
 * partition with at most one data file always scores 100.
 *
 * @return the score rounded up to the next integer; each factor is a capped ratio, so for
 *     non-negative inputs the result lies between 0 and 100, higher meaning healthier
 */
@Override
public int getHealthScore() {
  long dataFilesSize = getFragmentFileSize() + getSegmentFileSize();
  long dataFiles = getFragmentFileCount() + getSegmentFileCount();
  long dataRecords = getFragmentFileRecords() + getSegmentFileRecords();

  // The average file size must remain an absolute size because the small-file penalty compares
  // it against minTargetSize. Running it through getNormalizedRatio would clamp it to at most 1
  // and make the small-file penalty (almost) always maximal.
  double averageDataFileSize = dataFiles > 0 ? (double) dataFilesSize / dataFiles : 0;
  double eqDeleteRatio = getNormalizedRatio(equalityDeleteFileRecords, dataRecords);
  double posDeleteRatio = getNormalizedRatio(posDeleteFileRecords, dataRecords);

  double tablePenaltyFactor = getTablePenaltyFactor(dataFiles, dataFilesSize);
  double weightedPenalty =
      40 * getSmallFilePenaltyFactor(averageDataFileSize)
          + 40 * getEqDeletePenaltyFactor(eqDeleteRatio)
          + 20 * getPosDeletePenaltyFactor(posDeleteRatio);
  return (int) Math.ceil(100 - tablePenaltyFactor * weightedPenalty);
}

private double getEqDeletePenaltyFactor(double eqDeleteRatio) {
double eqDeleteRatioThreshold = config.getMajorDuplicateRatio();
return getNormalizedRatio(eqDeleteRatio, eqDeleteRatioThreshold);
zhoujinsong marked this conversation as resolved.
Show resolved Hide resolved
}

private double getPosDeletePenaltyFactor(double posDeleteRatio) {
double posDeleteRatioThreshold = config.getMajorDuplicateRatio() * 2;
zhoujinsong marked this conversation as resolved.
Show resolved Hide resolved
return getNormalizedRatio(posDeleteRatio, posDeleteRatioThreshold);
}

private double getSmallFilePenaltyFactor(double averageDataFileSize) {
return 1 - getNormalizedRatio(averageDataFileSize, minTargetSize);
zhoujinsong marked this conversation as resolved.
Show resolved Hide resolved
}

private double getTablePenaltyFactor(long dataFiles, long dataFilesSize) {
// if the number of table files is less than or equal to 1,
// there is no penalty, i.e., the table is considered to be perfectly healthy
if (dataFiles <= 1) {
return 0;
}
// The small table has very little impact on performance,
// so there is only a small penalty
return getNormalizedRatio(dataFiles, config.getMinorLeastFileCount())
zhoujinsong marked this conversation as resolved.
Show resolved Hide resolved
* getNormalizedRatio(dataFilesSize, config.getTargetSize());
}

/**
 * Divides {@code numerator} by {@code denominator}, capping the numerator at the denominator so
 * the quotient never exceeds 1.
 *
 * @param numerator value to normalize
 * @param denominator reference value
 * @return 0 when {@code denominator} is zero or negative, otherwise the capped quotient
 */
private double getNormalizedRatio(double numerator, double denominator) {
  return denominator <= 0 ? 0 : Math.min(numerator, denominator) / denominator;
}

@Override
public int getFragmentFileCount() {
return fragmentFileCount;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -200,10 +200,12 @@ public static class PendingInput {
private long equalityDeleteBytes = 0L;
private long equalityDeleteFileRecords = 0L;
private long positionalDeleteFileRecords = 0L;
private int healthScore = -1; // -1 means not calculated

public PendingInput() {}

public PendingInput(Collection<PartitionEvaluator> evaluators) {
double totalHealthScore = 0;
for (PartitionEvaluator evaluator : evaluators) {
partitions
.computeIfAbsent(evaluator.getPartition().first(), ignore -> Sets.newHashSet())
Expand All @@ -217,7 +219,9 @@ public PendingInput(Collection<PartitionEvaluator> evaluators) {
equalityDeleteBytes += evaluator.getEqualityDeleteFileSize();
equalityDeleteFileRecords += evaluator.getEqualityDeleteFileRecords();
equalityDeleteFileCount += evaluator.getEqualityDeleteFileCount();
totalHealthScore += evaluator.getHealthScore();
}
healthScore = (int) Math.ceil(totalHealthScore / evaluators.size());
}

public Map<Integer, Set<StructLike>> getPartitions() {
Expand Down Expand Up @@ -260,6 +264,10 @@ public long getPositionalDeleteFileRecords() {
return positionalDeleteFileRecords;
}

/**
 * Returns the health score held by this pending input.
 *
 * @return the stored score; the field is initialised to -1, which means "not calculated"
 */
public int getHealthScore() {
return healthScore;
}

@Override
public String toString() {
return MoreObjects.toStringHelper(this)
Expand All @@ -273,6 +281,7 @@ public String toString() {
.add("equalityDeleteBytes", equalityDeleteBytes)
.add("equalityDeleteFileRecords", equalityDeleteFileRecords)
.add("positionalDeleteFileRecords", positionalDeleteFileRecords)
.add("healthScore", healthScore)
.toString();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,9 @@ interface Weight extends Comparable<Weight> {}
*/
OptimizingType getOptimizingType();

/**
 * Returns the health score of this partition.
 *
 * <p>NOTE(review): the implementation added in this change starts from 100 and subtracts
 * penalties, so higher presumably means healthier - confirm for other implementations.
 *
 * @return the health score of this partition
 */
int getHealthScore();

/** Get the count of fragment files involved in optimizing. */
int getFragmentFileCount();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -392,6 +392,10 @@ public OptimizingEvaluator.PendingInput getPendingInput() {
return pendingInput;
}

/**
 * Returns the table summary currently held by this runtime.
 *
 * <p>NOTE(review): the dashboard caller null-checks the result, so this presumably returns
 * {@code null} until a summary has been produced - confirm where the field is assigned.
 *
 * @return the stored table summary
 */
public OptimizingEvaluator.PendingInput getTableSummary() {
return tableSummary;
}

private boolean updateConfigInternal(Map<String, String> properties) {
TableConfiguration newTableConfig = TableConfigurations.parseTableConfig(properties);
if (tableConfiguration.equals(newTableConfig)) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,13 +112,20 @@ public class TableSummaryMetrics {
.withTags("catalog", "database", "table")
.build();

// table summary snapshots number metric
public static final MetricDefine TABLE_SUMMARY_SNAPSHOTS =
defineGauge("table_summary_snapshots")
.withDescription("Number of snapshots in the table")
.withTags("catalog", "database", "table")
.build();

// table summary health score metric: a gauge named "table_summary_health_score",
// tagged by catalog, database and table
public static final MetricDefine TABLE_SUMMARY_HEALTH_SCORE =
defineGauge("table_summary_health_score")
.withDescription("Health score of the table")
.withTags("catalog", "database", "table")
.build();

private final ServerTableIdentifier identifier;
private final List<MetricKey> registeredMetricKeys = Lists.newArrayList();
private MetricRegistry globalRegistry;
Expand All @@ -136,6 +143,7 @@ public class TableSummaryMetrics {
private long dataFilesRecords = 0L;
private long equalityDeleteFilesRecords = 0L;
private long snapshots = 0L;
private long healthScore = 0L;

public TableSummaryMetrics(ServerTableIdentifier identifier) {
this.identifier = identifier;
Expand Down Expand Up @@ -191,9 +199,12 @@ public void register(MetricRegistry registry) {
TABLE_SUMMARY_EQUALITY_DELETE_FILES_RECORDS,
(Gauge<Long>) () -> equalityDeleteFilesRecords);

// register snapshots number metric
registerMetric(registry, TABLE_SUMMARY_SNAPSHOTS, (Gauge<Long>) () -> snapshots);

// register health score metric
registerMetric(registry, TABLE_SUMMARY_HEALTH_SCORE, (Gauge<Long>) () -> healthScore);

globalRegistry = registry;
}
}
Expand Down Expand Up @@ -231,6 +242,8 @@ public void refresh(OptimizingEvaluator.PendingInput tableSummary) {
positionDeleteFilesRecords = tableSummary.getPositionalDeleteFileRecords();
dataFilesRecords = tableSummary.getDataFileRecords();
equalityDeleteFilesRecords = tableSummary.getEqualityDeleteFileRecords();

healthScore = tableSummary.getHealthScore();
}

public void refreshSnapshots(MixedTable table) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_EQUALITY_DELETE_FILES;
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_EQUALITY_DELETE_FILES_RECORDS;
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_EQUALITY_DELETE_FILES_SIZE;
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_HEALTH_SCORE;
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_POSITION_DELETE_FILES;
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_POSITION_DELETE_FILES_RECORDS;
import static org.apache.amoro.server.table.TableSummaryMetrics.TABLE_SUMMARY_POSITION_DELETE_FILES_SIZE;
Expand Down Expand Up @@ -170,6 +171,8 @@ public void testTableSummaryMetrics() {

Gauge<Long> snapshots = getMetric(metrics, identifier, TABLE_SUMMARY_SNAPSHOTS);

Gauge<Long> healthScore = getMetric(metrics, identifier, TABLE_SUMMARY_HEALTH_SCORE);

Assertions.assertEquals(0, totalFiles.getValue());
Assertions.assertEquals(0, dataFiles.getValue());
Assertions.assertEquals(0, posDelFiles.getValue());
Expand All @@ -184,6 +187,7 @@ public void testTableSummaryMetrics() {
Assertions.assertEquals(0, dataRecords.getValue());
Assertions.assertEquals(0, posDelRecords.getValue());
Assertions.assertEquals(0, eqDelRecords.getValue());
Assertions.assertEquals(0, healthScore.getValue());

// refresh metrics
initTableWithFiles();
Expand All @@ -202,6 +206,7 @@ public void testTableSummaryMetrics() {
Assertions.assertTrue(posDelRecords.getValue() > 0);

Assertions.assertTrue(snapshots.getValue() > 0);
Assertions.assertTrue(healthScore.getValue() > 0);
}

private Gauge<Long> getMetric(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ public class TableSummary {
private String tableFormat;
private long records;
private String optimizingStatus;
private int healthScore = -1; // -1 means not calculated

public TableSummary() {}

Expand Down Expand Up @@ -71,4 +72,13 @@ public String getOptimizingStatus() {
/**
 * Sets the optimizing status shown for the table.
 *
 * @param optimizingStatus status text to store
 */
public void setOptimizingStatus(String optimizingStatus) {
this.optimizingStatus = optimizingStatus;
}

/**
 * Returns the current table health score.
 *
 * @return the stored score; the field defaults to -1, which means "not calculated"
 */
public int getHealthScore() {
return healthScore;
}

/**
 * Sets the table health score.
 *
 * @param healthScore score to store; the field's initial value -1 means "not calculated"
 */
public void setHealthScore(int healthScore) {
this.healthScore = healthScore;
}
}
5 changes: 4 additions & 1 deletion amoro-web/mock/modules/table.js
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,10 @@ export default [
"file": 2,
"size": "1.79KB",
"tableFormat": "Iceberg(V1)",
"averageFile": "918.00B"
"averageFile": "918.00B",
"records":24,
"optimizingStatus":"IDLE",
"healthScore":100,
},
"baseLocation": "/mnt/dfs/4/warehouse_public/db/user",
"filter": null,
Expand Down
1 change: 1 addition & 0 deletions amoro-web/src/language/en.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

export default {
healthScore: 'Health Score',
overview: 'Overview',
catalogs: 'Catalogs',
catalog: 'Catalog',
Expand Down
1 change: 1 addition & 0 deletions amoro-web/src/language/zh.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
*/

export default {
healthScore: '健康度',
overview: '总览',
catalogs: '目录',
catalog: '目录',
Expand Down
2 changes: 2 additions & 0 deletions amoro-web/src/types/common.type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export interface IKeyAndValue {
}
export interface IBaseDetailInfo {
optimizingStatus: string
records: string
tableType: string
tableName: string
createTime: string
Expand All @@ -68,6 +69,7 @@ export interface IBaseDetailInfo {
averageFile: string
tableFormat: string
hasPartition: boolean
healthScore: number
}

export interface DetailColumnItem {
Expand Down
13 changes: 13 additions & 0 deletions amoro-web/src/views/tables/index.vue
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,13 @@ export default defineComponent({
isSecondaryNav: false,
baseInfo: {
optimizingStatus: '',
records: '',
tableType: '',
tableName: '',
createTime: '',
tableFormat: '',
hasPartition: false,
healthScore: -1,
} as IBaseDetailInfo,
detailLoaded: false,
})
Expand Down Expand Up @@ -150,6 +152,10 @@ export default defineComponent({
<a-divider type="vertical" />
</template>
<p>{{ $t('tableFormat') }}: <span class="text-color">{{ baseInfo.tableFormat }}</span></p>
<a-divider type="vertical" />
<p>
{{ $t('healthScore') }}: <span class="text-color">{{ baseInfo.healthScore == null || baseInfo.healthScore < 0 ? 'N/A' : baseInfo.healthScore }}</span>
</p>
</div>
</div>
</div>
Expand Down Expand Up @@ -178,9 +184,11 @@ export default defineComponent({
border: 1px solid #e8e8f0;
padding: 12px 0;
min-height: 100%;

.create-time {
margin-top: 12px;
}

.tables-menu-wrap {
position: fixed;
width: 100%;
Expand All @@ -189,23 +197,28 @@ export default defineComponent({
left: 200px;
z-index: 100;
}

.table-name {
font-size: 24px;
line-height: 1.5;
margin-right: 16px;
max-width: 100%;
padding-left: 24px;
}

.table-info {
padding: 12px 24px 0 24px;

.text-color {
color: #7CB305;
}
}

.table-edit {
font-size: 18px;
padding-right: 12px;
}

:deep(.ant-tabs-nav) {
padding-left: 12px;
margin-bottom: 0;
Expand Down
Loading
Loading