From 5e717ab79fee90752fb092470e8554d87eb469e5 Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 13 May 2025 10:38:29 +0800 Subject: [PATCH 01/13] Optimize JVM memory monitoring metrics --- .../common/model/BaseHeartBeat.java | 4 ++ .../master/registry/MasterHeartBeatTask.java | 4 ++ .../meter/metrics/DefaultMetricsProvider.java | 46 +++++++++++++++++-- .../meter/metrics/SystemMetrics.java | 4 ++ .../worker/task/WorkerHeartBeatTask.java | 4 ++ 5 files changed, 59 insertions(+), 3 deletions(-) diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java index 2837e5482b76..a2029a71a31c 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/BaseHeartBeat.java @@ -36,6 +36,10 @@ public class BaseHeartBeat implements HeartBeat { protected double jvmCpuUsage; protected double cpuUsage; protected double jvmMemoryUsage; + private double jvmHeapUsed; + private double jvmNonHeapUsed; + private double jvmHeapMax; + private double jvmNonHeapMax; protected double memoryUsage; protected double diskUsage; protected ServerStatus serverStatus; diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterHeartBeatTask.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterHeartBeatTask.java index 59174bac303e..328224840162 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterHeartBeatTask.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/registry/MasterHeartBeatTask.java @@ -74,6 +74,10 @@ public MasterHeartBeat getHeartBeat() { .jvmCpuUsage(systemMetrics.getJvmCpuUsagePercentage()) 
.cpuUsage(systemMetrics.getSystemCpuUsagePercentage()) .jvmMemoryUsage(systemMetrics.getJvmMemoryUsedPercentage()) + .jvmHeapUsed(systemMetrics.getJvmHeapUsed()) + .jvmHeapMax(systemMetrics.getJvmHeapMax()) + .jvmNonHeapUsed(systemMetrics.getJvmNonHeapUsed()) + .jvmNonHeapMax(systemMetrics.getJvmNonHeapMax()) .memoryUsage(systemMetrics.getSystemMemoryUsedPercentage()) .diskUsage(systemMetrics.getDiskUsedPercentage()) .processId(processId) diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java index 0db50ba95e88..6e596ee6fdec 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java @@ -21,6 +21,7 @@ import lombok.extern.slf4j.Slf4j; import io.micrometer.core.instrument.MeterRegistry; +import io.micrometer.core.instrument.Meter; @Slf4j public class DefaultMetricsProvider implements MetricsProvider { @@ -59,8 +60,19 @@ public SystemMetrics getSystemMetrics() { lastProcessCpuUsage = processCpuUsage; } - double jvmMemoryUsed = meterRegistry.get("jvm.memory.used").meter().measure().iterator().next().getValue(); - double jvmMemoryMax = meterRegistry.get("jvm.memory.max").meter().measure().iterator().next().getValue(); + // Calculate JVM memory usage and maximum values + double jvmHeapUsed = calculateTotalMemory(meterRegistry, "heap", "jvm.memory.used"); + double jvmNonHeapUsed = calculateTotalMemory(meterRegistry, "nonheap", "jvm.memory.used"); + + double jvmHeapMax = calculateTotalMemory(meterRegistry, "heap", "jvm.memory.max"); + double jvmNonHeapMax = calculateTotalMemory(meterRegistry, "nonheap", "jvm.memory.max"); + + // Calculate totals + double jvmMemoryUsed = jvmHeapUsed + jvmNonHeapUsed; + double jvmMemoryMax = 
jvmHeapMax + jvmNonHeapMax; + + // Ensure jvmMemoryMax is not zero + double jvmMemoryUsedPercentage = jvmMemoryMax > 0 ? jvmMemoryUsed / jvmMemoryMax : 0.0; long totalSystemMemory = OSUtils.getTotalSystemMemory(); long systemMemoryAvailable = OSUtils.getSystemAvailableMemoryUsed(); @@ -73,7 +85,11 @@ public SystemMetrics getSystemMetrics() { .jvmCpuUsagePercentage(processCpuUsage) .jvmMemoryUsed(jvmMemoryUsed) .jvmMemoryMax(jvmMemoryMax) - .jvmMemoryUsedPercentage(jvmMemoryUsed / jvmMemoryMax) + .jvmHeapUsed(jvmHeapUsed) + .jvmHeapMax(jvmHeapMax) + .jvmNonHeapUsed(jvmNonHeapUsed) + .jvmNonHeapMax(jvmNonHeapMax) + .jvmMemoryUsedPercentage(jvmMemoryUsedPercentage) .systemMemoryUsed(totalSystemMemory - systemMemoryAvailable) .systemMemoryMax(totalSystemMemory) .systemMemoryUsedPercentage((double) (totalSystemMemory - systemMemoryAvailable) / totalSystemMemory) @@ -85,4 +101,28 @@ public SystemMetrics getSystemMetrics() { return systemMetrics; } + /** + * Calculate the total memory usage for a specified area + * This method calculates the total memory usage by iterating over all meters in the MeterRegistry that match the given name + * It only sums up meters that have the same area tag and a value greater than 0 + * + * @param meterRegistry A MeterRegistry instance used to retrieve memory data + * @param area The memory area type ("heap" or "nonheap") + * @param name The meter name to match, used to find related meters in the MeterRegistry + * @return The total memory usage for the specified area + */ + private double calculateTotalMemory(MeterRegistry meterRegistry, String area, String name) { + double memory = 0.0; + Iterable meters = meterRegistry.find(name).meters(); + for (Meter meter : meters) { + if (area.equals(meter.getId().getTag("area"))) { + double value = meter.measure().iterator().next().getValue(); + if (value > 0) { // Ignore undefined maximum values (-1) + memory += value; + } + } + } + return memory; + } + } diff --git 
a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java index 6da8f8ca4ece..ca2f152eb8f3 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java @@ -36,6 +36,10 @@ public class SystemMetrics { // todo: get pod memory usage private double jvmMemoryUsed; private double jvmMemoryMax; + private double jvmHeapUsed; + private double jvmNonHeapUsed; + private double jvmHeapMax; + private double jvmNonHeapMax; private double jvmMemoryUsedPercentage; // System-Memory diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java index 01ed2f541ef0..2e157f6fd99d 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java @@ -71,6 +71,10 @@ public WorkerHeartBeat getHeartBeat() { .jvmCpuUsage(systemMetrics.getJvmCpuUsagePercentage()) .cpuUsage(systemMetrics.getSystemCpuUsagePercentage()) .jvmMemoryUsage(systemMetrics.getJvmMemoryUsedPercentage()) + .jvmHeapUsed(systemMetrics.getJvmHeapUsed()) + .jvmHeapMax(systemMetrics.getJvmHeapMax()) + .jvmNonHeapUsed(systemMetrics.getJvmNonHeapUsed()) + .jvmNonHeapMax(systemMetrics.getJvmNonHeapMax()) .memoryUsage(systemMetrics.getSystemMemoryUsedPercentage()) .diskUsage(systemMetrics.getDiskUsedPercentage()) .processId(processId) From 74ed24c327bbd8a47eb21843870bcf167cf15cc6 Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 13 May 2025 11:05:53 +0800 Subject: [PATCH 02/13] [Fix-17173] [JVM Metrics] 
Fix JVM memory monitoring metrics,Add parentheses to make the operator precedence explicit --- .../meter/metrics/DefaultMetricsProvider.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java index 6e596ee6fdec..4552f3fb986a 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java @@ -72,7 +72,7 @@ public SystemMetrics getSystemMetrics() { double jvmMemoryMax = jvmHeapMax + jvmNonHeapMax; // Ensure jvmMemoryMax is not zero - double jvmMemoryUsedPercentage = jvmMemoryMax > 0 ? jvmMemoryUsed / jvmMemoryMax : 0.0; + double jvmMemoryUsedPercentage = (jvmMemoryMax > 0) ? (jvmMemoryUsed / jvmMemoryMax) : 0.0; long totalSystemMemory = OSUtils.getTotalSystemMemory(); long systemMemoryAvailable = OSUtils.getSystemAvailableMemoryUsed(); @@ -117,7 +117,8 @@ private double calculateTotalMemory(MeterRegistry meterRegistry, String area, St for (Meter meter : meters) { if (area.equals(meter.getId().getTag("area"))) { double value = meter.measure().iterator().next().getValue(); - if (value > 0) { // Ignore undefined maximum values (-1) + // Ignore undefined maximum values (-1) + if (value > 0) { memory += value; } } From 705e18408eb844be753de422305a49aa36c38c4f Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 13 May 2025 13:59:04 +0800 Subject: [PATCH 03/13] [Fix-17173] [JVM Metrics] Fix JVM memory monitoring metrics, spotless Apply --- .../dolphinscheduler/meter/metrics/DefaultMetricsProvider.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java 
b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java index 4552f3fb986a..c7465a889fac 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java @@ -20,8 +20,8 @@ import org.apache.dolphinscheduler.common.utils.OSUtils; import lombok.extern.slf4j.Slf4j; -import io.micrometer.core.instrument.MeterRegistry; import io.micrometer.core.instrument.Meter; +import io.micrometer.core.instrument.MeterRegistry; @Slf4j public class DefaultMetricsProvider implements MetricsProvider { From 4d044940902c2dd426a04a1d2d7ac9291bf5fa7b Mon Sep 17 00:00:00 2001 From: yud8 Date: Wed, 12 Nov 2025 17:09:07 +0800 Subject: [PATCH 04/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670) --- .../dolphinscheduler_env.sh | 1 + .../dolphinscheduler_env.sh | 1 + .../dolphinscheduler_env.sh | 1 + deploy/kubernetes/dolphinscheduler/README.md | 1 + .../kubernetes/dolphinscheduler/values.yaml | 2 + docs/docs/zh/architecture/configuration.md | 33 ++--- .../docker/basic/docker-compose.yaml | 1 + .../datasource-clickhouse/docker-compose.yaml | 1 + .../datasource-hive/docker-compose.yaml | 1 + .../datasource-mysql/docker-compose.yaml | 1 + .../datasource-postgresql/docker-compose.yaml | 1 + .../datasource-sqlserver/docker-compose.yaml | 1 + .../docker/file-manage/docker-compose.yaml | 1 + .../docker/ldap-login/docker-compose.yaml | 1 + .../common/model/WorkerHeartBeat.java | 1 + .../metrics/BaseServerLoadProtection.java | 8 ++ .../BaseServerLoadProtectionConfig.java | 2 + .../meter/metrics/DefaultMetricsProvider.java | 132 ++++++++++++++++++ .../meter/metrics/SystemMetrics.java | 5 + .../src/locales/en_US/monitor.ts | 1 + .../src/locales/zh_CN/monitor.ts | 1 + .../views/monitor/servers/worker/index.tsx | 15 +- 
.../worker/task/WorkerHeartBeatTask.java | 1 + .../src/main/resources/application.yaml | 2 + 24 files changed, 198 insertions(+), 17 deletions(-) diff --git a/.github/workflows/cluster-test/mysql_with_mysql_registry/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql_with_mysql_registry/dolphinscheduler_env.sh index 0ec5cb8b67bf..7b80bb44cd3f 100755 --- a/.github/workflows/cluster-test/mysql_with_mysql_registry/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql_with_mysql_registry/dolphinscheduler_env.sh @@ -54,6 +54,7 @@ export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS= export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 +export WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* diff --git a/.github/workflows/cluster-test/mysql_with_zookeeper_registry/dolphinscheduler_env.sh b/.github/workflows/cluster-test/mysql_with_zookeeper_registry/dolphinscheduler_env.sh index d769a3a02dc9..e9513d1d6b77 100755 --- a/.github/workflows/cluster-test/mysql_with_zookeeper_registry/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/mysql_with_zookeeper_registry/dolphinscheduler_env.sh @@ -53,6 +53,7 @@ export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS= export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 +export 
WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* diff --git a/.github/workflows/cluster-test/postgresql_with_zookeeper_registry/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql_with_zookeeper_registry/dolphinscheduler_env.sh index 05f3203b5051..75ffa11e5791 100644 --- a/.github/workflows/cluster-test/postgresql_with_zookeeper_registry/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql_with_zookeeper_registry/dolphinscheduler_env.sh @@ -53,6 +53,7 @@ export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS= export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 +export WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* diff --git a/deploy/kubernetes/dolphinscheduler/README.md b/deploy/kubernetes/dolphinscheduler/README.md index a89dcbc20746..8cc7a3fe1fd6 100644 --- a/deploy/kubernetes/dolphinscheduler/README.md +++ b/deploy/kubernetes/dolphinscheduler/README.md @@ -320,6 +320,7 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | worker.env.WORKER_MAX_HEARTBEAT_INTERVAL | string | `"10s"` | Worker heartbeat interval | | worker.env.WORKER_SERVER_LOAD_PROTECTION_ENABLED | bool | `false` | If set true, will open worker overload protection | | 
worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. | +| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max data basedir disk usage, when the worker's data basedir disk usage is smaller than this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max jvm cpu usage, when the worker's jvm cpu usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max memory usage , when the worker's memory usage is smaller then this value, worker server can be dispatched tasks. | diff --git a/deploy/kubernetes/dolphinscheduler/values.yaml b/deploy/kubernetes/dolphinscheduler/values.yaml index 4a53dc723b78..1f2504b26bb0 100644 --- a/deploy/kubernetes/dolphinscheduler/values.yaml +++ b/deploy/kubernetes/dolphinscheduler/values.yaml @@ -692,6 +692,8 @@ worker: WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.7 # -- Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.7 + # -- Worker max data basedir disk usage, when the worker's data basedir disk usage is smaller than this value, worker server can be dispatched tasks. 
+ WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.7 # -- Worker execute thread number to limit task instances WORKER_EXEC_THREADS: "100" # -- Worker heartbeat interval diff --git a/docs/docs/zh/architecture/configuration.md b/docs/docs/zh/architecture/configuration.md index 6d9f1061f9f1..0a94d6a9ae5e 100644 --- a/docs/docs/zh/architecture/configuration.md +++ b/docs/docs/zh/architecture/configuration.md @@ -303,22 +303,23 @@ common.properties配置文件目前主要是配置hadoop/s3/yarn/applicationId 位置:`worker-server/conf/application.yaml` -| 参数 | 默认值 | 描述 | -|-----------------------------------------------------------------------------|-----------|-----------------------------------------------------------------------------------------| -| worker.listen-port | 1234 | worker监听端口 | -| worker.max-heartbeat-interval | 10s | worker最大心跳间隔 | -| worker.host-weight | 100 | 派发任务时,worker主机的权重 | -| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | -| worker.server-load-protection.enabled | true | 是否开启系统保护策略 | -| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.8 | worker最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统CPU | -| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.8 | worker最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的JVM CPU | -| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.8 | worker最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统内存 | -| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.8 | worker最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,worker服务才能接收任务. 
默认值为0.8: 会使用80%的操作系统磁盘空间 | -| worker.alert-listen-host | localhost | alert监听host | -| worker.alert-listen-port | 50052 | alert监听端口 | -| worker.physical-task-config.task-executor-thread-size | 100 | Worker中任务最大并发度 | -| worker.tenant-config.auto-create-tenant-enabled | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | -| worker.tenant-config.default-tenant-enabled | false | 如果设置为true, 将会使用worker服务启动用户作为 `default` 租户。 | +| 参数 | 默认值 | 描述 | +|---------------------------------------------------------------------------------|-----------|---------------------------------------------------------------------------------------------------------------------------| +| worker.listen-port | 1234 | worker监听端口 | +| worker.max-heartbeat-interval | 10s | worker最大心跳间隔 | +| worker.host-weight | 100 | 派发任务时,worker主机的权重 | +| worker.tenant-auto-create | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | +| worker.server-load-protection.enabled | true | 是否开启系统保护策略 | +| worker.server-load-protection.max-system-cpu-usage-percentage-thresholds | 0.8 | worker最大系统cpu使用值,只有当前系统cpu使用值低于最大系统cpu使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统CPU | +| worker.server-load-protection.max-jvm-cpu-usage-percentage-thresholds | 0.8 | worker最大JVM cpu使用值,只有当前JVM cpu使用值低于最大JVM cpu使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的JVM CPU | +| worker.server-load-protection.max-system-memory-usage-percentage-thresholds | 0.8 | worker最大系统 内存使用值,只有当前系统内存使用值低于最大系统内存使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统内存 | +| worker.server-load-protection.max-disk-usage-percentage-thresholds | 0.8 | worker最大系统磁盘使用值,只有当前系统磁盘使用值低于最大系统磁盘使用值,worker服务才能接收任务. 默认值为0.8: 会使用80%的操作系统磁盘空间 | +| worker.server-load-protection.max-data-basedir-disk-usage-percentage-thresholds | 0.8 | worker最大data.basedir.path目录磁盘使用值,只有当前data.basedir.path目录磁盘使用值低于该值,worker服务才能接收任务. 
默认值为0.8: 会使用80%的data.basedir.path目录磁盘空间 | +| worker.alert-listen-host | localhost | alert监听host | +| worker.alert-listen-port | 50052 | alert监听端口 | +| worker.physical-task-config.task-executor-thread-size | 100 | Worker中任务最大并发度 | +| worker.tenant-config.auto-create-tenant-enabled | true | 租户对应于系统的用户,由worker提交作业.如果系统没有该用户,则在参数worker.tenant.auto.create为true后自动创建。 | +| worker.tenant-config.default-tenant-enabled | false | 如果设置为true, 将会使用worker服务启动用户作为 `default` 租户。 | ## Alert Server相关配置 diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/basic/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/basic/docker-compose.yaml index 5438b59e8a5b..1bf166ac08ce 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/basic/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/basic/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml index f3047c718ff7..b385d9d85d1d 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml @@ -29,6 +29,7 
@@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-hive/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-hive/docker-compose.yaml index 1cbd859d29ce..66000c5150f4 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-hive/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-hive/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml index 5e8bb90f2818..89a40b609ecb 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml @@ -29,6 +29,7 @@ services: 
WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml index c2bcec2e61fb..2ae3ae8f37f1 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml index eb9b8e5f6b76..553f98f80fbc 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml @@ -29,6 +29,7 @@ services: 
WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/docker-compose.yaml index df7257a6b260..66411d6326b0 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/file-manage/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/ldap-login/docker-compose.yaml b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/ldap-login/docker-compose.yaml index 4b76e114e7fb..0d506f422029 100644 --- a/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/ldap-login/docker-compose.yaml +++ b/dolphinscheduler-api-test/dolphinscheduler-api-test-case/src/test/resources/docker/ldap-login/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 
WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java index 6ccf01e4abfc..2d73afa4e7ad 100644 --- a/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java +++ b/dolphinscheduler-common/src/main/java/org/apache/dolphinscheduler/common/model/WorkerHeartBeat.java @@ -31,4 +31,5 @@ public class WorkerHeartBeat extends BaseHeartBeat implements HeartBeat { private int workerHostWeight; // worker host weight private double threadPoolUsage; // worker waiting task count private String workerGroup; + private double dataBasedirPathDiskUsagePercentage; // data basedir path disk usage percentage } diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java index 7d5ae76a4060..6ec526cee138 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java @@ -64,6 +64,14 @@ public boolean isOverload(SystemMetrics systemMetrics) { baseServerLoadProtectionConfig.getMaxSystemMemoryUsagePercentageThresholds()); return true; } + if (systemMetrics.getDataBasedirPathUsedPercentage() > baseServerLoadProtectionConfig + .getMaxDataBasedirDiskUsagePercentageThresholds()) { + log.info( + "OverLoad: the 
DataBasedirPathDiskUsagePercentage: {} is over then the maxDataBasedirDiskUsagePercentageThresholds {}", + systemMetrics.getDataBasedirPathUsedPercentage(), + baseServerLoadProtectionConfig.getMaxDataBasedirDiskUsagePercentageThresholds()); + return true; + } return false; } } diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java index f10b63cd18c9..0324b423d767 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java @@ -32,4 +32,6 @@ public abstract class BaseServerLoadProtectionConfig { protected double maxDiskUsagePercentageThresholds = 0.7; + protected double maxDataBasedirDiskUsagePercentageThresholds = 0.7; + } diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java index c7465a889fac..b045aa4e53f3 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/DefaultMetricsProvider.java @@ -17,9 +17,18 @@ package org.apache.dolphinscheduler.meter.metrics; +import org.apache.dolphinscheduler.common.constants.Constants; import org.apache.dolphinscheduler.common.utils.OSUtils; +import org.apache.dolphinscheduler.common.utils.PropertyUtils; + +import java.nio.file.FileStore; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.util.function.Supplier; import lombok.extern.slf4j.Slf4j; +import io.micrometer.core.instrument.Gauge; 
import io.micrometer.core.instrument.Meter; import io.micrometer.core.instrument.MeterRegistry; @@ -28,8 +37,12 @@ public class DefaultMetricsProvider implements MetricsProvider { private final MeterRegistry meterRegistry; + // Data basedir path constant + private static final String DEFAULT_DATA_BASEDIR_PATH = "/tmp/dolphinscheduler"; + public DefaultMetricsProvider(MeterRegistry meterRegistry) { this.meterRegistry = meterRegistry; + registerDataBasedirPathMetrics(); } private SystemMetrics systemMetrics; @@ -41,12 +54,128 @@ public DefaultMetricsProvider(MeterRegistry meterRegistry) { private static final long SYSTEM_METRICS_REFRESH_INTERVAL = 1_000L; + // Data basedir path metrics + private double dataBasedirPathTotalBytes = 0.0; + private double dataBasedirPathFreeBytes = 0.0; + private volatile boolean dataBasedirPathMetricsRegistered = false; + private String registeredDataBasedirPath = ""; + + /** + * Register data basedir path metrics to micrometer + */ + private void registerDataBasedirPathMetrics() { + try { + String dataBasedirPath = getDataBasedirPath(); + Path path = Paths.get(dataBasedirPath); + + // Check if path exists, if not, try to create it + if (!Files.exists(path)) { + log.info("Data basedir path {} does not exist, trying to create it", dataBasedirPath); + Files.createDirectories(path); + } + + // Register gauges for data basedir path disk usage + Gauge.builder("data.basedir.path.total", (Supplier) this::getDataBasedirPathTotalBytes) + .description("Total space of data basedir path") + .tag("path", dataBasedirPath) + .register(meterRegistry); + + Gauge.builder("data.basedir.path.free", (Supplier) this::getDataBasedirPathFreeBytes) + .description("Free space of data basedir path") + .tag("path", dataBasedirPath) + .register(meterRegistry); + + Gauge.builder("data.basedir.path.used", (Supplier) this::getDataBasedirPathUsedBytes) + .description("Used space of data basedir path") + .tag("path", dataBasedirPath) + .register(meterRegistry); + + 
Gauge.builder("data.basedir.path.used.percentage", + (Supplier) this::getDataBasedirPathUsedPercentage) + .description("Used space percentage of data basedir path") + .tag("path", dataBasedirPath) + .register(meterRegistry); + + registeredDataBasedirPath = dataBasedirPath; + dataBasedirPathMetricsRegistered = true; + log.info("Successfully registered data basedir path metrics for path: {}", dataBasedirPath); + } catch (Exception e) { + log.warn("Failed to register data basedir path metrics", e); + } + } + + /** + * Get data basedir path from configuration + * @return data basedir path + */ + private String getDataBasedirPath() { + try { + return PropertyUtils.getString(Constants.DATA_BASEDIR_PATH, DEFAULT_DATA_BASEDIR_PATH); + } catch (Exception e) { + log.warn("Failed to get data.basedir.path from configuration, using default: {}", DEFAULT_DATA_BASEDIR_PATH, + e); + return DEFAULT_DATA_BASEDIR_PATH; + } + } + + /** + * Refresh data basedir path disk usage metrics + */ + private void refreshDataBasedirPathMetrics() { + try { + if (!dataBasedirPathMetricsRegistered) { + return; + } + + String dataBasedirPath = getDataBasedirPath(); + // If the path has changed, we should re-register the metrics + if (!registeredDataBasedirPath.equals(dataBasedirPath)) { + log.info("Data basedir path changed from {} to {}, re-registering metrics", registeredDataBasedirPath, + dataBasedirPath); + registerDataBasedirPathMetrics(); + return; + } + + Path path = Paths.get(dataBasedirPath); + FileStore fileStore = Files.getFileStore(path); + + dataBasedirPathTotalBytes = fileStore.getTotalSpace(); + dataBasedirPathFreeBytes = fileStore.getUsableSpace(); + } catch (Exception e) { + log.warn("Failed to refresh data basedir path metrics", e); + } + } + + // Getters for data basedir path metrics + public double getDataBasedirPathTotalBytes() { + return dataBasedirPathTotalBytes; + } + + public double getDataBasedirPathFreeBytes() { + return dataBasedirPathFreeBytes; + } + + public double 
getDataBasedirPathUsedBytes() { + return getDataBasedirPathTotalBytes() - getDataBasedirPathFreeBytes(); + } + + public double getDataBasedirPathUsedPercentage() { + double total = getDataBasedirPathTotalBytes(); + if (total <= 0) { + return 0.0; + } + return getDataBasedirPathUsedBytes() / total; + } + @Override public SystemMetrics getSystemMetrics() { if (System.currentTimeMillis() - lastRefreshTime < SYSTEM_METRICS_REFRESH_INTERVAL) { return systemMetrics; } + // Refresh data basedir path metrics + refreshDataBasedirPathMetrics(); + double systemCpuUsage = meterRegistry.get("system.cpu.usage").gauge().value(); if (Double.compare(systemCpuUsage, Double.NaN) == 0) { systemCpuUsage = lastSystemCpuUsage; @@ -96,6 +225,9 @@ public SystemMetrics getSystemMetrics() { .diskUsed(diskToTalBytes - diskFreeBytes) .diskTotal(diskToTalBytes) .diskUsedPercentage((diskToTalBytes - diskFreeBytes) / diskToTalBytes) + .dataBasedirPathUsed(getDataBasedirPathUsedBytes()) + .dataBasedirPathTotal(getDataBasedirPathTotalBytes()) + .dataBasedirPathUsedPercentage(getDataBasedirPathUsedPercentage()) .build(); lastRefreshTime = System.currentTimeMillis(); return systemMetrics; diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java index ca2f152eb8f3..588189236fef 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/SystemMetrics.java @@ -54,4 +54,9 @@ public class SystemMetrics { private double diskTotal; private double diskUsedPercentage; + // Data basedir path disk usage + private double dataBasedirPathUsed; + private double dataBasedirPathTotal; + private double dataBasedirPathUsedPercentage; + } diff --git a/dolphinscheduler-ui/src/locales/en_US/monitor.ts 
b/dolphinscheduler-ui/src/locales/en_US/monitor.ts index 54431534322f..01945456fca8 100644 --- a/dolphinscheduler-ui/src/locales/en_US/monitor.ts +++ b/dolphinscheduler-ui/src/locales/en_US/monitor.ts @@ -33,6 +33,7 @@ export default { cpu_usage: 'CPU Usage', memory_usage: 'Memory Usage', disk_usage: 'Disk Usage', + data_basedir_disk_usage: 'Data Directory Disk Usage', thread_pool_usage: 'Thread Pool Usage', create_time: 'Create Time', last_heartbeat_time: 'Last Heartbeat Time', diff --git a/dolphinscheduler-ui/src/locales/zh_CN/monitor.ts b/dolphinscheduler-ui/src/locales/zh_CN/monitor.ts index ef78068a61d9..23aab9fbda88 100644 --- a/dolphinscheduler-ui/src/locales/zh_CN/monitor.ts +++ b/dolphinscheduler-ui/src/locales/zh_CN/monitor.ts @@ -33,6 +33,7 @@ export default { cpu_usage: '处理器使用量', memory_usage: '内存使用量', disk_usage: '磁盘使用量', + data_basedir_disk_usage: '数据目录磁盘使用量', thread_pool_usage: '线程池使用量', create_time: '创建时间', last_heartbeat_time: '最后心跳时间', diff --git a/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx b/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx index 34ff4786acb5..a462548476e2 100644 --- a/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx +++ b/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx @@ -118,7 +118,7 @@ const worker = defineComponent({ - +
@@ -158,6 +158,19 @@ const worker = defineComponent({
+ + +
+ {item && ( + + )} +
+
+
diff --git a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java index 47d0f9c7dd45..43388c53e494 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/task/WorkerHeartBeatTask.java @@ -79,6 +79,7 @@ public WorkerHeartBeat getHeartBeat() { .jvmNonHeapMax(systemMetrics.getJvmNonHeapMax()) .memoryUsage(systemMetrics.getSystemMemoryUsedPercentage()) .diskUsage(systemMetrics.getDiskUsedPercentage()) + .dataBasedirPathDiskUsagePercentage(systemMetrics.getDataBasedirPathUsedPercentage()) .processId(processId) .workerHostWeight(workerConfig.getHostWeight()) .threadPoolUsage(taskExecutorContainer.slotUsage()) diff --git a/dolphinscheduler-worker/src/main/resources/application.yaml b/dolphinscheduler-worker/src/main/resources/application.yaml index 8c85f1f5eb5c..8756a2e78f36 100644 --- a/dolphinscheduler-worker/src/main/resources/application.yaml +++ b/dolphinscheduler-worker/src/main/resources/application.yaml @@ -59,6 +59,8 @@ worker: max-system-memory-usage-percentage-thresholds: 0.8 # Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. max-disk-usage-percentage-thresholds: 0.8 + # Worker max data basedir disk usage, when the worker's data basedir disk usage is smaller then this value, worker server can be dispatched tasks. 
+ max-data-basedir-disk-usage-percentage-thresholds: 0.8 task-execute-threads-full-policy: REJECT physical-task-config: # The number of threads in the Physical task engine that used to execute tasks From 2f1152456c83a07b116d2296e03aa1639f505de3 Mon Sep 17 00:00:00 2001 From: yud8 Date: Wed, 12 Nov 2025 18:41:11 +0800 Subject: [PATCH 05/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670)Test --- .../server/worker/config/WorkerServerLoadProtectionTest.java | 1 + 1 file changed, 1 insertion(+) diff --git a/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java b/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java index e393cfa9c387..52892c78dbd7 100644 --- a/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java +++ b/dolphinscheduler-worker/src/test/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionTest.java @@ -34,6 +34,7 @@ void isOverload() { .systemCpuUsagePercentage(0.71) .jvmCpuUsagePercentage(0.71) .diskUsedPercentage(0.71) + .dataBasedirPathUsedPercentage(0.71) .build(); workerConfig.getServerLoadProtection().setEnabled(false); From 9cc1ae854a63f69d5fe806b5f8784c7989d1b353 Mon Sep 17 00:00:00 2001 From: yud8 Date: Thu, 13 Nov 2025 11:26:21 +0800 Subject: [PATCH 06/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670)format --- dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx b/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx index a462548476e2..cd77bb1e126f 100644 --- a/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx +++ 
b/dolphinscheduler-ui/src/views/monitor/servers/worker/index.tsx @@ -164,7 +164,8 @@ const worker = defineComponent({ {item && ( )} From c743c798350e665856ca82ad102517d0fe81b794 Mon Sep 17 00:00:00 2001 From: yud8 Date: Thu, 13 Nov 2025 11:26:41 +0800 Subject: [PATCH 07/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670)format --- deploy/kubernetes/dolphinscheduler/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deploy/kubernetes/dolphinscheduler/README.md b/deploy/kubernetes/dolphinscheduler/README.md index 4c2ab7fc406c..1e15ed9ef876 100644 --- a/deploy/kubernetes/dolphinscheduler/README.md +++ b/deploy/kubernetes/dolphinscheduler/README.md @@ -323,8 +323,8 @@ Please refer to the [Quick Start in Kubernetes](../../../docs/docs/en/guide/inst | worker.env.WORKER_HOST_WEIGHT | string | `"100"` | Worker host weight to dispatch tasks | | worker.env.WORKER_MAX_HEARTBEAT_INTERVAL | string | `"10s"` | Worker heartbeat interval | | worker.env.WORKER_SERVER_LOAD_PROTECTION_ENABLED | bool | `false` | If set true, will open worker overload protection | -| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max data basedir disk usage, when the worker's data basedir disk usage is smaller then this value, worker server can be dispatched tasks. | +| worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max disk usage , when the worker's disk usage is smaller then this value, worker server can be dispatched tasks. 
| | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max jvm cpu usage, when the worker's jvm cpu usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max system cpu usage, when the worker's system cpu usage is smaller then this value, worker server can be dispatched tasks. | | worker.env.WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS | float | `0.7` | Worker max memory usage , when the worker's memory usage is smaller then this value, worker server can be dispatched tasks. | From f8fc71336c460cc4890eb2b35757948180bec618 Mon Sep 17 00:00:00 2001 From: yud8 Date: Mon, 17 Nov 2025 18:07:32 +0800 Subject: [PATCH 08/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670)Add data basedir disk usage threshold but set to 1.0 (100%) to effectively disable the check --- .../server/master/config/MasterServerLoadProtectionConfig.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java index c1cda12ce072..3ab05fceb11e 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java @@ -28,4 +28,6 @@ public class MasterServerLoadProtectionConfig extends BaseServerLoadProtectionCo private int maxConcurrentWorkflowInstances = Integer.MAX_VALUE; + // Add data basedir disk usage threshold but set to 1.0 (100%) to effectively disable the check + protected double 
maxDataBasedirDiskUsagePercentageThresholds = 1.0; } From 5efd83a75a59e913cd882371d00869af74267260 Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 18 Nov 2025 14:12:19 +0800 Subject: [PATCH 09/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670) test docker-compose.yaml --- .../postgresql_with_postgresql_registry/dolphinscheduler_env.sh | 1 + .../src/test/resources/docker/basic/docker-compose.yaml | 1 + .../resources/docker/datasource-clickhouse/docker-compose.yaml | 1 + .../resources/docker/datasource-dolphindb/docker-compose.yaml | 1 + .../test/resources/docker/datasource-hive/docker-compose.yaml | 1 + .../test/resources/docker/datasource-mysql/docker-compose.yaml | 1 + .../resources/docker/datasource-postgresql/docker-compose.yaml | 1 + .../resources/docker/datasource-sqlserver/docker-compose.yaml | 1 + .../src/test/resources/docker/file-manage/docker-compose.yaml | 1 + .../src/test/resources/docker/python-task/docker-compose.yaml | 1 + .../src/test/resources/docker/workflow-http/docker-compose.yaml | 1 + 11 files changed, 11 insertions(+) diff --git a/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh index 79a13d1a6a6f..f28b78f70924 100644 --- a/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh @@ -54,6 +54,7 @@ export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS= export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 +export WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.9 # applicationId auto 
collection related configuration, the following configurations are unnecessary if setting appId.collect=log #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/basic/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/basic/docker-compose.yaml index d124cd90e2a0..4344b15ee865 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/basic/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/basic/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml index 2cf3743c5e74..cc130c364b90 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-clickhouse/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 
WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-dolphindb/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-dolphindb/docker-compose.yaml index 413c67e3dbfc..a886f149af30 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-dolphindb/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-dolphindb/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-hive/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-hive/docker-compose.yaml index 9c36411a118b..b0fc0fb9ac1b 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-hive/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-hive/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git 
a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml index a1d2b1c72ff6..e0d945f2ce02 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-mysql/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml index 6d851e195ede..23d8a94f4cd5 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-postgresql/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git 
a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml index b91073845d27..4ec432c962e0 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/datasource-sqlserver/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml index 81f9c98559fe..d098706dc042 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/file-manage/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/python-task/docker-compose.yaml 
b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/python-task/docker-compose.yaml index d66469dedfb3..a1478c1bfaf6 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/python-task/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/python-task/docker-compose.yaml @@ -32,6 +32,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" diff --git a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/workflow-http/docker-compose.yaml b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/workflow-http/docker-compose.yaml index 3d766e4223f4..9d05361a176a 100644 --- a/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/workflow-http/docker-compose.yaml +++ b/dolphinscheduler-e2e/dolphinscheduler-e2e-case/src/test/resources/docker/workflow-http/docker-compose.yaml @@ -29,6 +29,7 @@ services: WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_MEMORY_USAGE_PERCENTAGE_THRESHOLDS: 0.95 + WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.95 WORKER_TENANT_CONFIG_AUTO_CREATE_TENANT_ENABLED: 'true' ports: - "12345:12345" From 4c63a83300309dd7604c41bccd0e8347e08011c7 Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 18 Nov 2025 14:23:02 +0800 Subject: [PATCH 10/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670) Fix field shadowing and 
improve code clarity --- .../master/config/MasterServerLoadProtectionConfig.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java index 3ab05fceb11e..2341649beda8 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java @@ -30,4 +30,9 @@ public class MasterServerLoadProtectionConfig extends BaseServerLoadProtectionCo // Add data basedir disk usage threshold but set to 1.0 (100%) to effectively disable the check protected double maxDataBasedirDiskUsagePercentageThresholds = 1.0; + + @Override + public double getMaxDataBasedirDiskUsagePercentageThresholds() { + return maxDataBasedirDiskUsagePercentageThresholds; + } } From e4a8be041e0d2634474ad61e83c30b640021eda7 Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 18 Nov 2025 16:20:48 +0800 Subject: [PATCH 11/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670) fix : --- .../postgresql_with_postgresql_registry/dolphinscheduler_env.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh b/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh index f28b78f70924..a5cb9c8b467f 100644 --- a/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh +++ b/.github/workflows/cluster-test/postgresql_with_postgresql_registry/dolphinscheduler_env.sh @@ -54,7 +54,7 @@ export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_CPU_USAGE_PERCENTAGE_THRESHOLDS= 
export WORKER_SERVER_LOAD_PROTECTION_MAX_JVM_CPU_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_SYSTEM_MEMORY_USAGE_PERCENTAGE_THRESHOLDS=0.9 export WORKER_SERVER_LOAD_PROTECTION_MAX_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 -export WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS: 0.9 +export WORKER_SERVER_LOAD_PROTECTION_MAX_DATA_BASEDIR_DISK_USAGE_PERCENTAGE_THRESHOLDS=0.9 # applicationId auto collection related configuration, the following configurations are unnecessary if setting appId.collect=log #export HADOOP_CLASSPATH=`hadoop classpath`:${DOLPHINSCHEDULER_HOME}/tools/libs/* From af5f1aa9728d7300301349202124024013bfa72b Mon Sep 17 00:00:00 2001 From: yud8 Date: Tue, 18 Nov 2025 17:33:31 +0800 Subject: [PATCH 12/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670) fix :MasterServerLoadProtectionConfig --- .../config/MasterServerLoadProtectionConfig.java | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java index 2341649beda8..d6fb4bf599d5 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java @@ -28,11 +28,9 @@ public class MasterServerLoadProtectionConfig extends BaseServerLoadProtectionCo private int maxConcurrentWorkflowInstances = Integer.MAX_VALUE; - // Add data basedir disk usage threshold but set to 1.0 (100%) to effectively disable the check - protected double maxDataBasedirDiskUsagePercentageThresholds = 1.0; - - @Override - public double 
getMaxDataBasedirDiskUsagePercentageThresholds() { - return maxDataBasedirDiskUsagePercentageThresholds; + // Master node does not monitor disk usage of data.basedir.path directory, + // set threshold to 1.0 (100%) to disable the check + public MasterServerLoadProtectionConfig() { + this.maxDataBasedirDiskUsagePercentageThresholds = 1.0; } } From 69089d471f4ec962b11820d72fe970a34573ae4f Mon Sep 17 00:00:00 2001 From: yud8 Date: Mon, 24 Nov 2025 11:28:43 +0800 Subject: [PATCH 13/13] [Improvement][Worker-monitoring] Add disk usage monitoring for data.basedir.path directory (#17670) move disk monitoring to workers only - Remove master disk checks, keep only for workers - Clean up related configs and constructors - Fix config reference in WorkerServerLoadProtection --- .../config/MasterServerLoadProtectionConfig.java | 5 ----- .../meter/metrics/BaseServerLoadProtection.java | 9 +-------- .../metrics/BaseServerLoadProtectionConfig.java | 2 -- .../worker/config/WorkerServerLoadProtection.java | 14 +++++++++++++- .../config/WorkerServerLoadProtectionConfig.java | 3 +++ 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java index d6fb4bf599d5..c1cda12ce072 100644 --- a/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java +++ b/dolphinscheduler-master/src/main/java/org/apache/dolphinscheduler/server/master/config/MasterServerLoadProtectionConfig.java @@ -28,9 +28,4 @@ public class MasterServerLoadProtectionConfig extends BaseServerLoadProtectionCo private int maxConcurrentWorkflowInstances = Integer.MAX_VALUE; - // Master node does not monitor disk usage of data.basedir.path directory, - // set threshold to 1.0 (100%) to disable the check - public 
MasterServerLoadProtectionConfig() { - this.maxDataBasedirDiskUsagePercentageThresholds = 1.0; - } } diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java index 6ec526cee138..9dcda4d9808d 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtection.java @@ -64,14 +64,7 @@ public boolean isOverload(SystemMetrics systemMetrics) { baseServerLoadProtectionConfig.getMaxSystemMemoryUsagePercentageThresholds()); return true; } - if (systemMetrics.getDataBasedirPathUsedPercentage() > baseServerLoadProtectionConfig - .getMaxDataBasedirDiskUsagePercentageThresholds()) { - log.info( - "OverLoad: the DataBasedirPathDiskUsagePercentage: {} is over then the maxDataBasedirDiskUsagePercentageThresholds {}", - systemMetrics.getDataBasedirPathUsedPercentage(), - baseServerLoadProtectionConfig.getMaxDataBasedirDiskUsagePercentageThresholds()); - return true; - } + return false; } } diff --git a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java index 0324b423d767..f10b63cd18c9 100644 --- a/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java +++ b/dolphinscheduler-meter/src/main/java/org/apache/dolphinscheduler/meter/metrics/BaseServerLoadProtectionConfig.java @@ -32,6 +32,4 @@ public abstract class BaseServerLoadProtectionConfig { protected double maxDiskUsagePercentageThresholds = 0.7; - protected double maxDataBasedirDiskUsagePercentageThresholds = 0.7; - } diff --git 
a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java index e21e18a5afd7..64c7f3668cfc 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtection.java @@ -33,13 +33,16 @@ public class WorkerServerLoadProtection extends BaseServerLoadProtection { @Autowired private PhysicalTaskExecutorContainerProvider physicalTaskExecutorContainerDelegator; + private final WorkerServerLoadProtectionConfig workerServerLoadProtectionConfig; + public WorkerServerLoadProtection(WorkerConfig workerConfig) { super(workerConfig.getServerLoadProtection()); + this.workerServerLoadProtectionConfig = workerConfig.getServerLoadProtection(); } @Override public boolean isOverload(SystemMetrics systemMetrics) { - if (!baseServerLoadProtectionConfig.isEnabled()) { + if (!workerServerLoadProtectionConfig.isEnabled()) { return false; } @@ -47,6 +50,15 @@ public boolean isOverload(SystemMetrics systemMetrics) { return true; } + if (systemMetrics.getDataBasedirPathUsedPercentage() > workerServerLoadProtectionConfig + .getMaxDataBasedirDiskUsagePercentageThresholds()) { + log.info( + "OverLoad: the DataBasedirPathDiskUsagePercentage: {} is over then the maxDataBasedirDiskUsagePercentageThresholds {}", + systemMetrics.getDataBasedirPathUsedPercentage(), + workerServerLoadProtectionConfig.getMaxDataBasedirDiskUsagePercentageThresholds()); + return true; + } + if (physicalTaskExecutorContainerDelegator.getExecutorContainer().slotUsage() == 1) { log.info("OverLoad: the TaskExecutorContainer slot usage is 1"); return true; diff --git 
a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionConfig.java b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionConfig.java index b024ac15cf79..f4e3ec7b738b 100644 --- a/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionConfig.java +++ b/dolphinscheduler-worker/src/main/java/org/apache/dolphinscheduler/server/worker/config/WorkerServerLoadProtectionConfig.java @@ -25,4 +25,7 @@ @Data @EqualsAndHashCode(callSuper = true) public class WorkerServerLoadProtectionConfig extends BaseServerLoadProtectionConfig { + + private double maxDataBasedirDiskUsagePercentageThresholds = 0.7; + }