From dad96db674faf1721652f49ba4e9658983baa78c Mon Sep 17 00:00:00 2001 From: Sebastian Bernauer Date: Thu, 2 Nov 2023 14:09:52 +0100 Subject: [PATCH] feat: Support graceful shutdown (#407) * feat: Support graceful shutdown * update docs * docs * changelog * link code in docs * increase default of datanodes to 30 min * move into constants * use new operator-rs * docs: Format 15 minutes * Use new operator-rs * improve docs * fix link * use operator-rs 0.55.0 * fixup * improve docs * set error context * Added a high level description of graceful shutdown * Revert "Added a high level description of graceful shutdown" This reverts commit 7733ec13fb98d4a7a96946badf88fbc5ce2f9c6c. Moved to https://github.com/stackabletech/documentation/pull/473 * Move rustdoc above field attributes * Avoid snafu context(false) * docs wording * newline * fix: Vector graceful shutdown * downgrade ring again * fix links * use new operator-rs * chore: Bump operator-rs to 0.56.0 * Revert "chore: Bump operator-rs to 0.56.0" This reverts commit 4e14d5748e84dd7cf6bd2f03eca26edcc591fcff. * Update docs/modules/hdfs/pages/usage-guide/operations/graceful-shutdown.adoc Co-authored-by: Siegfried Weber --------- Co-authored-by: Jim Halfpenny Co-authored-by: Siegfried Weber --- CHANGELOG.md | 2 + deploy/helm/hdfs-operator/crds/crds.yaml | 24 ++++++++++++ .../operations/graceful-shutdown.adoc | 37 +++++++++++++++++-- rust/crd/src/constants.rs | 9 +++++ rust/crd/src/lib.rs | 26 +++++++++++++ rust/operator/src/container.rs | 31 +++++++++++++--- rust/operator/src/hdfs_controller.rs | 10 ++++- .../src/operations/graceful_shutdown.rs | 26 +++++++++++++ rust/operator/src/operations/mod.rs | 1 + tests/templates/kuttl/smoke/30-assert.yaml.j2 | 3 ++ 10 files changed, 160 insertions(+), 9 deletions(-) create mode 100644 rust/operator/src/operations/graceful_shutdown.rs diff --git a/CHANGELOG.md b/CHANGELOG.md index 0aa1b08d..ab22f63d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ All notable changes to this project will be documented in this file. - Default stackableVersion to operator version ([#381]). - Configuration overrides for the JVM security properties, such as DNS caching ([#384]). - Support PodDisruptionBudgets ([#394]). +- Support graceful shutdown ([#407]). - Added support for 3.2.4, 3.3.6 ([#409]). ### Changed @@ -33,6 +34,7 @@ All notable changes to this project will be documented in this file. [#402]: https://github.com/stackabletech/hdfs-operator/pull/402 [#404]: https://github.com/stackabletech/hdfs-operator/pull/404 [#405]: https://github.com/stackabletech/hdfs-operator/pull/405 +[#407]: https://github.com/stackabletech/hdfs-operator/pull/407 [#409]: https://github.com/stackabletech/hdfs-operator/pull/409 ## [23.7.0] - 2023-07-14 diff --git a/deploy/helm/hdfs-operator/crds/crds.yaml b/deploy/helm/hdfs-operator/crds/crds.yaml index 786ecd00..0c8745de 100644 --- a/deploy/helm/hdfs-operator/crds/crds.yaml +++ b/deploy/helm/hdfs-operator/crds/crds.yaml @@ -576,6 +576,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string logging: default: enableVectorAgent: null @@ -4069,6 +4073,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string logging: default: enableVectorAgent: null @@ -7621,6 +7629,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string logging: default: enableVectorAgent: null @@ -11105,6 +11117,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string logging: default: enableVectorAgent: null @@ -14606,6 +14622,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string logging: default: enableVectorAgent: null @@ -18090,6 +18110,10 @@ spec: type: array type: object type: object + gracefulShutdownTimeout: + description: Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + nullable: true + type: string logging: default: enableVectorAgent: null diff --git a/docs/modules/hdfs/pages/usage-guide/operations/graceful-shutdown.adoc b/docs/modules/hdfs/pages/usage-guide/operations/graceful-shutdown.adoc index 9b00f4c8..c5ba0d1c 100644 --- a/docs/modules/hdfs/pages/usage-guide/operations/graceful-shutdown.adoc +++ b/docs/modules/hdfs/pages/usage-guide/operations/graceful-shutdown.adoc @@ -1,6 +1,37 @@ = Graceful shutdown -Graceful shutdown of HDFS nodes is either not supported by the product itself -or we have not implemented it yet. +You can configure the graceful shutdown as described in xref:concepts:operations/graceful_shutdown.adoc[]. -Outstanding implementation work for the graceful shutdowns of all products where this functionality is relevant is tracked in https://github.com/stackabletech/issues/issues/357 +== JournalNodes + +As a default, JournalNodes have `15 minutes` to shut down gracefully. + +The JournalNode process will always run as PID `1` and will receive a `SIGTERM` signal when Kubernetes wants to terminate the Pod. +It will log the received signal as shown in the log below and initiate a graceful shutdown. +After the graceful shutdown timeout runs out, and the process still didn't exit, Kubernetes will issue a `SIGKILL` signal. + +https://github.com/apache/hadoop/blob/a585a73c3e02ac62350c136643a5e7f6095a3dbb/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/qjournal/server/JournalNode.java#L272[This] is the relevant code that gets executed in the JournalNodes as of HDFS version `3.3.4`. + + +[source,text] +---- +2023-10-10 13:37:41,525 ERROR server.JournalNode (LogAdapter.java:error(75)) - RECEIVED SIGNAL 15: SIGTERM +2023-10-10 13:37:41,526 INFO server.JournalNode (LogAdapter.java:info(51)) - SHUTDOWN_MSG: +/************************************************************ +SHUTDOWN_MSG: Shutting down JournalNode at hdfs-journalnode-default-0/10.244.0.38 +************************************************************/ +---- + +== NameNodes + +As a default, NameNodes have `15 minutes` to shut down gracefully. +They use the same mechanism described above. + +https://github.com/apache/hadoop/blob/a585a73c3e02ac62350c136643a5e7f6095a3dbb/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/namenode/NameNode.java#L1080[This] is the relevant code that gets executed in the NameNodes as of HDFS version `3.3.4`. + +== DataNodes + +As a default, DataNodes have `30 minutes` to shut down gracefully. +They use the same mechanism described above. + +https://github.com/apache/hadoop/blob/a585a73c3e02ac62350c136643a5e7f6095a3dbb/hadoop-hdfs-project/hadoop-hdfs/src/main/java/org/apache/hadoop/hdfs/server/datanode/DataNode.java#L2004[This] is the relevant code that gets executed in the DataNodes as of HDFS version `3.3.4`. diff --git a/rust/crd/src/constants.rs b/rust/crd/src/constants.rs index ae8cde43..5fe554b6 100644 --- a/rust/crd/src/constants.rs +++ b/rust/crd/src/constants.rs @@ -1,3 +1,5 @@ +use stackable_operator::time::Duration; + pub const DEFAULT_DFS_REPLICATION_FACTOR: u8 = 3; pub const CONTROLLER_NAME: &str = "hdfsclusters.hdfs.stackable.tech"; @@ -41,6 +43,13 @@ pub const DEFAULT_JOURNAL_NODE_HTTP_PORT: u16 = 8480; pub const DEFAULT_JOURNAL_NODE_HTTPS_PORT: u16 = 8481; pub const DEFAULT_JOURNAL_NODE_RPC_PORT: u16 = 8485; +pub const DEFAULT_JOURNAL_NODE_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = + Duration::from_minutes_unchecked(15); +pub const DEFAULT_NAME_NODE_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = + Duration::from_minutes_unchecked(15); +pub const DEFAULT_DATA_NODE_GRACEFUL_SHUTDOWN_TIMEOUT: Duration = + Duration::from_minutes_unchecked(30); + // hdfs-site.xml pub const DFS_NAMENODE_NAME_DIR: &str = "dfs.namenode.name.dir"; pub const DFS_NAMENODE_SHARED_EDITS_DIR: &str = "dfs.namenode.shared.edits.dir"; diff --git a/rust/crd/src/lib.rs b/rust/crd/src/lib.rs index 3947fa2d..02dc7bf4 100644 --- a/rust/crd/src/lib.rs +++ b/rust/crd/src/lib.rs @@ -30,6 +30,7 @@ use stackable_operator::{ role_utils::{GenericRoleConfig, Role, RoleGroup, RoleGroupRef}, schemars::{self, JsonSchema}, status::condition::{ClusterCondition, HasStatusCondition}, + time::Duration, }; use strum::{Display, EnumIter, EnumString}; @@ -160,6 +161,7 @@ pub trait MergedConfig { None } fn affinity(&self) -> &StackableAffinity; + fn graceful_shutdown_timeout(&self) -> Option<&Duration>; /// Main container shared by all roles fn hdfs_logging(&self) -> ContainerLogConfig; /// Vector container shared by all roles @@ -845,6 +847,9 @@ pub struct NameNodeConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + #[fragment_attrs(serde(default))] + pub graceful_shutdown_timeout: Option, } impl MergedConfig for NameNodeConfig { @@ -856,6 +861,10 @@ impl MergedConfig for NameNodeConfig { &self.affinity } + fn graceful_shutdown_timeout(&self) -> Option<&Duration> { + self.graceful_shutdown_timeout.as_ref() + } + fn hdfs_logging(&self) -> ContainerLogConfig { self.logging .containers @@ -920,6 +929,7 @@ impl NameNodeConfigFragment { }, logging: product_logging::spec::default_logging(), affinity: get_affinity(cluster_name, role), + graceful_shutdown_timeout: Some(DEFAULT_NAME_NODE_GRACEFUL_SHUTDOWN_TIMEOUT), } } } @@ -1005,6 +1015,9 @@ pub struct DataNodeConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + #[fragment_attrs(serde(default))] + pub graceful_shutdown_timeout: Option, } impl MergedConfig for DataNodeConfig { @@ -1018,6 +1031,10 @@ impl MergedConfig for DataNodeConfig { &self.affinity } + fn graceful_shutdown_timeout(&self) -> Option<&Duration> { + self.graceful_shutdown_timeout.as_ref() + } + fn hdfs_logging(&self) -> ContainerLogConfig { self.logging .containers @@ -1073,6 +1090,7 @@ impl DataNodeConfigFragment { }, logging: product_logging::spec::default_logging(), affinity: get_affinity(cluster_name, role), + graceful_shutdown_timeout: Some(DEFAULT_DATA_NODE_GRACEFUL_SHUTDOWN_TIMEOUT), } } } @@ -1156,6 +1174,9 @@ pub struct JournalNodeConfig { pub logging: Logging, #[fragment_attrs(serde(default))] pub affinity: StackableAffinity, + /// Time period Pods have to gracefully shut down, e.g. `30m`, `1h` or `2d`. Consult the operator documentation for details. + #[fragment_attrs(serde(default))] + pub graceful_shutdown_timeout: Option, } impl MergedConfig for JournalNodeConfig { @@ -1167,6 +1188,10 @@ impl MergedConfig for JournalNodeConfig { &self.affinity } + fn graceful_shutdown_timeout(&self) -> Option<&Duration> { + self.graceful_shutdown_timeout.as_ref() + } + fn hdfs_logging(&self) -> ContainerLogConfig { self.logging .containers @@ -1210,6 +1235,7 @@ impl JournalNodeConfigFragment { }, logging: product_logging::spec::default_logging(), affinity: get_affinity(cluster_name, role), + graceful_shutdown_timeout: Some(DEFAULT_JOURNAL_NODE_GRACEFUL_SHUTDOWN_TIMEOUT), } } } diff --git a/rust/operator/src/container.rs b/rust/operator/src/container.rs index dfb092ab..23c5b3fb 100644 --- a/rust/operator/src/container.rs +++ b/rust/operator/src/container.rs @@ -28,7 +28,13 @@ use stackable_hdfs_crd::{ storage::DataNodeStorageConfig, DataNodeContainer, HdfsCluster, HdfsPodRef, HdfsRole, MergedConfig, NameNodeContainer, }; -use stackable_operator::builder::SecretFormat; +use stackable_operator::{ + builder::SecretFormat, + product_logging::framework::{ + create_vector_shutdown_file_command, remove_vector_shutdown_file_command, + }, + utils::COMMON_BASH_TRAP_FUNCTIONS, +}; use stackable_operator::{ builder::{ resources::ResourceRequirementsBuilder, ContainerBuilder, PodBuilder, @@ -45,9 +51,12 @@ use stackable_operator::{ }, kube::ResourceExt, memory::{BinaryMultiple, MemoryQuantity}, - product_logging, - product_logging::spec::{ - ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, CustomContainerLogConfig, + product_logging::{ + self, + spec::{ + ConfigMapLogConfig, ContainerLogConfig, ContainerLogConfigChoice, + CustomContainerLogConfig, + }, }, }; use std::{collections::BTreeMap, str::FromStr}; @@ -437,9 +446,21 @@ impl ContainerConfig { HDFS_LOG4J_CONFIG_FILE, merged_config.hdfs_logging(), )); + args.push_str(&format!( - "{hadoop_home}/bin/hdfs {role}\n", + "\ +{COMMON_BASH_TRAP_FUNCTIONS} +{remove_vector_shutdown_file_command} +prepare_signal_handlers +{hadoop_home}/bin/hdfs {role} & +wait_for_termination $! +{create_vector_shutdown_file_command} +", hadoop_home = Self::HADOOP_HOME, + remove_vector_shutdown_file_command = + remove_vector_shutdown_file_command(STACKABLE_LOG_DIR), + create_vector_shutdown_file_command = + create_vector_shutdown_file_command(STACKABLE_LOG_DIR), )); } ContainerConfig::Zkfc { .. } => { diff --git a/rust/operator/src/hdfs_controller.rs b/rust/operator/src/hdfs_controller.rs index b9bd40be..7b0ae249 100644 --- a/rust/operator/src/hdfs_controller.rs +++ b/rust/operator/src/hdfs_controller.rs @@ -54,7 +54,10 @@ use crate::{ discovery::build_discovery_configmap, event::{build_invalid_replica_message, publish_event}, kerberos, - operations::pdb::add_pdbs, + operations::{ + graceful_shutdown::{self, add_graceful_shutdown_config}, + pdb::add_pdbs, + }, product_logging::{extend_role_group_config_map, resolve_vector_aggregator_address}, OPERATOR_NAME, }; @@ -204,6 +207,9 @@ pub enum Error { source: PropertiesWriterError, rolegroup: String, }, + + #[snafu(display("failed to configure graceful shutdown"))] + GracefulShutdown { source: graceful_shutdown::Error }, } impl ReconcilerError for Error { @@ -694,6 +700,8 @@ fn rolegroup_statefulset( ) .context(FailedToCreateContainerAndVolumeConfigurationSnafu)?; + add_graceful_shutdown_config(merged_config, &mut pb).context(GracefulShutdownSnafu)?; + let mut pod_template = pb.build_template(); if let Some(pod_overrides) = hdfs.pod_overrides_for_role(role) { pod_template.merge_from(pod_overrides.clone()); diff --git a/rust/operator/src/operations/graceful_shutdown.rs b/rust/operator/src/operations/graceful_shutdown.rs new file mode 100644 index 00000000..ff79f384 --- /dev/null +++ b/rust/operator/src/operations/graceful_shutdown.rs @@ -0,0 +1,26 @@ +use snafu::{ResultExt, Snafu}; +use stackable_hdfs_crd::MergedConfig; +use stackable_operator::builder::PodBuilder; + +#[derive(Debug, Snafu)] +pub enum Error { + #[snafu(display("Failed to set terminationGracePeriod"))] + SetTerminationGracePeriod { + source: stackable_operator::builder::pod::Error, + }, +} + +pub fn add_graceful_shutdown_config( + merged_config: &(dyn MergedConfig + Send + 'static), + pod_builder: &mut PodBuilder, +) -> Result<(), Error> { + // This must be always set by the merge mechanism, as we provide a default value, + // users can not disable graceful shutdown. + if let Some(graceful_shutdown_timeout) = merged_config.graceful_shutdown_timeout() { + pod_builder + .termination_grace_period(graceful_shutdown_timeout) + .context(SetTerminationGracePeriodSnafu)?; + } + + Ok(()) +} diff --git a/rust/operator/src/operations/mod.rs b/rust/operator/src/operations/mod.rs index d3cf6e9c..92ca2ec7 100644 --- a/rust/operator/src/operations/mod.rs +++ b/rust/operator/src/operations/mod.rs @@ -1 +1,2 @@ +pub mod graceful_shutdown; pub mod pdb; diff --git a/tests/templates/kuttl/smoke/30-assert.yaml.j2 b/tests/templates/kuttl/smoke/30-assert.yaml.j2 index 2d82e9b5..15c2ad33 100644 --- a/tests/templates/kuttl/smoke/30-assert.yaml.j2 +++ b/tests/templates/kuttl/smoke/30-assert.yaml.j2 @@ -23,6 +23,7 @@ spec: - name: vector {% endif %} - name: zkfc + terminationGracePeriodSeconds: 900 status: readyReplicas: 2 replicas: 2 @@ -46,6 +47,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} - name: vector {% endif %} + terminationGracePeriodSeconds: 900 status: readyReplicas: 1 replicas: 1 @@ -69,6 +71,7 @@ spec: {% if lookup('env', 'VECTOR_AGGREGATOR') %} - name: vector {% endif %} + terminationGracePeriodSeconds: 1800 status: readyReplicas: {{ test_scenario['values']['number-of-datanodes'] }} replicas: {{ test_scenario['values']['number-of-datanodes'] }}