Skip to content

Commit

Permalink
Add a log message describing the active processes using a mount point…
Browse files Browse the repository at this point in the history
… during unmounting (#2719)

This includes refactoring the init, finalize, and update entrypoints to include PATH dirs
  • Loading branch information
dreambeyondorange authored May 2, 2024
1 parent 8446686 commit 99e0d1b
Show file tree
Hide file tree
Showing 12 changed files with 71 additions and 4 deletions.
2 changes: 2 additions & 0 deletions cookbooks/aws-parallelcluster-entrypoints/recipes/finalize.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

include_recipe "aws-parallelcluster-platform::enable_chef_error_handler"

include_recipe "aws-parallelcluster-shared::setup_envars"

fetch_config 'Fetch and load cluster configs'

if is_custom_node?
Expand Down
2 changes: 2 additions & 0 deletions cookbooks/aws-parallelcluster-entrypoints/recipes/init.rb
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@

include_recipe "aws-parallelcluster-platform::enable_chef_error_handler"

include_recipe "aws-parallelcluster-shared::setup_envars"

os_type 'Validate OS type specified by the user is the same as the OS identified by Ohai'

# Validate init system
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.
include_recipe "aws-parallelcluster-shared::setup_envars"

# Fetch and load cluster configs
include_recipe 'aws-parallelcluster-platform::update'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
@included_recipes = []
%w(
aws-parallelcluster-platform::enable_chef_error_handler
aws-parallelcluster-shared::setup_envars
aws-parallelcluster-computefleet::custom_parallelcluster_node
aws-parallelcluster-platform::finalize
aws-parallelcluster-slurm::finalize
Expand Down Expand Up @@ -50,6 +51,7 @@
expected_recipes = if is_custom_node
%w(
aws-parallelcluster-platform::enable_chef_error_handler
aws-parallelcluster-shared::setup_envars
aws-parallelcluster-computefleet::custom_parallelcluster_node
aws-parallelcluster-platform::finalize
aws-parallelcluster-slurm::finalize
Expand All @@ -58,6 +60,7 @@
else
%w(
aws-parallelcluster-platform::enable_chef_error_handler
aws-parallelcluster-shared::setup_envars
aws-parallelcluster-platform::finalize
aws-parallelcluster-slurm::finalize
aws-parallelcluster-environment::finalize
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
before do
@included_recipes = []
%w(
aws-parallelcluster-shared::setup_envars
aws-parallelcluster-platform::update
aws-parallelcluster-environment::update
aws-parallelcluster-slurm::update
Expand Down Expand Up @@ -47,6 +48,7 @@
cached(:node) { chef_run.node }

expected_recipes = %w(
aws-parallelcluster-shared::setup_envars
aws-parallelcluster-platform::update
aws-parallelcluster-environment::update
aws-parallelcluster-slurm::update
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@
# Path needs to be fully qualified, for example "shared/temp" becomes "/shared/temp"
efs_shared_dir = "/#{efs_shared_dir}" unless efs_shared_dir.start_with?('/')
# Unmount EFS
file_utils "check active processes on #{efs_shared_dir}" do
file efs_shared_dir
action :check_active_processes
end
execute 'unmount efs' do
command "umount -fl #{efs_shared_dir}"
retries 10
Expand Down
28 changes: 28 additions & 0 deletions cookbooks/aws-parallelcluster-environment/resources/file_utils.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# frozen_string_literal: true

# Copyright:: 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You may not use this file except in compliance
# with the License. A copy of the License is located at http://aws.amazon.com/apache2.0/
# or in the "LICENSE.txt" file accompanying this file. This file is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, express or implied. See the License for the specific language governing permissions and
# limitations under the License.

# Custom resource exposing file-related helper actions; currently only :check_active_processes.
provides :file_utils
unified_mode true

# Path to inspect; it is interpolated as-is into the `fuser -mv` command run by :check_active_processes.
property :file, String, required: %i(check_active_processes)

default_action :check_active_processes

# Logs the processes currently using the given path (callers pass a mount point right before unmounting it).
# This is a diagnostic step only: the `fuser -mv` output is streamed to the Chef log through live_stream,
# and the outcome of the command must never prevent the caller from proceeding.
action :check_active_processes do
  file = new_resource.file
  Chef::Log.info("The following processes are using #{file}")
  execute "active processes" do
    retries 3
    retry_delay 3
    timeout 10
    live_stream true
    command "fuser -mv #{file}"
    # fuser exits with a non-zero code when no process is accessing the path. That is the normal outcome
    # for an idle mount point, so exit code 1 must not be treated as a failure.
    returns [0, 1]
    # Any remaining error (timeout, fuser not installed, ...) only costs us the log output:
    # do not abort the surrounding unmount because of it.
    ignore_failure true
  end
end
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,10 @@
return if on_docker?
new_resource.fsx_fs_id_array.dup.each_with_index do |_fsx_fs_id, index|
fsx = FSx.new(node, new_resource, index)

file_utils "check active processes on #{fsx.shared_dir}" do
file fsx.shared_dir
action :check_active_processes
end
execute "unmount fsx #{fsx.shared_dir}" do
command "umount -fl #{fsx.shared_dir}"
retries 10
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@

action :unmount do
shared_dir = format_directory(new_resource.shared_dir)

file_utils "check active processes on #{shared_dir}" do
file shared_dir
action :check_active_processes
end
# TODO: can we use mount resource to unmount and disable (see raid)
execute "unmount volume #{shared_dir}" do
command "umount -fl #{shared_dir}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ def mock_already_installed(package, expected_version, installed)
cd efs-utils-#{utils_version}
./build-deb.sh
apt-get -y install ./build/amazon-efs-utils*deb
EFSUTILSINSTALL
EFSUTILSINSTALL
end

context "utils package not yet installed" do
Expand Down Expand Up @@ -365,6 +365,13 @@ def mock_already_installed(package, expected_version, installed)
is_expected.to unmount_efs('unmount')
end

it 'checks active processes' do
is_expected.to check_active_processes_file_utils('check active processes on /shared_dir_1')
.with(file: '/shared_dir_1')
is_expected.to check_active_processes_file_utils('check active processes on /shared_dir_2')
.with(file: '/shared_dir_2')
end

it 'unmounts efs only if mounted' do
is_expected.not_to run_execute('unmount efs')
.with(command: 'umount -fl /shared_dir_1')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,9 @@

it 'unmounts fsx only if mounted' do
is_expected.not_to run_execute('unmount fsx /shared_dir_1')

is_expected.to check_active_processes_file_utils('check active processes on /shared_dir_1')
is_expected.to check_active_processes_file_utils('check active processes on /shared_dir_2')
.with(file: '/shared_dir_2')
is_expected.to run_execute('unmount fsx /shared_dir_2')
.with(command: "umount -fl /shared_dir_2")
.with(retries: 10)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@
is_expected.not_to run_execute('unmount volume')
end

it 'checks active processes' do
is_expected.to check_active_processes_file_utils('check active processes on /SHARED_DIR')
.with(file: '/SHARED_DIR')
end

it "removes volume /SHARED_DIR from /etc/fstab" do
is_expected.to edit_delete_lines("remove volume /SHARED_DIR from /etc/fstab")
.with(path: "/etc/fstab")
Expand Down Expand Up @@ -179,6 +184,11 @@
allow(Dir).to receive(:empty?).with("/SHARED_DIR").and_return(is_dir_empty)
end

it 'checks active processes' do
is_expected.to check_active_processes_file_utils('check active processes on /SHARED_DIR')
.with(file: '/SHARED_DIR')
end

it 'unmounts volume' do
is_expected.to unmount_volume('unmount')
is_expected.to run_execute('unmount volume /SHARED_DIR')
Expand Down

0 comments on commit 99e0d1b

Please sign in to comment.