Skip to content

Commit

Permalink
Add script changes to run tensor flow model with control client APIs …
Browse files Browse the repository at this point in the history
…on HNS bucket (#2406)

* trigger tf model with hns bucket

* test changes

* lint fix

* remove test changes

* small fix
  • Loading branch information
Tulsishah authored Aug 28, 2024
1 parent 7f25a41 commit 2f6ebc3
Show file tree
Hide file tree
Showing 6 changed files with 68 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,8 @@ VM_NAME="tf-resnet-7d"
ZONE_NAME="us-west1-b"
ARTIFACTS_BUCKET_PATH="gs://gcsfuse-ml-tests-logs/ci_artifacts/tf/resnet"
TEST_SCRIPT_PATH="github/gcsfuse/perfmetrics/scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh"
BUCKET_TYPE="non-hns"

cd "${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/perfmetrics/scripts/continuous_test/ml_tests/"

source run_and_manage_test.sh $VM_NAME $ZONE_NAME $ARTIFACTS_BUCKET_PATH $TEST_SCRIPT_PATH
source run_and_manage_test.sh $VM_NAME $ZONE_NAME $ARTIFACTS_BUCKET_PATH $TEST_SCRIPT_PATH "" $BUCKET_TYPE
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Launches the TensorFlow ResNet test against an HNS bucket by sourcing
# run_and_manage_test.sh with the parameters defined below.
#
# Required environment:
#   KOKORO_ARTIFACTS_DIR - directory that contains the github/gcsfuse checkout.

# Stop execution as soon as any command exits with a non-zero status.
set -e

VM_NAME="tf-resnet-7d-a100-gpu-hns-bucket"
ZONE_NAME="us-central1-f"
ARTIFACTS_BUCKET_PATH="gs://gcsfuse-ml-tests-logs/ci_artifacts/tf/resnet_hns"
TEST_SCRIPT_PATH="github/gcsfuse/perfmetrics/scripts/ml_tests/tf/resnet/setup_host_and_run_model.sh"
BUCKET_TYPE="hns"

# Fail with a clear message rather than cd-ing to a path rooted at "/" when
# the variable is unset or empty.
: "${KOKORO_ARTIFACTS_DIR:?KOKORO_ARTIFACTS_DIR must be set}"
cd "${KOKORO_ARTIFACTS_DIR}/github/gcsfuse/perfmetrics/scripts/continuous_test/ml_tests/"

# The fifth positional argument is deliberately passed as an empty string so
# that BUCKET_TYPE lands in the sixth position. All expansions are quoted so
# each value is always passed as exactly one argument.
source run_and_manage_test.sh "${VM_NAME}" "${ZONE_NAME}" "${ARTIFACTS_BUCKET_PATH}" "${TEST_SCRIPT_PATH}" "" "${BUCKET_TYPE}"
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Copyright 2024 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Config file for kokoro test
build_file: "gcsfuse/perfmetrics/scripts/continuous_test/ml_tests/tf/resnet_hns/build.sh"

# 1 hour timeout.
timeout_mins: 60
5 changes: 4 additions & 1 deletion perfmetrics/scripts/ml_tests/tf/resnet/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,5 +31,8 @@ WORKDIR "/tf_test/"
COPY ./perfmetrics/scripts/ml_tests/tf/resnet/setup_scripts/setup_container.sh .
COPY ./perfmetrics/scripts/ml_tests/tf/resnet/setup_scripts/resnet_runner.py .

ARG BUCKET_TYPE
ENV BUCKET_TYPE=${BUCKET_TYPE}

RUN mkdir -p "myBucket"
ENTRYPOINT ["/bin/bash", "-c", "./setup_container.sh"]
ENTRYPOINT ["/bin/bash", "-c", "./setup_container.sh ${BUCKET_TYPE}"]
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
# Stop execution as soon as any command exits with a non-zero status.
set -e

BUCKET_TYPE=$1
cd "$HOME/github/gcsfuse/perfmetrics/scripts"

echo "Setting up the machine with Docker and Nvidia Driver..."
Expand All @@ -26,7 +27,7 @@ cd "$HOME/github/gcsfuse/"
mkdir container_artifacts && mkdir container_artifacts/logs && mkdir container_artifacts/output

echo "Building tf DLC docker image containing all tensorflow libraries..."
sudo docker build . -f perfmetrics/scripts/ml_tests/tf/resnet/Dockerfile -t tf-dlc-gcsfuse --build-arg DLC_IMAGE_NAME=tf-gpu.2-13
sudo docker build . -f perfmetrics/scripts/ml_tests/tf/resnet/Dockerfile -t tf-dlc-gcsfuse --build-arg DLC_IMAGE_NAME=tf-gpu.2-13 --build-arg BUCKET_TYPE="${BUCKET_TYPE}"

echo "Running the docker image build in the previous step..."
sudo docker run --gpus all --name tf_model_container --privileged -d \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
# and epochs functionality, and runs the model

# Bucket type passed as the first argument; the value "hns" selects the HNS
# bucket and the enable-hns gcsfuse config further down in this script.
BUCKET_TYPE=$1
# Install Go: download the 1.23.0 linux-amd64 tarball, replace any existing
# /usr/local/go with the extracted tree, and add its bin directory to PATH.
wget -O go_tar.tar.gz https://go.dev/dl/go1.23.0.linux-amd64.tar.gz -q
sudo rm -rf /usr/local/go && tar -xzf go_tar.tar.gz && sudo mv go /usr/local
export PATH=$PATH:/usr/local/go/bin
Expand All @@ -39,11 +40,22 @@ echo "logging:
backup-file-count: 3
compress: true
" > /tmp/gcsfuse_config.yaml

# Defaults for a non-HNS run: the bucket that gcsfuse mounts and the
# sub-directory used when building the artifacts bucket path.
TEST_BUCKET="gcsfuse-ml-tf-data"
DIR="resnet"
# For an HNS run, switch to the HNS bucket, turn on enable-hns in the gcsfuse
# config so the tests run on the folder APIs, and suffix the artifacts
# directory with the bucket type.
# BUCKET_TYPE is quoted so that an empty or missing value simply skips this
# branch instead of making `[` fail with "unary operator expected"; `=` is
# used because `==` inside `[` is not portable.
if [ "${BUCKET_TYPE}" = "hns" ]; then
  TEST_BUCKET="gcsfuse-ml-data-hns-central1"
  echo "enable-hns: true" >> /tmp/gcsfuse_config.yaml
  DIR="${DIR}_${BUCKET_TYPE}"
fi

nohup gcsfuse/gcsfuse --foreground \
--implicit-dirs \
--stackdriver-export-interval 60s \
--config-file /tmp/gcsfuse_config.yaml \
gcsfuse-ml-tf-data myBucket > /home/output/gcsfuse.out 2> /home/output/gcsfuse.err &
$TEST_BUCKET myBucket > /home/output/gcsfuse.out 2> /home/output/gcsfuse.err &

# Install tensorflow model garden library
pip3 install --user tf-models-official==2.13.2
Expand Down Expand Up @@ -190,7 +202,7 @@ sed -i "$lines"'d' $train_lib_file
x=$((x-1))
sed -i "$x"'r bypassed_code.py' $train_lib_file

ARTIFACTS_BUCKET_PATH="gs://gcsfuse-ml-tests-logs/ci_artifacts/tf/resnet"
ARTIFACTS_BUCKET_PATH="gs://gcsfuse-ml-tests-logs/ci_artifacts/tf/${DIR}"
echo "Update status file"
echo "RUNNING" > status.txt
gsutil cp status.txt $ARTIFACTS_BUCKET_PATH/
Expand Down

0 comments on commit 2f6ebc3

Please sign in to comment.