Skip to content

Commit

Permalink
Revert "Attempt to speed up syncing"
Browse files (browse the repository at this point in the history)
This reverts commit ec7c42c.
  • Loading branch information
ashleykleynhans committed Jul 31, 2024
1 parent d068791 commit 0d8b268
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 38 deletions.
4 changes: 2 additions & 2 deletions docker-bake.hcl
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ variable "BASE_IMAGE_REPOSITORY" {
}

variable "BASE_IMAGE_VERSION" {
default = "2.2.3"
default = "2.2.2"
}

variable "CUDA_VERSION" {
Expand All @@ -40,7 +40,7 @@ variable "PYTHON_VERSION" {

target "default" {
dockerfile = "Dockerfile"
tags = ["${REGISTRY}/${REGISTRY_USER}/${APP}:${RELEASE}.post7"]
tags = ["${REGISTRY}/${REGISTRY_USER}/${APP}:${RELEASE}.post6"]
args = {
RELEASE = "${RELEASE}"
BASE_IMAGE = "${BASE_IMAGE_REPOSITORY}:${BASE_IMAGE_VERSION}-python${PYTHON_VERSION}-cuda${CUDA_VERSION}-torch${TORCH_VERSION}"
Expand Down
72 changes: 36 additions & 36 deletions scripts/pre_start.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,9 @@ EOF
sync_directory() {
local src_dir="$1"
local dst_dir="$2"
local num_parallel_jobs=4
local size_threshold=10485760 # 10M in bytes
local use_compression=${3:-false}

echo "SYNC: Syncing from ${src_dir} to ${dst_dir}, please wait..."
echo "SYNC: Syncing from ${src_dir} to ${dst_dir}, please wait (this can take a few minutes)..."

# Ensure destination directory exists
mkdir -p "${dst_dir}"
Expand All @@ -51,39 +50,40 @@ sync_directory() {
echo "SYNC: File system type: ${workspace_fs}"

if [ "${workspace_fs}" = "fuse" ]; then
echo "SYNC: Using optimized multi-process sync for FUSE filesystem"

# Function to sync large files using dd
sync_large_files() {
find "$src_dir" -type f -printf '%s %P\0' |
awk -v threshold="$size_threshold" -v RS='\0' -F' ' '$1 >= threshold {print $2}' |
xargs -0 -P $num_parallel_jobs -I {} \
sh -c 'dd if="$1/{}" of="$2/{}" bs=1M' "$src_dir" "$dst_dir"
}

# Function to sync small files using cpio
sync_small_files() {
find "$src_dir" -type f -printf '%s %P\0' |
awk -v threshold="$size_threshold" -v RS='\0' -F' ' '$1 < threshold {print $2}' |
cpio -0pdm "$dst_dir"
}

# Function to sync directory structure
sync_dir_structure() {
find "$src_dir" -type d -print0 |
cpio -0pdm "$dst_dir"
}

# Sync directory structure
sync_dir_structure

# Sync large and small files in parallel
sync_large_files &
sync_small_files &
wait

# Update file attributes
find "$src_dir" -print0 | xargs -0 -P $num_parallel_jobs touch --reference="$src_dir/{}" "$dst_dir/{}"
if [ "$use_compression" = true ]; then
echo "SYNC: Using tar with zstd compression for sync"
else
echo "SYNC: Using tar without compression for sync"
fi

# Get total size of source directory
local total_size=$(du -sb "${src_dir}" | cut -f1)

# Base tar command with optimizations
local tar_cmd="tar --create \
--file=- \
--directory="${src_dir}" \
--exclude='*.pyc' \
--exclude='__pycache__' \
--exclude='*.log' \
--blocking-factor=64 \
--record-size=64K \
--sparse \
."

# Base tar extract command
local tar_extract_cmd="tar --extract \
--file=- \
--directory="${dst_dir}" \
--blocking-factor=64 \
--record-size=64K \
--sparse"

if [ "$use_compression" = true ]; then
$tar_cmd | zstd -T0 -1 | pv -s ${total_size} | zstd -d -T0 | $tar_extract_cmd
else
$tar_cmd | pv -s ${total_size} | $tar_extract_cmd
fi

elif [ "${workspace_fs}" = "overlay" ] || [ "${workspace_fs}" = "xfs" ]; then
echo "SYNC: Using rsync for sync"
Expand Down

0 comments on commit 0d8b268

Please sign in to comment.