From c2cd5eeef0b1e216ac62c5a718f03eb331b785c7 Mon Sep 17 00:00:00 2001 From: Qiye Tan Date: Mon, 8 Dec 2025 14:08:59 -0800 Subject: [PATCH 1/2] Fix build compatibility with older PyTorch versions Differential Revision: D88671081 --- comms/torchcomms/ncclx/TorchCommNCCLXCCA.cpp | 50 +++++--------------- 1 file changed, 12 insertions(+), 38 deletions(-) diff --git a/comms/torchcomms/ncclx/TorchCommNCCLXCCA.cpp b/comms/torchcomms/ncclx/TorchCommNCCLXCCA.cpp index d37a48c8..2adcd319 100644 --- a/comms/torchcomms/ncclx/TorchCommNCCLXCCA.cpp +++ b/comms/torchcomms/ncclx/TorchCommNCCLXCCA.cpp @@ -7,46 +7,20 @@ // Helper to detect if c10::CachingAllocator constants exist namespace { -// Helper to get kLargeBuffer from c10::CachingAllocator if it exists -template -struct LargeBufferGetter { - static constexpr size_t get() { - return 20971520; // 20MB (20 * 1024 * 1024) - fallback - } -}; - -// Specialization when c10::CachingAllocator::kLargeBuffer exists -template <> -struct LargeBufferGetter< - std::void_t> { - static constexpr size_t get() { - return c10::CachingAllocator::kLargeBuffer; - } -}; - -// Helper to get kSmallBuffer from c10::CachingAllocator if it exists -template -struct SmallBufferGetter { - static constexpr size_t get() { - return 2097152; // 2MB (2 * 1024 * 1024) - fallback - } -}; - -// Specialization when c10::CachingAllocator::kSmallBuffer exists -template <> -struct SmallBufferGetter< - std::void_t> { - static constexpr size_t get() { - return c10::CachingAllocator::kSmallBuffer; - } -}; - -inline size_t getLargeBufferSize() { - return LargeBufferGetter<>::get(); +size_t getLargeBufferSize() { +#if __has_include() + return c10::CachingAllocator::kLargeBuffer; +#else + return 20971520; // 20MB (20 * 1024 * 1024) - fallback +#endif } -inline size_t getSmallBufferSize() { - return SmallBufferGetter<>::get(); +size_t getSmallBufferSize() { +#if __has_include() + return c10::CachingAllocator::kSmallBuffer; +#else + return 2097152; // 2MB (2 * 1024 * 1024) 
- fallback +#endif } } // namespace From 0cb89d2c23566aa7d710ab9d10d002279adab800 Mon Sep 17 00:00:00 2001 From: Qiye Tan Date: Mon, 8 Dec 2025 14:08:59 -0800 Subject: [PATCH 2/2] Skip MemPoolTest on github workflow (#80) Summary: MemPoolTest is not supported on some old hardware Differential Revision: D88655756 --- .github/workflows/build_test.yaml | 2 +- .../scripts/run_tests_integration_py.sh | 38 ++++++++++++++++++- 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/.github/workflows/build_test.yaml b/.github/workflows/build_test.yaml index 2ff51f14..47848050 100644 --- a/.github/workflows/build_test.yaml +++ b/.github/workflows/build_test.yaml @@ -62,5 +62,5 @@ jobs: python -c "import torchcomms; import torchcomms._transport" # Run tests - comms/torchcomms/scripts/run_tests_integration_py.sh + comms/torchcomms/scripts/run_tests_integration_py.sh -s MemPoolTest.py pytest -v comms/torchcomms/tests/unit/py diff --git a/comms/torchcomms/scripts/run_tests_integration_py.sh b/comms/torchcomms/scripts/run_tests_integration_py.sh index 5a1e84eb..0b160a7e 100755 --- a/comms/torchcomms/scripts/run_tests_integration_py.sh +++ b/comms/torchcomms/scripts/run_tests_integration_py.sh @@ -9,9 +9,45 @@ set -ex cd "$(dirname "$0")/../tests/integration/py" +SKIP_TESTS="" + +# Function to display usage +usage() { + echo "Usage: $0 [options]" + echo "Options:" + echo " --skips, -s Comma-separated list of tests to skip" + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + key="$1" + case $key in + --skips|-s) + SKIP_TESTS="$2" + shift 2 + ;; + *) + echo "Unknown option: $1" + usage + ;; + esac +done + run_tests () { + # Convert comma-separated skip tests to array + IFS=',' read -ra SKIP_TESTS_ARRAY <<< "$SKIP_TESTS" for file in *Test.py; do - torchrun --nnodes 1 --nproc_per_node 4 "$file" --verbose + skip=false + for skip_file in "${SKIP_TESTS_ARRAY[@]}"; do + if [[ "$file" == "$skip_file" ]]; then + skip=true + break + fi + done + if ! 
$skip; then + torchrun --nnodes 1 --nproc_per_node 4 "$file" --verbose + fi done }