Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/build_test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,5 @@ jobs:
python -c "import torchcomms; import torchcomms._transport"

# Run tests
comms/torchcomms/scripts/run_tests_integration_py.sh
comms/torchcomms/scripts/run_tests_integration_py.sh -s MemPoolTest.py
pytest -v comms/torchcomms/tests/unit/py
50 changes: 12 additions & 38 deletions comms/torchcomms/ncclx/TorchCommNCCLXCCA.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,46 +7,20 @@
// Helper to detect if c10::CachingAllocator constants exist
namespace {

// Primary template: supplies the fallback large-buffer size used when no
// specialization of LargeBufferGetter is selected.
template <typename = void>
struct LargeBufferGetter {
  // Returns the fallback size in bytes: 20MB.
  static constexpr size_t get() {
    constexpr size_t kFallbackBytes = size_t{20} * 1024 * 1024;
    return kFallbackBytes;
  }
};

// Specialization when c10::CachingAllocator::kLargeBuffer exists; get() then
// forwards that constant instead of the primary template's fallback.
// NOTE(review): std::void_t<...> always names `void`, so this is an explicit
// (full) specialization of LargeBufferGetter<void>, the type that
// LargeBufferGetter<> resolves to. Because it is a full specialization, the
// decltype argument is not evaluated in a SFINAE context: if kLargeBuffer is
// not declared, this looks like a hard compile error rather than a silent
// fallback to the primary template — confirm against the oldest supported c10.
template <>
struct LargeBufferGetter<
std::void_t<decltype(c10::CachingAllocator::kLargeBuffer)>> {
// Returns c10::CachingAllocator::kLargeBuffer as a size_t.
static constexpr size_t get() {
return c10::CachingAllocator::kLargeBuffer;
}
};

// Primary template: supplies the fallback small-buffer size used when no
// specialization of SmallBufferGetter is selected.
template <typename = void>
struct SmallBufferGetter {
  // Returns the fallback size in bytes: 2MB.
  static constexpr size_t get() {
    constexpr size_t kFallbackBytes = size_t{2} * 1024 * 1024;
    return kFallbackBytes;
  }
};

// Specialization when c10::CachingAllocator::kSmallBuffer exists; get() then
// forwards that constant instead of the primary template's fallback.
// NOTE(review): std::void_t<...> always names `void`, so this is an explicit
// (full) specialization of SmallBufferGetter<void>, the type that
// SmallBufferGetter<> resolves to. Because it is a full specialization, the
// decltype argument is not evaluated in a SFINAE context: if kSmallBuffer is
// not declared, this looks like a hard compile error rather than a silent
// fallback to the primary template — confirm against the oldest supported c10.
template <>
struct SmallBufferGetter<
std::void_t<decltype(c10::CachingAllocator::kSmallBuffer)>> {
// Returns c10::CachingAllocator::kSmallBuffer as a size_t.
static constexpr size_t get() {
return c10::CachingAllocator::kSmallBuffer;
}
};

// Returns the large-buffer size in bytes.
//
// When <c10/core/AllocatorConfig.h> is visible to the preprocessor the value
// is c10::CachingAllocator::kLargeBuffer; otherwise a hard-coded 20MB fallback
// is returned.
// NOTE(review): __has_include only proves the header can be found on the
// include path; this assumes the header is also #included earlier in this
// file so the constant is declared — TODO confirm.
size_t getLargeBufferSize() {
#if __has_include(<c10/core/AllocatorConfig.h>)
  return c10::CachingAllocator::kLargeBuffer;
#else
  return 20971520; // 20MB (20 * 1024 * 1024) - fallback
#endif
}

// Returns the small-buffer size in bytes.
//
// When <c10/core/AllocatorConfig.h> is visible to the preprocessor the value
// is c10::CachingAllocator::kSmallBuffer; otherwise a hard-coded 2MB fallback
// is returned.
// NOTE(review): __has_include only proves the header can be found on the
// include path; this assumes the header is also #included earlier in this
// file so the constant is declared — TODO confirm.
size_t getSmallBufferSize() {
#if __has_include(<c10/core/AllocatorConfig.h>)
  return c10::CachingAllocator::kSmallBuffer;
#else
  return 2097152; // 2MB (2 * 1024 * 1024) - fallback
#endif
}
} // namespace

Expand Down
38 changes: 37 additions & 1 deletion comms/torchcomms/scripts/run_tests_integration_py.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,45 @@ set -ex

cd "$(dirname "$0")/../tests/integration/py"

SKIP_TESTS=""

# Print the command-line help text to stdout, then terminate the script with
# exit status 1.
usage() {
    printf '%s\n' \
        "Usage: $0 [options]" \
        "Options:" \
        " --skips, -s Comma-separated list of tests to skip"
    exit 1
}

# Parse command line arguments.
#   --skips|-s VALUE   store VALUE (comma-separated test file names) in
#                      SKIP_TESTS; a later occurrence overrides an earlier one.
# Any other argument prints an error plus the usage text and exits via usage().
while [[ $# -gt 0 ]]; do
    key="$1"
    case $key in
        --skips|-s)
            # The flag needs a value. Without this check a trailing
            # --skips/-s makes `shift 2` fail, and with `set -e` active the
            # script would abort without telling the user why.
            if [[ $# -lt 2 ]]; then
                echo "Missing value for option: $1"
                usage
            fi
            SKIP_TESTS="$2"
            shift 2
            ;;
        *)
            echo "Unknown option: $1"
            usage
            ;;
    esac
done

# Run each *Test.py file in the current directory under torchrun
# (1 node, 4 processes per node), except files listed in SKIP_TESTS.
# SKIP_TESTS is a comma-separated list of file names; entries are compared
# to each file name exactly (no pattern matching, no whitespace trimming).
run_tests () {
    # Convert comma-separated skip tests to array
    IFS=',' read -ra SKIP_TESTS_ARRAY <<< "$SKIP_TESTS"
    for file in *Test.py; do
        skip=false
        for skip_file in "${SKIP_TESTS_ARRAY[@]}"; do
            if [[ "$file" == "$skip_file" ]]; then
                skip=true
                break
            fi
        done
        # Launch only after the skip check; an unconditional launch before it
        # would run skipped tests anyway and run every other test twice.
        if ! $skip; then
            torchrun --nnodes 1 --nproc_per_node 4 "$file" --verbose
        fi
    done
}

Expand Down
Loading