diff --git a/csrc/aio/common/deepspeed_aio_common.cpp b/csrc/aio/common/deepspeed_aio_common.cpp
index 0f2895dfa328..a65cc500cc82 100644
--- a/csrc/aio/common/deepspeed_aio_common.cpp
+++ b/csrc/aio/common/deepspeed_aio_common.cpp
@@ -301,9 +301,8 @@ int regular_read(const char* filename, std::vector<char>& buffer)
     } while (r > 0);
 
     if (read_bytes != num_bytes) {
-        std::cerr << "read error "
-                  << " read_bytes (read) = " << read_bytes << " num_bytes (fstat) = " << num_bytes
-                  << std::endl;
+        std::cerr << "read error " << " read_bytes (read) = " << read_bytes
+                  << " num_bytes (fstat) = " << num_bytes << std::endl;
     }
     assert(read_bytes == num_bytes);
     close(fd);
diff --git a/csrc/aio/py_lib/deepspeed_py_aio.cpp b/csrc/aio/py_lib/deepspeed_py_aio.cpp
index 387b713f2bfc..0556f5aa8168 100644
--- a/csrc/aio/py_lib/deepspeed_py_aio.cpp
+++ b/csrc/aio/py_lib/deepspeed_py_aio.cpp
@@ -72,9 +72,8 @@ int deepspeed_py_aio_write(const torch::Tensor& buffer,
 
     const std::chrono::duration<double> fn_time =
         std::chrono::high_resolution_clock::now() - start_time;
-    std::cout << "Elapsed time(usec): "
-              << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
-              << std::endl;
+    std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
+              << " call = " << fn_time.count() * 1e6 << std::endl;
     return 0;
 }
 
@@ -118,8 +117,7 @@ int deepspeed_py_aio_read(torch::Tensor& buffer,
 
     const std::chrono::duration<double> fn_time =
         std::chrono::high_resolution_clock::now() - start_time;
-    std::cout << "Elapsed time(usec): "
-              << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
-              << std::endl;
+    std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
+              << " call = " << fn_time.count() * 1e6 << std::endl;
     return 0;
 }
diff --git a/csrc/aio/py_lib/deepspeed_py_aio_handle.cpp b/csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
index c21e92de9449..23ddabe260d4 100644
--- a/csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
+++ b/csrc/aio/py_lib/deepspeed_py_aio_handle.cpp
@@ -93,9 +93,8 @@ int deepspeed_aio_handle_t::read(torch::Tensor& buffer, const char* filename, co
     if (validate) { validate_aio_operation(true, filename, read_buffer, num_file_bytes); }
     const std::chrono::duration<double> fn_time =
         std::chrono::high_resolution_clock::now() - start_time;
-    std::cout << "Elapsed time(usec): "
-              << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
-              << std::endl;
+    std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
+              << " call = " << fn_time.count() * 1e6 << std::endl;
     return 0;
 }
 
@@ -128,9 +127,8 @@ int deepspeed_aio_handle_t::write(const torch::Tensor& buffer,
 
     const std::chrono::duration<double> fn_time =
         std::chrono::high_resolution_clock::now() - start_time;
-    std::cout << "Elapsed time(usec): "
-              << "aio = " << aio_time.count() * 1e6 << " call = " << fn_time.count() * 1e6
-              << std::endl;
+    std::cout << "Elapsed time(usec): " << "aio = " << aio_time.count() * 1e6
+              << " call = " << fn_time.count() * 1e6 << std::endl;
     return 0;
 }
 
diff --git a/csrc/aio/py_lib/deepspeed_py_copy.cpp b/csrc/aio/py_lib/deepspeed_py_copy.cpp
index 8a59107dd347..c597b91d05c9 100644
--- a/csrc/aio/py_lib/deepspeed_py_copy.cpp
+++ b/csrc/aio/py_lib/deepspeed_py_copy.cpp
@@ -10,7 +10,7 @@ Functionality for swapping optimizer tensors to/from (NVMe) storage devices.
 #include "deepspeed_py_copy.h"
 #include <omp.h>
 
-#define ROUND_DOWN(size, step) ((size) & ~((step)-1))
+#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))
 
 #if defined(__AVX512__) or defined(__AVX256__)
 union AVX_Data {
diff --git a/csrc/deepspeed4science/evoformer_attn/gemm_kernel_utils.h b/csrc/deepspeed4science/evoformer_attn/gemm_kernel_utils.h
index 2a4300c5cac1..c102234a4dfb 100644
--- a/csrc/deepspeed4science/evoformer_attn/gemm_kernel_utils.h
+++ b/csrc/deepspeed4science/evoformer_attn/gemm_kernel_utils.h
@@ -125,11 +125,10 @@ struct CheckArch {
         std::cerr << #PTR " is not correctly aligned\n"; \
         return false;                                    \
     }
-#define EVOFORMER_CHECK(COND, ERR)                          \
-    if (!(COND)) {                                          \
-        std::cerr << "[Evoformer Attention]"                \
-                  << "'" #COND "' failed: " << ERR << "\n"; \
-        return false;                                       \
+#define EVOFORMER_CHECK(COND, ERR)                                                     \
+    if (!(COND)) {                                                                     \
+        std::cerr << "[Evoformer Attention]" << "'" #COND "' failed: " << ERR << "\n"; \
+        return false;                                                                  \
     }
 #endif
 
diff --git a/csrc/includes/simd.h b/csrc/includes/simd.h
index f5bfb45dd2e2..a205026ec7c1 100644
--- a/csrc/includes/simd.h
+++ b/csrc/includes/simd.h
@@ -27,7 +27,7 @@ inline void writeAs(void* dst, const T& val)
     std::memcpy(dst, &val, sizeof(T));
 }
 
-#define ROUND_DOWN(size, step) ((size) & ~((step)-1))
+#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))
 
 #if defined(__AVX512__)
 #define SIMD_STORE(a, d) _mm512_storeu_ps(a, d)
diff --git a/csrc/xpu/includes/simd.h b/csrc/xpu/includes/simd.h
old mode 100755
new mode 100644
index f77568be7835..097e2d8585cc
--- a/csrc/xpu/includes/simd.h
+++ b/csrc/xpu/includes/simd.h
@@ -13,7 +13,7 @@
 #define TILE (128 * 1024 * 1024)
 
 #if defined(__AVX512__) or defined(__AVX256__)
-#define ROUND_DOWN(size, step) ((size) & ~((step)-1))
+#define ROUND_DOWN(size, step) ((size) & ~((step) - 1))
 
 #if defined(__AVX512__)
 #define SIMD_STORE(a, d) _mm512_storeu_ps(a, d)
diff --git a/csrc/xpu/includes/type_shim.h b/csrc/xpu/includes/type_shim.h
index fa41757c895b..1897afd1fea2 100644
--- a/csrc/xpu/includes/type_shim.h
+++ b/csrc/xpu/includes/type_shim.h
@@ -82,11 +82,11 @@
 }
 
 template <typename T>
-__inline__ __attribute__((always_inline)) T reduce_block_into_lanes(
-    T* x,
-    T val,
-    int lanes = 1,
-    bool share_result = false) // lanes is intended to be <= 32.
+__inline__ __attribute__((always_inline)) T
+reduce_block_into_lanes(T* x,
+                        T val,
+                        int lanes = 1,
+                        bool share_result = false) // lanes is intended to be <= 32.
 {
     auto item_ct1 = sycl::ext::oneapi::experimental::this_nd_item<3>();
     int tid = item_ct1.get_local_id(2) + item_ct1.get_local_id(1) * item_ct1.get_local_range(2);
diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt
index c0fc5dba9d33..2e2c880fbeb0 100644
--- a/requirements/requirements-dev.txt
+++ b/requirements/requirements-dev.txt
@@ -1,5 +1,5 @@
 accelerate
-clang-format==16.0.2
+clang-format==18.1.3
 comet_ml>=3.41.0
 deepspeed-kernels ; sys_platform == 'linux'
 docutils<0.18