From ff628b15e2ca705288490b04ca2721ecedfcad90 Mon Sep 17 00:00:00 2001 From: chivatam Date: Sun, 7 Sep 2025 10:26:19 -0400 Subject: [PATCH 1/8] rocshmem dependencies --- docker/amd-docker.Dockerfile | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/docker/amd-docker.Dockerfile b/docker/amd-docker.Dockerfile index 7d2d3ae5..c46ba18c 100644 --- a/docker/amd-docker.Dockerfile +++ b/docker/amd-docker.Dockerfile @@ -57,3 +57,58 @@ RUN sudo pip install \ packaging \ wheel \ tinygrad + +RUN sudo apt-get update -y \ + && sudo apt-get install -y --no-install-recommends \ + autoconf \ + automake \ + libtool \ + pkg-config \ + build-essential \ + gfortran \ + flex \ + bison \ + && sudo rm -rf /var/lib/apt/lists/* + +ENV UCX_INSTALL_DIR=/opt/ucx +ENV OMPI_INSTALL_DIR=/opt/openmpi +ENV ROCSHMEM_INSTALL_DIR=/opt/rocshmem +ENV ROCM_PATH=/opt/rocm + +RUN cd /tmp \ + && git clone https://github.com/openucx/ucx.git -b v1.17.x \ + && cd ucx \ + && ./autogen.sh \ + && ./configure --prefix=${UCX_INSTALL_DIR} --with-rocm=${ROCM_PATH} --enable-mt \ + && make -j$(nproc) \ + && sudo make install \ + && cd / \ + && sudo rm -rf /tmp/ucx + +RUN cd /tmp \ + && git clone --recursive https://github.com/open-mpi/ompi.git -b v5.0.x \ + && cd ompi \ + && ./autogen.pl \ + && ./configure --prefix=${OMPI_INSTALL_DIR} --with-rocm=${ROCM_PATH} --with-ucx=${UCX_INSTALL_DIR} \ + && make -j$(nproc) \ + && sudo make install \ + && cd / \ + && sudo rm -rf /tmp/ompi + +ENV PATH="${OMPI_INSTALL_DIR}/bin:${PATH}" +ENV LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${UCX_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}" + + +RUN cd /tmp \ + && git clone https://github.com/ROCm/rocSHMEM.git \ + && cd rocSHMEM \ + && mkdir build \ + && cd build \ + && MPI_ROOT=${OMPI_INSTALL_DIR} UCX_ROOT=${UCX_INSTALL_DIR} CMAKE_PREFIX_PATH="${ROCM_PATH}:$CMAKE_PREFIX_PATH" \ + ../scripts/build_configs/ipc_single ${ROCSHMEM_INSTALL_DIR} \ + && cd / \ + && sudo rm -rf /tmp/rocSHMEM + + +ENV ROCSHMEM_INSTALL_DIR=${ROCSHMEM_INSTALL_DIR} +ENV LD_LIBRARY_PATH="${ROCSHMEM_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}" From a30b0d8c0b85b33971bed907e84c532be6a1654f Mon Sep 17 00:00:00 2001 From: chivatam Date: Mon, 8 Sep 2025 22:50:40 -0400 Subject: [PATCH 2/8] fix: update UCX build configuration and dependencies for improved compatibility --- docker/amd-docker.Dockerfile | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/docker/amd-docker.Dockerfile b/docker/amd-docker.Dockerfile index c46ba18c..55c2ec64 100644 --- a/docker/amd-docker.Dockerfile +++ b/docker/amd-docker.Dockerfile @@ -1,6 +1,8 @@ FROM ghcr.io/actions/actions-runner:latest ENV CXX=clang++ +ENV UCX_CXX=g++ +ENV UCX_CC=gcc RUN sudo apt-get update -y \ && sudo apt-get install -y software-properties-common \ @@ -68,6 +70,9 @@ RUN sudo apt-get update -y \ gfortran \ flex \ bison \ + libomp-dev \ + libhwloc-dev \ + libnuma-dev \ && sudo rm -rf /var/lib/apt/lists/* ENV UCX_INSTALL_DIR=/opt/ucx @@ -79,7 +84,7 @@ RUN cd /tmp \ && git clone https://github.com/openucx/ucx.git -b v1.17.x \ && cd ucx \ && ./autogen.sh \ - && ./configure --prefix=${UCX_INSTALL_DIR} --with-rocm=${ROCM_PATH} --enable-mt \ + && CC=gcc CXX=g++ ./configure --prefix=${UCX_INSTALL_DIR} --with-rocm=${ROCM_PATH} --enable-mt --disable-optimizations \ && make -j$(nproc) \ && sudo make install \ && cd / \ @@ -96,7 +101,7 @@ RUN cd /tmp \ && sudo rm -rf /tmp/ompi ENV PATH="${OMPI_INSTALL_DIR}/bin:${PATH}" -ENV LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${UCX_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}" +ENV LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${UCX_INSTALL_DIR}/lib:/opt/rocm/lib" RUN cd /tmp \ From 7762e8958393790f945b8f90b267a5dcb12d215d Mon Sep 17 00:00:00 2001 From: chivatam Date: Sun, 14 Sep 2025 00:41:09 -0400 Subject: [PATCH 3/8] rocshmem test payload --- scripts/rocshmem_test_payload.json | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 scripts/rocshmem_test_payload.json diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json new file mode 100644 index 00000000..dfac34c6 --- /dev/null +++ b/scripts/rocshmem_test_payload.json @@ -0,0 +1,9 @@ +{ + "lang": "cpp", + "sources": { + "simple_test.cc": "#include \n#include \n\nint main() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n \n return 0;\n}", + "test.sh": "#!/bin/bash\nset -e\n\necho \"=== Simple ROCshmem Test ===\"\n\n# Test compilation only\nhipcc -c -fgpu-rdc -x hip simple_test.cc \\\n -I${ROCM_PATH:-/opt/rocm}/include \\\n -I${ROCSHMEM_INSTALL_DIR:-/opt/rocshmem}/include \\\n -I${OMPI_INSTALL_DIR:-/opt/openmpi}/include \\\n -o simple_test.o\n\necho \"Compilation successful!\"\n\n# Test linking\nhipcc -fgpu-rdc --hip-link simple_test.o -o simple_test \\\n ${ROCSHMEM_INSTALL_DIR:-/opt/rocshmem}/lib/librocshmem.a \\\n ${OMPI_INSTALL_DIR:-/opt/openmpi}/lib/libmpi.so \\\n -L${ROCM_PATH:-/opt/rocm}/lib -lamdhip64 -lhsa-runtime64\n\necho \"Linking successful!\"\n\n# Run the test\n./simple_test\n\necho \"ROCshmem test completed successfully!\"" + }, + "main": "test.sh", + "mode": "test" +} From 6c57a27edf1a41b53af6c492ce78a0f07d721f44 Mon Sep 17 00:00:00 2001 From: chivatam Date: Sun, 14 Sep 2025 14:21:04 -0400 Subject: [PATCH 4/8] rocshmem with load_inline test --- scripts/rocshmem_test_payload.json | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json index dfac34c6..d547fdec 100644 --- a/scripts/rocshmem_test_payload.json +++ b/scripts/rocshmem_test_payload.json @@ -1,9 +1,8 @@ { - "lang": "cpp", + "lang": "python", "sources": { - "simple_test.cc": "#include \n#include \n\nint main() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n \n return 0;\n}", - "test.sh": "#!/bin/bash\nset -e\n\necho \"=== Simple ROCshmem Test ===\"\n\n# Test compilation only\nhipcc -c -fgpu-rdc -x hip simple_test.cc \\\n -I${ROCM_PATH:-/opt/rocm}/include \\\n -I${ROCSHMEM_INSTALL_DIR:-/opt/rocshmem}/include \\\n -I${OMPI_INSTALL_DIR:-/opt/openmpi}/include \\\n -o simple_test.o\n\necho \"Compilation successful!\"\n\n# Test linking\nhipcc -fgpu-rdc --hip-link simple_test.o -o simple_test \\\n ${ROCSHMEM_INSTALL_DIR:-/opt/rocshmem}/lib/librocshmem.a \\\n ${OMPI_INSTALL_DIR:-/opt/openmpi}/lib/libmpi.so \\\n -L${ROCM_PATH:-/opt/rocm}/lib -lamdhip64 -lhsa-runtime64\n\necho \"Linking successful!\"\n\n# Run the test\n./simple_test\n\necho \"ROCshmem test completed successfully!\"" + "rocshmem_test.py": "import torch\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ]\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = torch.utils.cpp_extension.load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_include_paths=include_dirs,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n library_dirs=library_dirs,\n libraries=libraries,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" }, - "main": "test.sh", + "main": "rocshmem_test.py", "mode": "test" } From 03b53ab59fc866ed0c7163e7914d09487551c22f Mon Sep 17 00:00:00 2001 From: Sai Enduri Date: Wed, 17 Sep 2025 02:43:21 -0700 Subject: [PATCH 5/8] fix payload import and lang --- scripts/rocshmem_test_payload.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json index d547fdec..200d4215 100644 --- a/scripts/rocshmem_test_payload.json +++ b/scripts/rocshmem_test_payload.json @@ -1,7 +1,7 @@ { - "lang": "python", + "lang": "py", "sources": { - "rocshmem_test.py": "import torch\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ]\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = torch.utils.cpp_extension.load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_include_paths=include_dirs,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n library_dirs=library_dirs,\n libraries=libraries,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" + "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ]\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_include_paths=include_dirs,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n library_dirs=library_dirs,\n libraries=libraries,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" }, "main": "rocshmem_test.py", "mode": "test" From e7ec8adc73ea2a9705103072f0e243ad1ce99764 Mon Sep 17 00:00:00 2001 From: danielhua23 Date: Sat, 20 Sep 2025 10:57:57 +0000 Subject: [PATCH 6/8] correct payload --- scripts/rocshmem_test_payload.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json index 200d4215..ffacd56b 100644 --- a/scripts/rocshmem_test_payload.json +++ b/scripts/rocshmem_test_payload.json @@ -1,7 +1,7 @@ { "lang": "py", "sources": { - "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ]\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_include_paths=include_dirs,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n library_dirs=library_dirs,\n libraries=libraries,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" + "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n # 将库目录转换为链接器标志\n ldflags = []\n for lib_dir in library_dirs:\n ldflags.append(f\"-L{lib_dir}\")\n \n # 将库名称转换为链接器标志\n for lib in libraries:\n ldflags.append(f\"-l{lib}\")\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ] + [f\"-I{include_dir}\" for include_dir in include_dirs]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ] + ldflags\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" }, "main": "rocshmem_test.py", "mode": "test" From 18610c8d813b0dbf9afec4cd385d6366f5812b43 Mon Sep 17 00:00:00 2001 From: danielhua23 Date: Sun, 21 Sep 2025 19:26:07 -0700 Subject: [PATCH 7/8] correct again --- scripts/rocshmem_test_payload.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json index ffacd56b..3f02f30c 100644 --- a/scripts/rocshmem_test_payload.json +++ b/scripts/rocshmem_test_payload.json @@ -1,7 +1,7 @@ { "lang": "py", "sources": { - "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n # 将库目录转换为链接器标志\n ldflags = []\n for lib_dir in library_dirs:\n ldflags.append(f\"-L{lib_dir}\")\n \n # 将库名称转换为链接器标志\n for lib in libraries:\n ldflags.append(f\"-l{lib}\")\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ] + [f\"-I{include_dir}\" for include_dir in include_dirs]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ] + ldflags\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" + "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n \n ldflags = []\n for lib_dir in library_dirs:\n ldflags.append(f\"-L{lib_dir}\")\n \n \n for lib in libraries:\n ldflags.append(f\"-l{lib}\")\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ] + [f\"-I{include_dir}\" for include_dir in include_dirs]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ] + ldflags\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" }, "main": "rocshmem_test.py", "mode": "test" From 4e3c33df0128d5b7f0c345dbe6b01b8f2ec101a7 Mon Sep 17 00:00:00 2001 From: danielhua23 Date: Mon, 22 Sep 2025 08:24:04 +0000 Subject: [PATCH 8/8] correct ut & rocshmem install path --- docker/amd-docker.Dockerfile | 2 +- scripts/rocshmem_test_payload.json | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docker/amd-docker.Dockerfile b/docker/amd-docker.Dockerfile index 55c2ec64..7b8406ad 100644 --- a/docker/amd-docker.Dockerfile +++ b/docker/amd-docker.Dockerfile @@ -110,7 +110,7 @@ RUN cd /tmp \ && mkdir build \ && cd build \ && MPI_ROOT=${OMPI_INSTALL_DIR} UCX_ROOT=${UCX_INSTALL_DIR} CMAKE_PREFIX_PATH="${ROCM_PATH}:$CMAKE_PREFIX_PATH" \ - ../scripts/build_configs/ipc_single ${ROCSHMEM_INSTALL_DIR} \ + sudo ../scripts/build_configs/ipc_single -DCMAKE_INSTALL_PREFIX=/opt/rocshmem \ && cd / \ && sudo rm -rf /tmp/rocSHMEM diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json index 3f02f30c..89b81059 100644 --- a/scripts/rocshmem_test_payload.json +++ b/scripts/rocshmem_test_payload.json @@ -1,8 +1,9 @@ { "lang": "py", "sources": { - "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n \n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include\", \n f\"{ompi_path}/include\"\n ]\n \n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n \n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n \n \n ldflags = []\n for lib_dir in library_dirs:\n ldflags.append(f\"-L{lib_dir}\")\n \n \n for lib in libraries:\n ldflags.append(f\"-l{lib}\")\n \n extra_cflags = [\n \"-fgpu-rdc\",\n \"-x\", \"hip\"\n ] + [f\"-I{include_dir}\" for include_dir in include_dirs]\n \n extra_ldflags = [\n \"-fgpu-rdc\",\n \"--hip-link\"\n ] + ldflags\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" + "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n \"\"\"Test ROCshmem compilation using PyTorch's load_inline\"\"\"\n \n print(\"=== ROCshmem PyTorch Inline Test ===\")\n \n # C++ source code for ROCshmem test\n cpp_source = \"\"\"\n #include \n #include \n #include \n \n void test_rocshmem() {\n std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n \n // Just test that we can compile and link with rocshmem\n // Don't actually initialize since we may not have proper MPI setup\n std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n std::cout << \"Compilation test passed!\" << std::endl;\n }\n \n PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n }\n \"\"\"\n \n # Set up include paths and libraries\n rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/home/runner/rocshmem')\n ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n\n include_dirs = [\n f\"{rocm_path}/include\",\n f\"{rocshmem_path}/include/rocshmem\",\n f\"{ompi_path}/include\"\n ]\n\n library_dirs = [\n f\"{rocm_path}/lib\",\n f\"{rocshmem_path}/lib\",\n f\"{ompi_path}/lib\"\n ]\n\n libraries = [\n \"rocshmem\",\n \"mpi\", \n \"amdhip64\",\n \"hsa-runtime64\"\n ]\n\n ldflags = []\n for lib_dir in library_dirs:\n ldflags.append(f\"-L{lib_dir}\")\n\n for lib in libraries:\n ldflags.append(f\"-l{lib}\")\n\n extra_cflags = [f\"-I{include_dir}\" for include_dir in include_dirs]\n\n extra_ldflags = [\n \"--hip-link\"\n ] + ldflags\n \n try:\n # Use torch.utils.cpp_extension.load_inline to compile\n rocshmem_module = load_inline(\n name=\"rocshmem_test\",\n cpp_sources=cpp_source,\n extra_cflags=extra_cflags,\n extra_ldflags=extra_ldflags,\n verbose=True\n )\n \n print(\"Compilation successful!\")\n print(\"Linking successful!\")\n \n # Run the test\n rocshmem_module.test_rocshmem()\n \n print(\"ROCshmem test completed successfully!\")\n return True\n \n except Exception as e:\n print(f\"ROCshmem test failed: {e}\")\n return False\n\nif __name__ == \"__main__\":\n test_rocshmem_compilation()" }, "main": "rocshmem_test.py", "mode": "test" } +