diff --git a/docker/amd-docker.Dockerfile b/docker/amd-docker.Dockerfile
index 5a4aa9be..f9c9ac33 100644
--- a/docker/amd-docker.Dockerfile
+++ b/docker/amd-docker.Dockerfile
@@ -1,6 +1,8 @@
 FROM ghcr.io/actions/actions-runner:latest
 
 ENV CXX=clang++
+ENV UCX_CXX=g++
+ENV UCX_CC=gcc
 
 RUN sudo apt-get update -y \
     && sudo apt-get install -y software-properties-common \
@@ -59,3 +61,66 @@ RUN sudo pip install \
     tinygrad
 
 RUN sudo pip install git+https://github.com/ROCm/iris.git
+
+# Build prerequisites for the UCX, Open MPI and rocSHMEM source builds below.
+RUN sudo apt-get update -y \
+    && sudo apt-get install -y --no-install-recommends \
+        autoconf \
+        automake \
+        libtool \
+        pkg-config \
+        build-essential \
+        gfortran \
+        flex \
+        bison \
+        libomp-dev \
+        libhwloc-dev \
+        libnuma-dev \
+    && sudo rm -rf /var/lib/apt/lists/*
+
+# Install prefixes shared by the build steps below and read by scripts/rocshmem_test_payload.json.
+ENV UCX_INSTALL_DIR=/opt/ucx
+ENV OMPI_INSTALL_DIR=/opt/openmpi
+ENV ROCSHMEM_INSTALL_DIR=/opt/rocshmem
+ENV ROCM_PATH=/opt/rocm
+
+# UCX is configured with the compilers named by UCX_CC/UCX_CXX; the image-wide CXX stays clang++.
+RUN cd /tmp \
+    && git clone https://github.com/openucx/ucx.git -b v1.17.x \
+    && cd ucx \
+    && ./autogen.sh \
+    && CC=${UCX_CC} CXX=${UCX_CXX} ./configure --prefix=${UCX_INSTALL_DIR} --with-rocm=${ROCM_PATH} --enable-mt --disable-optimizations \
+    && make -j$(nproc) \
+    && sudo make install \
+    && cd / \
+    && sudo rm -rf /tmp/ucx
+
+# Open MPI is built against the UCX install above, with ROCm support enabled.
+RUN cd /tmp \
+    && git clone --recursive https://github.com/open-mpi/ompi.git -b v5.0.x \
+    && cd ompi \
+    && ./autogen.pl \
+    && ./configure --prefix=${OMPI_INSTALL_DIR} --with-rocm=${ROCM_PATH} --with-ucx=${UCX_INSTALL_DIR} \
+    && make -j$(nproc) \
+    && sudo make install \
+    && cd / \
+    && sudo rm -rf /tmp/ompi
+
+ENV PATH="${OMPI_INSTALL_DIR}/bin:${PATH}"
+ENV LD_LIBRARY_PATH="${OMPI_INSTALL_DIR}/lib:${UCX_INSTALL_DIR}/lib:${ROCM_PATH}/lib"
+
+
+# sudo resets the caller's environment by default, so the build variables are
+# handed to the script through `env` instead of as shell prefix assignments.
+RUN cd /tmp \
+    && git clone https://github.com/ROCm/rocSHMEM.git \
+    && cd rocSHMEM \
+    && mkdir build \
+    && cd build \
+    && sudo env MPI_ROOT=${OMPI_INSTALL_DIR} UCX_ROOT=${UCX_INSTALL_DIR} CMAKE_PREFIX_PATH="${ROCM_PATH}:$CMAKE_PREFIX_PATH" \
+        ../scripts/build_configs/ipc_single -DCMAKE_INSTALL_PREFIX=${ROCSHMEM_INSTALL_DIR} \
+    && cd / \
+    && sudo rm -rf /tmp/rocSHMEM
+
+
+ENV LD_LIBRARY_PATH="${ROCSHMEM_INSTALL_DIR}/lib:${LD_LIBRARY_PATH}"
\ No newline at end of file
diff --git a/scripts/rocshmem_test_payload.json b/scripts/rocshmem_test_payload.json
new file mode 100644
index 00000000..dfb38e46
--- /dev/null
+++ b/scripts/rocshmem_test_payload.json
@@ -0,0 +1,8 @@
+{
+    "lang": "py",
+    "sources": {
+        "rocshmem_test.py": "import torch\nfrom torch.utils.cpp_extension import load_inline\nimport os\n\ndef test_rocshmem_compilation():\n    \"\"\"Compile and run a trivial C++ extension against the ROCshmem headers and libraries.\n\n    Returns True when the extension builds, links and runs; False otherwise.\n    \"\"\"\n\n    print(\"=== ROCshmem PyTorch Inline Test ===\")\n\n    # C++ source code for ROCshmem test\n    cpp_source = \"\"\"\n    #include <torch/extension.h>\n    #include <iostream>\n    #include <rocshmem.hpp>\n\n    void test_rocshmem() {\n        std::cout << \"Testing ROCshmem compilation...\" << std::endl;\n\n        // Just test that we can compile and link with rocshmem\n        // Don't actually initialize since we may not have proper MPI setup\n        std::cout << \"ROCshmem headers included successfully!\" << std::endl;\n        std::cout << \"Compilation test passed!\" << std::endl;\n    }\n\n    PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {\n        m.def(\"test_rocshmem\", &test_rocshmem, \"Test ROCshmem compilation\");\n    }\n    \"\"\"\n\n    # Set up include paths and libraries\n    rocm_path = os.environ.get('ROCM_PATH', '/opt/rocm')\n    # Fallback matches ROCSHMEM_INSTALL_DIR set in docker/amd-docker.Dockerfile\n    rocshmem_path = os.environ.get('ROCSHMEM_INSTALL_DIR', '/opt/rocshmem')\n    ompi_path = os.environ.get('OMPI_INSTALL_DIR', '/opt/openmpi')\n\n    include_dirs = [\n        f\"{rocm_path}/include\",\n        f\"{rocshmem_path}/include/rocshmem\",\n        f\"{ompi_path}/include\"\n    ]\n\n    library_dirs = [\n        f\"{rocm_path}/lib\",\n        f\"{rocshmem_path}/lib\",\n        f\"{ompi_path}/lib\"\n    ]\n\n    libraries = [\n        \"rocshmem\",\n        \"mpi\",\n        \"amdhip64\",\n        \"hsa-runtime64\"\n    ]\n\n    extra_cflags = [f\"-I{include_dir}\" for include_dir in include_dirs]\n\n    # Search paths (-L) first, then the libraries (-l)\n    ldflags = [f\"-L{lib_dir}\" for lib_dir in library_dirs]\n    ldflags += [f\"-l{lib}\" for lib in libraries]\n\n    extra_ldflags = [\n        \"--hip-link\"\n    ] + ldflags\n\n    try:\n        # Use torch.utils.cpp_extension.load_inline to compile\n        rocshmem_module = load_inline(\n            name=\"rocshmem_test\",\n            cpp_sources=cpp_source,\n            extra_cflags=extra_cflags,\n            extra_ldflags=extra_ldflags,\n            verbose=True\n        )\n\n        print(\"Compilation successful!\")\n        print(\"Linking successful!\")\n\n        # Run the test\n        rocshmem_module.test_rocshmem()\n\n        print(\"ROCshmem test completed successfully!\")\n        return True\n\n    except Exception as e:\n        print(f\"ROCshmem test failed: {e}\")\n        return False\n\nif __name__ == \"__main__\":\n    # Exit non-zero on failure so the caller can detect it\n    raise SystemExit(0 if test_rocshmem_compilation() else 1)"
+    },
+    "main": "rocshmem_test.py",
+    "mode": "test"
+}
\ No newline at end of file