diff --git a/.clang-format b/.clang-format new file mode 100644 index 00000000..305926ef --- /dev/null +++ b/.clang-format @@ -0,0 +1,71 @@ +--- +AlignAfterOpenBracket: Align +AlignConsecutiveMacros: 'true' +AlignConsecutiveAssignments: 'true' +AlignConsecutiveDeclarations: 'false' +AlignEscapedNewlines: Right +AlignOperands: 'true' +AlignTrailingComments: 'true' +AllowAllArgumentsOnNextLine: 'true' +AllowAllConstructorInitializersOnNextLine: 'true' +AllowAllParametersOfDeclarationOnNextLine: 'true' +AllowShortBlocksOnASingleLine: 'true' +AllowShortCaseLabelsOnASingleLine: 'true' +AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: All +AllowShortLoopsOnASingleLine: 'true' +AlwaysBreakAfterDefinitionReturnType: None +AlwaysBreakAfterReturnType: None +AlwaysBreakBeforeMultilineStrings: 'true' +AlwaysBreakTemplateDeclarations: 'Yes' +BinPackArguments: 'false' +BinPackParameters: 'true' +BreakBeforeBinaryOperators: None +BreakBeforeBraces: Attach +BreakBeforeTernaryOperators: 'true' +BreakConstructorInitializers: AfterColon +BreakInheritanceList: BeforeColon +BreakStringLiterals: 'true' +ColumnLimit: '100' +CompactNamespaces: 'true' +ConstructorInitializerAllOnOneLineOrOnePerLine: 'false' +ConstructorInitializerIndentWidth: '8' +ContinuationIndentWidth: '2' +Cpp11BracedListStyle: 'true' +DerivePointerAlignment: 'false' +DisableFormat: 'false' +FixNamespaceComments: 'true' +IncludeBlocks: Merge +IndentCaseLabels: 'true' +IndentPPDirectives: AfterHash +IndentWidth: '4' +IndentWrappedFunctionNames: 'false' +KeepEmptyLinesAtTheStartOfBlocks: 'false' +Language: Cpp +MaxEmptyLinesToKeep: '1' +NamespaceIndentation: All +PointerAlignment: Right +SortIncludes: 'false' +SortUsingDeclarations: 'true' +SpaceAfterCStyleCast: 'false' +SpaceAfterLogicalNot: 'false' +SpaceAfterTemplateKeyword: 'false' +SpaceBeforeAssignmentOperators: 'true' +SpaceBeforeCpp11BracedList: 'true' +SpaceBeforeCtorInitializerColon: 'true' 
+SpaceBeforeInheritanceColon: 'true' +SpaceBeforeParens: ControlStatements +SpaceBeforeRangeBasedForLoopColon: 'true' +SpaceInEmptyParentheses: 'false' +SpacesBeforeTrailingComments: '1' +SpacesInAngles: 'false' +SpacesInCStyleCastParentheses: 'false' +SpacesInContainerLiterals: 'false' +SpacesInParentheses: 'false' +SpacesInSquareBrackets: 'false' +Standard: Cpp11 +TabWidth: '4' +UseTab: Always +AccessModifierOffset: -4 +... diff --git a/.github/workflows/continuous-integration.yaml b/.github/workflows/continuous-integration.yaml index c25b8d64..c460ddae 100644 --- a/.github/workflows/continuous-integration.yaml +++ b/.github/workflows/continuous-integration.yaml @@ -268,6 +268,11 @@ jobs: pretty: "LibRapid_Windows-Latest_MSVC_C++23" steps: + - name: Workflow Telemetry + uses: runforesight/workflow-telemetry-action@v1.8.7 + with: + theme: "dark" + - name: Checkout LibRapid uses: actions/checkout@v4 with: @@ -693,7 +698,7 @@ jobs: name: Trigger Documentation Build needs: [ compile, build-docs, quodona, code-coverage ] runs-on: ubuntu-latest - if: github.event_name != 'pull_request' + if: github.event_name == 'push' steps: - name: Checkout LibRapid uses: actions/checkout@v4 diff --git a/.github/workflows/wheels.yaml b/.github/workflows/wheels.yaml index c0823fa4..aad51502 100644 --- a/.github/workflows/wheels.yaml +++ b/.github/workflows/wheels.yaml @@ -270,8 +270,13 @@ jobs: # pythonType: "pp" steps: + - name: Workflow Telemetry + uses: runforesight/workflow-telemetry-action@v1.8.7 + with: + theme: "dark" + - name: Checkout LibRapid - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -284,28 +289,30 @@ jobs: run: pip install -r requirements.txt - name: Install XCode - if: matrix.os == 'macos-latest' + if: runner.os == 'macOS' uses: maxim-lobanov/setup-xcode@v1.5.1 with: xcode-version: latest - # This doesn't work for some reason - # - name: Install Clang - # if: matrix.os == 'macos-latest' - # uses: 
KyleMayes/install-llvm-action@v1 - # with: - # version: "15.0" - # directory: "./llvm" - # env: on - - name: Install Clang - if: matrix.os == 'macos-latest' + if: runner.os == 'macOS' run: | rm -f '/usr/local/bin/2to3*' brew install llvm libomp + - name: Install Clang + if: runner.os == 'Windows' + uses: KyleMayes/install-llvm-action@v1 + with: + version: '15.0' + directory: ${RUNNER_TOOL_CACHE} + env: on + - name: Build Wheels if: runner.os == 'macOS' + run: | + python -m pip install cibuildwheel + python -m cibuildwheel --output-dir wheelhouse env: CIBW_BUILD: ${{ matrix.pythonType }}${{ matrix.pythonVersion }}-${{ matrix.platformID }} CIBW_ARCHS: all @@ -321,15 +328,15 @@ jobs: LIBRAPID_GET_FFTW: OFF CC: /usr/local/opt/llvm/bin/clang CXX: /usr/local/opt/llvm/bin/clang++ - # CC: $(brew --prefix llvm)/bin/clang - # CXX: $(brew --prefix llvm)/bin/clang++ + CIBW_ENVIRONMENT: CC=/usr/local/opt/llvm/bin/clang CXX=/usr/local/opt/llvm/bin/clang++ + CMAKE_C_COMPILER: /usr/local/opt/llvm/bin/clang + CMAKE_CXX_COMPILER: /usr/local/opt/llvm/bin/clang++ + - name: Build Wheels + if: runner.os == 'Windows' run: | python -m pip install cibuildwheel python -m cibuildwheel --output-dir wheelhouse - - - name: Build Wheels - if: runner.os != 'macOS' env: CIBW_BUILD: ${{ matrix.pythonType }}${{ matrix.pythonVersion }}-${{ matrix.platformID }} CIBW_ARCHS: all @@ -342,10 +349,29 @@ jobs: GITHUB_ACTIONS: ON LIBRAPID_GET_BLAS: ON LIBRAPID_GET_FFTW: OFF + CC: ${RUNNER_TOOL_CACHE}/llvm/bin/clang + CXX: ${RUNNER_TOOL_CACHE}/llvm/bin/clang++ + CIBW_ENVIRONMENT: CC=${RUNNER_TOOL_CACHE}/llvm/bin/clang CXX=${RUNNER_TOOL_CACHE}/llvm/bin/clang++ + CMAKE_C_COMPILER: ${RUNNER_TOOL_CACHE}/llvm/bin/clang + CMAKE_CXX_COMPILER: ${RUNNER_TOOL_CACHE}/llvm/bin/clang++ + - name: Build Wheels + if: runner.os == 'Linux' run: | python -m pip install cibuildwheel python -m cibuildwheel --output-dir wheelhouse + env: + CIBW_BUILD: ${{ matrix.pythonType }}${{ matrix.pythonVersion }}-${{ matrix.platformID }} + 
CIBW_ARCHS: all + CIBW_MANYLINUX_X86_64_IMAGE: ${{ matrix.manylinux_image }} + CIBW_MANYLINUX_I686_IMAGE: ${{ matrix.manylinux_image }} + CIBW_MANYLINUX_PYPY_X86_64_IMAGE: ${{ matrix.manylinux_image }} + CIBW_MANYLINUX_PYPY_I686_IMAGE: ${{ matrix.manylinux_image }} + CIBW_BUILD_VERBOSITY: 1 + CMAKE_BUILD_PARALLEL_LEVEL: 1 + GITHUB_ACTIONS: ON + LIBRAPID_GET_BLAS: ON + LIBRAPID_GET_FFTW: OFF - name: Store Artifacts uses: actions/upload-artifact@v3 @@ -359,7 +385,7 @@ jobs: steps: - name: Checkout LibRapid - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -392,7 +418,7 @@ jobs: steps: - name: Checkout LibRapid - uses: actions/checkout@v3 + uses: actions/checkout@v4 with: submodules: recursive @@ -545,7 +571,7 @@ jobs: # # steps: # - name: Checkout LibRapid - # uses: actions/checkout@v3 + # uses: actions/checkout@v4 # with: # submodules: recursive # diff --git a/.gitignore b/.gitignore index 631f057f..54ac7d3b 100644 --- a/.gitignore +++ b/.gitignore @@ -32,5 +32,7 @@ librapid/librapid.egg-info librapid/bindings/python/generated dist/ +librapid/bindings/generators/*.hpp +librapid/bindings/generators/*.cpp *.pyc diff --git a/.readthedocs.yaml b/.readthedocs.yaml index e421c7e9..5ee5fc0f 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,7 +8,7 @@ formats: all build: os: ubuntu-22.04 tools: - python: "3.11" + python: "mambaforge-22.9" # Build documentation in the docs/ directory with Sphinx sphinx: diff --git a/CMakeLists.txt b/CMakeLists.txt index 2430a564..fac31980 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,6 +6,7 @@ cmake_policy(SET CMP0077 NEW) if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) message(STATUS "[ LIBRAPID ] LibRapid is a top-level project. 
Using C++23") set(CMAKE_CXX_STANDARD 23) + message(STATUS "[ LIBRAPID ] Building ${CMAKE_BUILD_TYPE}") endif () # LibRapid requires C++20 or later @@ -83,11 +84,28 @@ file(GLOB_RECURSE LIBRAPID_SOURCES "${CMAKE_CURRENT_SOURCE_DIR}/librapid/cxxblas/*.cxx" # Source files ) +# Extract system information +set(IS_LINUX OFF) +set(IS_MACOS OFF) +set(IS_WINDOWS OFF) +if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(IS_LINUX ON) +endif () +if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") + set(IS_MACOS ON) +endif () +if (CMAKE_SYSTEM_NAME STREQUAL "Windows") + set(IS_WINDOWS ON) + + # Disable shared libraries, since they just cause problems on Windows + set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE) +endif () + if (${SKBUILD}) message(STATUS "[ LIBRAPID ] Building for Python") set(module_name "_librapid") - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE) + set(LIBRAPID_QUIET ON) # Disable warnings for a cleaner output. message(STATUS "[ LIBRAPID ] Cloning PyBind11") FetchContent_Declare( @@ -117,28 +135,34 @@ if (${SKBUILD}) # Enable BLAS if (NOT ${LIBRAPID_NO_BLAS}) set(LIBRAPID_USE_BLAS ON) - endif() + endif () # Get BLAS if the environment variable is set if ($ENV{LIBRAPID_GET_BLAS}) set(LIBRAPID_GET_BLAS ON) - endif() + endif () # Use fast math if the environment variable is set if ($ENV{LIBRAPID_FAST_MATH}) set(LIBRAPID_FAST_MATH ON) - endif() + endif () # Get FFTW if the environment variable is set if ($ENV{LIBRAPID_GET_FFTW}) set(LIBRAPID_GET_FFTW ON) - endif() + endif () # Use CUDA/OpenCL only when not inside GitHub Actions if (NOT $ENV{GITHUB_ACTIONS}) set(LIBRAPID_USE_CUDA ON) set(LIBRAPID_USE_OPENCL ON) - endif() + set(LIBRAPID_NATIVE_ARCH ON) + else() + set(LIBRAPID_NO_ALWAYS_INLINE ON) # Reduce compile memory + endif () + + set(LIBRAPID_USE_OMP ON) + set(LIBRAPID_OPTIMISE_SMALL_ARRAYS OFF) # Disable multiprec set(LIBRAPID_USE_MULTIPREC OFF) @@ -146,22 +170,28 @@ if (${SKBUILD}) file(GLOB_RECURSE PYTHON_SOURCES - 
"${CMAKE_CURRENT_SOURCE_DIR}/librapid/bindings/python/*.hpp" # Header files - "${CMAKE_CURRENT_SOURCE_DIR}/librapid/bindings/python/*.cpp" # Source files + "${CMAKE_CURRENT_SOURCE_DIR}/librapid/bindings/python/*.hpp" # Header files + "${CMAKE_CURRENT_SOURCE_DIR}/librapid/bindings/python/*.cpp" # Source files ) pybind11_add_module( - ${module_name} MODULE + ${module_name} MODULE - ${LIBRAPID_SOURCES} - ${PYTHON_SOURCES} + ${LIBRAPID_SOURCES} + ${PYTHON_SOURCES} ) + target_compile_definitions(${module_name} PUBLIC + LIBRAPID_PYTHON + LIBRAPID_NO_ALWAYS_INLINE + WIN32_LEAN_AND_MEAN + ) install(TARGETS ${module_name} DESTINATION .) -else() +else () set(module_name "librapid") - add_library(${module_name} STATIC ${LIBRAPID_SOURCES}) -endif() + add_library(${module_name} STATIC ${LIBRAPID_SOURCES} + librapid/include/librapid/core/log.hpp) +endif () # clang-format off target_compile_definitions(${module_name} PUBLIC LIBRAPID_MAJOR=${LIBRAPID_MAJOR}) @@ -177,23 +207,6 @@ if (LIBRAPID_USE_PRECOMPILED_HEADER) target_precompile_headers(${module_name} PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/librapid/include/librapid/core/librapidPch.hpp") endif () -# Extract system information -set(IS_LINUX OFF) -set(IS_MACOS OFF) -set(IS_WINDOWS OFF) -if (CMAKE_SYSTEM_NAME STREQUAL "Linux") - set(IS_LINUX ON) -endif () -if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") - set(IS_MACOS ON) -endif () -if (CMAKE_SYSTEM_NAME STREQUAL "Windows") - set(IS_WINDOWS ON) - - # Disable shared libraries, since they just cause problems on Windows - set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE) -endif () - if (MINGW) message(FATAL_ERROR "LibRapid does not compile with MinGW. 
Please use MSVC or Clang instead.") endif () diff --git a/README.md b/README.md index 6b7625e5..86ab9eb7 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,8 @@ -![C++ Version](https://img.shields.io/badge/C++-20/23-purple.svg?style=flat&logo=c%2B%2B) ![License](https://img.shields.io/badge/License-MIT-orange.svg?style=flat) [![Discord](https://img.shields.io/discord/848914274105557043?color=blue&label=Discord&logo=Discord)](https://discord.gg/cGxTFTgCAC) +![C++ Version](https://img.shields.io/badge/C++-20/23-purple.svg?style=flat&logo=c%2B%2B) ![License](https://img.shields.io/badge/License-MIT-orange.svg?style=flat) [![Discord](https://img.shields.io/discord/848914274105557043?color=blue&label=Discord&logo=Discord)](https://discord.gg/cGxTFTgCAC) ![PyPI - Downloads](https://img.shields.io/pypi/dm/librapid?label=PyPI%20Downloads&link=https%3A%2F%2Fpypi.org%2Fproject%2Flibrapid%2F) + --- diff --git a/docs/environment.yaml b/docs/environment.yaml index d51e6434..e2ea391d 100644 --- a/docs/environment.yaml +++ b/docs/environment.yaml @@ -2,8 +2,8 @@ name: docs # doxygen channels: - conda-forge dependencies: - - python=3.9 - - doxygen=1.9.1 + - python=3.12 + - doxygen=1.9.8 - pip - pip: - -r requirements.txt diff --git a/docs/source/apiReference.md b/docs/source/apiReference.md index 2bb431b4..79dda40c 100644 --- a/docs/source/apiReference.md +++ b/docs/source/apiReference.md @@ -44,6 +44,21 @@ Complex numbers and their operations. Mathematical sets. ::: +:::{grid-item-card} {octicon}`stack` Bitset +:link: bitset/bitset +:link-type: doc + +Optimised arrays of Boolean values with support for +bitwise operations. +::: + +:::{grid-item-card} {octicon}`book` Maps +:link: map/map +:link-type: doc + +Maps and dictionary-like objects. 
+::: + :::{grid-item-card} {octicon}`number` Mathematics :link: math/math :link-type: doc @@ -75,6 +90,8 @@ Arrays, Matrices and Linear Algebra Vectors Complex Numbers Sets +Bitset +Map Mathematics Multi-Precision Arithmetic Utilities diff --git a/docs/source/bitset/bitset.md b/docs/source/bitset/bitset.md new file mode 100644 index 00000000..2b3f992d --- /dev/null +++ b/docs/source/bitset/bitset.md @@ -0,0 +1,36 @@ +# Bitset + +::::{grid} 1 2 2 3 +:margin: 4 4 0 0 +:gutter: 1 + +:::{grid-item-card} {octicon}`cpu` Documentation +:link: bitsetListing +:link-type: doc + +View the API and documentation for LibRapid Bitsets. +::: + +:::{grid-item-card} {octicon}`tools` Examples +:link: examples +:link-type: doc + +See some examples of LibRapid's Bitset library in action. +::: + +:::{grid-item-card} {octicon}`tools` Implementation Details +:link: implementation +:link-type: doc + +Learn about how LibRapid's Bitset library is implemented +::: +:::: + +```{toctree} + :hidden: + :maxdepth: 3 + + Documentation + Examples + Implementation Details +``` diff --git a/docs/source/bitset/bitsetListing.md b/docs/source/bitset/bitsetListing.md new file mode 100644 index 00000000..910ad61d --- /dev/null +++ b/docs/source/bitset/bitsetListing.md @@ -0,0 +1,4 @@ +# Bitset Listing + +```{doxygenfile} librapid/include/librapid/datastructures/bitset.hpp +``` diff --git a/docs/source/bitset/examples.md b/docs/source/bitset/examples.md new file mode 100644 index 00000000..51d4e388 --- /dev/null +++ b/docs/source/bitset/examples.md @@ -0,0 +1,3 @@ +# Bitset Examples + +*To do* diff --git a/docs/source/bitset/implementation.md b/docs/source/bitset/implementation.md new file mode 100644 index 00000000..be8c262d --- /dev/null +++ b/docs/source/bitset/implementation.md @@ -0,0 +1,3 @@ +# Bitset Implementation Details + +*To do* \ No newline at end of file diff --git a/docs/source/utilities/map.md b/docs/source/map/map.md similarity index 55% rename from docs/source/utilities/map.md rename to 
docs/source/map/map.md index 9315ded9..032a29b5 100644 --- a/docs/source/utilities/map.md +++ b/docs/source/map/map.md @@ -4,8 +4,8 @@ Both of these classes inherit from the ``std::map`` and ``std::unordered_map`` c few additional features to make them easier to use and more versatile. For the full documentation on the ``std::map`` and ``std::unordered_map`` classes, see -the [C++ reference](https://en.cppreference.com/w/cpp/container/map) -and [C++ reference](https://en.cppreference.com/w/cpp/container/unordered_map) pages. +the [map (C++ reference)](https://en.cppreference.com/w/cpp/container/map) +and [unordered map (C++ reference)](https://en.cppreference.com/w/cpp/container/unordered_map) pages. -```{doxygenfile} librapid/include/librapid/utilities/map.hpp +```{doxygenfile} librapid/include/librapid/datastructures/map.hpp ``` diff --git a/examples/example-array-2.cpp b/examples/example-array-2.cpp index 87388db1..8508aabf 100644 --- a/examples/example-array-2.cpp +++ b/examples/example-array-2.cpp @@ -12,8 +12,8 @@ auto main() -> int { std::vector> firstData = {{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}}; std::vector> secondData = {{1.0f}, {2.0f}, {3.0f}}; - auto firstMatrix = lrc::Array::fromData(firstData); - auto secondMatrix = lrc::Array::fromData(secondData); + auto firstMatrix = lrc::Array(firstData); + auto secondMatrix = lrc::Array(secondData); auto firstResult = lrc::dot(firstMatrix, secondMatrix); fmt::print("Left:\n{}\n", firstMatrix); fmt::print("Right:\n{}\n", secondMatrix); diff --git a/librapid/__init__.py b/librapid/__init__.py index 9527628c..cfe90934 100644 --- a/librapid/__init__.py +++ b/librapid/__init__.py @@ -1,6 +1,41 @@ import _librapid +class DataType: + def __init__(self, name, size): + self.name = name + self.size = size + + def __repr__(self): + return f"DataType(name={self.name}, size={self.size})" + + def __str__(self): + return self.name + + def __eq__(self, other): + return self.name == other.name and self.size == other.size + + 
def __hash__(self): + return hash((self.name, self.size)) + + +class Backend: + def __init__(self, name): + self.name = name + + def __repr__(self): + return f"Backend(name={self.name})" + + def __str__(self): + return self.name + + def __eq__(self, other): + return self.name == other.name + + def __hash__(self): + return hash(self.name) + + class Shape(_librapid.Shape): """ Stores the dimensions of an N-dimensional Array. @@ -8,4 +43,269 @@ class Shape(_librapid.Shape): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - \ No newline at end of file + + +def Array(*args, **kwargs): + """ + Creates an N-dimensional Array. + + Parameters + ---------- + shape : tuple + The dimensions of the Array. + dtype : DataType, optional + The data type of the Array. + backend : Backend, optional + The backend of the Array. + + Returns + ------- + Array + The created Array. + """ + + # Check for shape/data, dtype, and backend + shape = None + data = None + dtype = None + backend = None + + for i, arg in enumerate(args): + if i == 0: + data = arg + elif i == 1: + dtype = arg + elif i == 2: + backend = arg + + for key, value in kwargs.items(): + if key == "shape": + shape = value + elif key == "data": + data = value + elif key == "dtype": + dtype = value + elif key == "backend": + backend = value + else: + raise ValueError(f"Unknown argument {key}") + + if shape is None and data is None: + raise ValueError("Must specify either shape or data") + + if shape is not None and data is not None: + raise ValueError("Cannot specify both shape and data") + + if dtype is None: + dtype = float32 + + if backend is None: + backend = CPU + + if backend == CPU: + cpuTypeList = { + float32: _librapid.ArrayFloatCPU, + float64: _librapid.ArrayDoubleCPU, + int32: _librapid.ArrayInt32CPU, + int64: _librapid.ArrayInt64CPU, + uint32: _librapid.ArrayUInt32CPU, + uint64: _librapid.ArrayUInt64CPU, + Complex32: _librapid.ArrayComplexFloatCPU, + Complex64: _librapid.ArrayComplexDoubleCPU, 
+ } + + arrayType = cpuTypeList.get(dtype, None) + elif backend == OpenCL: + if not _librapid.hasOpenCL(): + raise RuntimeError("OpenCL is not supported in this build " + "of librapid. Ensure OpenCL is installed " + "on your system and reinstall librapid " + "from source.") + + openclTypeList = { + float32: _librapid.ArrayFloatOpenCL, + float64: _librapid.ArrayDoubleOpenCL, + int32: _librapid.ArrayInt32OpenCL, + int64: _librapid.ArrayInt64OpenCL, + uint32: _librapid.ArrayUInt32OpenCL, + uint64: _librapid.ArrayUInt64OpenCL, + Complex32: _librapid.ArrayComplexFloatOpenCL, + Complex64: _librapid.ArrayComplexDoubleOpenCL, + } + + arrayType = openclTypeList.get(dtype, None) + elif backend == CUDA: + if not _librapid.hasCUDA(): + raise RuntimeError("CUDA is not supported in this build " + "of librapid. Ensure CUDA is installed " + "on your system and reinstall librapid " + "from source.") + + cudaTypeList = { + float32: _librapid.ArrayFloatCUDA, + float64: _librapid.ArrayDoubleCUDA, + int32: _librapid.ArrayInt32CUDA, + int64: _librapid.ArrayInt64CUDA, + uint32: _librapid.ArrayUInt32CUDA, + uint64: _librapid.ArrayUInt64CUDA, + Complex32: _librapid.ArrayComplexFloatCUDA, + Complex64: _librapid.ArrayComplexDoubleCUDA, + } + + arrayType = cudaTypeList.get(dtype, None) + else: + raise ValueError(f"Unknown backend {backend}") + + if arrayType is None: + raise ValueError(f"Unknown data type {dtype}") + + if shape is not None: + return arrayType(Shape(shape)) + elif data is not None: + return arrayType(data) + else: + raise RuntimeError("Unknown error") + + +def isArray(obj): + """ + Checks if an object is an Array. + + Parameters + ---------- + obj : object + The object to check. + + Returns + ------- + bool + True if the object is an Array, False otherwise. 
+ """ + + if type(obj) in [ + _librapid.ArrayFloatCPU, + _librapid.ArrayDoubleCPU, + _librapid.ArrayInt32CPU, + _librapid.ArrayInt64CPU, + _librapid.ArrayUInt32CPU, + _librapid.ArrayUInt64CPU, + _librapid.ArrayComplexFloatCPU, + _librapid.ArrayComplexDoubleCPU + ]: + return True + + if _librapid.hasOpenCL() and type(obj) in [ + _librapid.ArrayFloatOpenCL, + _librapid.ArrayDoubleOpenCL, + _librapid.ArrayInt32OpenCL, + _librapid.ArrayInt64OpenCL, + _librapid.ArrayUInt32OpenCL, + _librapid.ArrayUInt64OpenCL, + _librapid.ArrayComplexFloatOpenCL, + _librapid.ArrayComplexDoubleOpenCL + ]: + return True + + if _librapid.hasCUDA() and type(obj) in [ + _librapid.ArrayFloatCUDA, + _librapid.ArrayDoubleCUDA, + _librapid.ArrayInt32CUDA, + _librapid.ArrayInt64CUDA, + _librapid.ArrayUInt32CUDA, + _librapid.ArrayUInt64CUDA, + _librapid.ArrayComplexFloatCUDA, + _librapid.ArrayComplexDoubleCUDA + ]: + return True + + +def hasOpenCL(): + """ + Checks if OpenCL is supported. + + Returns + ------- + bool + True if OpenCL is supported, False otherwise. + """ + + return _librapid.hasOpenCL() + + +def hasCUDA(): + """ + Checks if CUDA is supported. + + Returns + ------- + bool + True if CUDA is supported, False otherwise. + """ + + return _librapid.hasCUDA() + + +def setNumThreads(numThreads): + """ + Sets the number of threads to use for parallel operations. + + Parameters + ---------- + numThreads : int + The number of threads to use. + """ + + _librapid.setNumThreads(numThreads) + + +def getNumThreads(): + """ + Gets the number of threads used for parallel operations. + + Returns + ------- + int + The number of threads used. + """ + + return _librapid.getNumThreads() + + +def setSeed(seed): + """ + Sets the seed for random number generation. + + Parameters + ---------- + seed : int + The seed to use. + """ + + _librapid.setSeed(seed) + + +def getSeed(): + """ + Gets the seed for random number generation. + + Returns + ------- + int + The seed used. 
+ """ + + return _librapid.getSeed() + + +float32 = DataType("float32", 4) +float64 = DataType("float64", 8) +int32 = DataType("int32", 4) +int64 = DataType("int64", 8) +uint32 = DataType("uint32", 4) +uint64 = DataType("uint64", 8) +Complex32 = DataType("Complex32", 8) +Complex64 = DataType("Complex64", 16) + +CPU = Backend("CPU") +OpenCL = Backend("OpenCL") +CUDA = Backend("CUDA") diff --git a/librapid/bindings/__init__.py b/librapid/bindings/__init__.py deleted file mode 100644 index 54168be4..00000000 --- a/librapid/bindings/__init__.py +++ /dev/null @@ -1 +0,0 @@ -print("YOYOYO WAZZUP") \ No newline at end of file diff --git a/librapid/bindings/generators/argument.py b/librapid/bindings/generators/argument.py index 98bd4930..bcc3054c 100644 --- a/librapid/bindings/generators/argument.py +++ b/librapid/bindings/generators/argument.py @@ -21,6 +21,7 @@ def __init__(self, *args, **kwargs): - default - const - ref + # - move - pointer - noConvert - returnPolicy @@ -32,10 +33,11 @@ def __init__(self, *args, **kwargs): self.name = kwargs.get("name", None) self.type = kwargs.get("type", None) self.default = kwargs.get("default", None) - self.const = kwargs.get("const", True) - self.ref = kwargs.get("ref", True) - self.pointer = kwargs.get("pointer", False) - self.noConvert = kwargs.get("noConvert", False) + self.const = kwargs.get("const", None) + self.ref = kwargs.get("ref", None) + # self.move = kwargs.get("move", None) + self.pointer = kwargs.get("pointer", None) + self.noConvert = kwargs.get("noConvert", None) for i in range(len(args)): if i == 0 and self.name is None: @@ -48,6 +50,8 @@ def __init__(self, *args, **kwargs): self.const = args[i] elif i == 4 and self.ref is None: self.ref = args[i] + # elif i == 5 and self.move is None: + # self.move = args[i] elif i == 5 and self.pointer is None: self.pointer = args[i] elif i == 6 and self.noConvert is None: @@ -61,6 +65,24 @@ def __init__(self, *args, **kwargs): if self.type is None: raise ValueError("Argument 
must have a type") + if self.const is None: + self.const = False + + if self.ref is None: + self.ref = False + + # if self.move is None: + # self.move = False + + if self.pointer is None: + self.pointer = False + + if self.noConvert is None: + self.noConvert = False + + # if self.move and self.ref: + # raise ValueError("Argument cannot be both a move and a reference") + self.isArgs = self.name == "*args" self.isKwargs = self.name == "**kwargs" @@ -71,6 +93,7 @@ def param(self): return f"py::kwargs kwargs" else: isPrimitiveType = isPrimitive(self.type) + # return f"{'const ' if self.const and not isPrimitiveType and not self.move else ''}{self.type} {'&' if self.ref and not isPrimitiveType else ''}{'&&' if self.move else ''}{'*' if self.pointer else ''}{self.name}" return f"{'const ' if self.const and not isPrimitiveType else ''}{self.type} {'&' if self.ref and not isPrimitiveType else ''}{'*' if self.pointer else ''}{self.name}" def declaration(self): diff --git a/librapid/bindings/generators/arrayGenerator.py b/librapid/bindings/generators/arrayGenerator.py index ff3627a4..7576ac80 100644 --- a/librapid/bindings/generators/arrayGenerator.py +++ b/librapid/bindings/generators/arrayGenerator.py @@ -9,14 +9,15 @@ # The set of Array types we support in Python arrayTypes = [] -for scalar in [("int16_t", "Int16"), - ("int32_t", "Int32"), +for scalar in [("int32_t", "Int32"), ("int64_t", "Int64"), + ("uint32_t", "UInt32"), + ("uint64_t", "UInt64"), ("float", "Float"), ("double", "Double"), ("lrc::Complex", "ComplexFloat"), ("lrc::Complex", "ComplexDouble")]: - for backend in ["CPU", "OpenCL", "CUDA"]: + for backend in ["CPU"]: # ["CPU", "OpenCL", "CUDA"]: arrayTypes.append({ "scalar": scalar[0], "backend": backend, @@ -29,7 +30,7 @@ def generateCppArrayType(config): def generateCppArrayViewType(config): - return f"lrc::array::GeneralArrayView<{generateCppArrayType(config)}>" + return f"lrc::array::GeneralArrayView<{generateCppArrayType(config)} &, lrc::Shape>" def 
generateFunctionsForArray(config): @@ -38,35 +39,33 @@ def generateFunctionsForArray(config): function.Function( name="__init__", args=[] - ) - ] + ), - # Static fromData (n dimensions) - for n in range(1, 9): - cppType = "" - for j in range(n): - cppType += "std::vector<" - cppType += config['scalar'] + ">" * n + # Copy constructor + function.Function( + name="__init__", + args=[ + argument.Argument( + name="other", + type=generateCppArrayType(config), + const=True, + ref=True + ) + ] + ), - methods.append( - function.Function( - name="fromData", - args=[ - argument.Argument( - name=f"array{n}D", - type=cppType, - const=True, - ref=True, - ) - ], - static=True, - op=f""" - return {generateCppArrayType(config)}::fromData(array{n}D); - """ - ) - ) + # Move constructor + function.Function( + name="__init__", + args=[ + argument.Argument( + name="other", + type=generateCppArrayType(config), + move=True + ) + ] + ), - methods += [ # Shape function.Function( name="__init__", @@ -111,7 +110,202 @@ def generateFunctionsForArray(config): ) ] ), + ] + + # Static fromData (n dimensions) + for n in range(1, 9): + cppType = ("std::vector<" * n) + config['scalar'] + (">" * n) + + methods.append( + function.Function( + name="__init__", + args=[ + argument.Argument( + name=f"array{n}D", + type=cppType, + const=True, + ref=True, + ) + ] + ) + ) + + methods += [ + # Get item + function.Function( + name="__getitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ) + ], + op=""" + return self[index]; + """ + ), + # Set item (GeneralArrayView) + function.Function( + name="__setitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ), + argument.Argument( + name="other", + type=generateCppArrayViewType(config), + const=True, + ref=True + ) + 
], + op=""" + self[index] = other; + return self; + """ + ), + + # Set item (Array) + function.Function( + name="__setitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ), + argument.Argument( + name="other", + type=generateCppArrayType(config), + const=True, + ref=True + ) + ], + op=""" + self[index] = other; + return self; + """ + ), + + # Set item (Scalar) + function.Function( + name="__setitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ), + argument.Argument( + name="other", + type=config["scalar"], + const=True, + ref=True + ) + ], + op=""" + self[index] = other; + return self; + """ + ) + ] + + for operation in [("add", "+"), ("sub", "-"), ("mul", "*"), ("div", "/")]: + for dtype in [generateCppArrayType(config), generateCppArrayViewType(config), config["scalar"]]: + methods.append( + function.Function( + name=f"__i{operation[0]}__", # In-place form: the emitted C++ must use this operation's own symbol (operation[1]), not always "+" + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=False, + ref=True + ), + argument.Argument( + name="other", + type=dtype, + const=True, + ref=True + ) + ], + op=f""" + self {operation[1]}= other; + return self; + """ + ) + ) + + methods.append( + function.Function( + name=f"__{operation[0]}__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=True, + ref=True + ), + argument.Argument( + name="other", + type=dtype, + const=True, + ref=True + ) + ], + op=f""" + return (self {operation[1]} other).eval(); + """ + ) + ) + + methods.append( + function.Function( + name=f"__r{operation[0]}__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayType(config), + const=True, + ref=True + ), + argument.Argument( + name="other", + type=dtype, + const=True, + ref=True + ) + ], + op=f""" + return (other 
{operation[1]} self).eval(); + """ + ) + ) + + methods += [ # String representation function.Function( name="__str__", @@ -140,7 +334,15 @@ def generateFunctionsForArray(config): ) ], op=f""" - return fmt::format("", self.shape()); + std::string thisStr = fmt::format("{{}}", self); + std::string padded; + for (const auto &c : thisStr) {{ + padded += c; + if (c == '\\n') {{ + padded += std::string(16, ' '); + }} + }} + return fmt::format("", padded); """ ), @@ -190,6 +392,7 @@ def generateArrayModule(config): name=f"librapid.{config['name']}", includeGuard=includeGuard ) + arrayModule.addClass(arrayClass) arrayModule.functions.extend(functions) @@ -200,18 +403,8 @@ def writeArray(root, config): fileType = file.File( path=f"{root}/{config['name']}.cpp" ) - fileType.modules.append(generateArrayModule(config)) - interfaceFunctions = fileType.write() - # Run clang-format if possible - try: - import subprocess - - subprocess.run(["clang-format", "-i", fileType.path]) - except Exception as e: - print("Unable to run clang-format:", e) - return interfaceFunctions diff --git a/librapid/bindings/generators/boilerplate.py b/librapid/bindings/generators/boilerplate.py new file mode 100644 index 00000000..dc31a66e --- /dev/null +++ b/librapid/bindings/generators/boilerplate.py @@ -0,0 +1,20 @@ +import textwrap + + +def boilerplate(): + return textwrap.dedent(f""" + #pragma once + + #ifndef LIBRAPID_DEBUG + #define LIBRAPID_DEBUG + #endif + + #include + #include + #include + + #include + + namespace py = pybind11; + namespace lrc = librapid; + """).strip() diff --git a/librapid/bindings/generators/class_.py b/librapid/bindings/generators/class_.py index 48cc3b56..3ffd6ef6 100644 --- a/librapid/bindings/generators/class_.py +++ b/librapid/bindings/generators/class_.py @@ -1,5 +1,8 @@ from argument import Argument import function +import os +import textwrap +import boilerplate class Class: @@ -24,21 +27,68 @@ def genImplicitConversions(self): return ret - def genInterface(self, 
parent="module"): - ret = f"py::class_<{self.type}>({parent}, \"{self.name}\")\n" - for func in self.functions: - ret += func.gen(self, False) + def genInterface(self, parent="module", root="./", includeGuard=None): + mainInterface = f"py::class_<{self.type}> {self.name}Class({parent}, \"{self.name}\");\n" + includes = [] + + # Ensure directory exists + if not os.path.exists(f"{root}/{self.name}"): + os.makedirs(f"{root}/{self.name}") - if func is not self.functions[-1]: - ret += "\n" + # Ensure function names are unique + functionCount = 0 - ret += ";\n" + for func in self.functions: + functionName = f"librapidPython_{self.name}_{func.name}_{functionCount}" + fileName = f"{func.name}_{functionCount}" + filePath = f"{root}/{self.name}/{fileName}" + + # Write definition + with open(f"{filePath}.hpp", "w") as f: + f.write(textwrap.dedent(f""" + {boilerplate.boilerplate()} + + void {functionName}(py::class_<{self.type}>& module); + """)) + includes.append(f"{filePath}.hpp") + + # Write implementation + with open(f"{filePath}.cpp", "w") as f: + f.write(f"#include \"{fileName}.hpp\"\n") + + if includeGuard is not None: + f.write(f"#if {includeGuard}\n") + + f.write(textwrap.dedent(f""" + void {functionName}(py::class_<{self.type}>& {self.name}) {{ + {func.gen(self, True)}; + }} + """)) + + if includeGuard is not None: + f.write(f"#else\n") + f.write(textwrap.dedent(f""" + void {functionName}(py::class_<{self.type}>& module) {{ + return; + }} + """)) + f.write(f"#endif\n") + + # Add function call to interface + mainInterface += f"{functionName}({self.name}Class);\n" + + functionCount += 1 + + # ret += func.gen(self, False) + # + # if func is not self.functions[-1]: + # ret += "\n" if len(self.implicitConversions) > 0: - ret += "\n" - ret += self.genImplicitConversions() + mainInterface += "\n" + mainInterface += self.genImplicitConversions() - return ret + return mainInterface, includes def __str__(self): return self.name @@ -109,4 +159,6 @@ def __str__(self): 
vector.addImplicitConversion(vector) - print(vector.genInterface()) + mainInterface, includes = vector.genInterface(root="../python/generated", includeGuard="defined(LIBRAPID_HAS_CUDA)") + print(mainInterface) + print(includes) diff --git a/librapid/bindings/generators/file.py b/librapid/bindings/generators/file.py index e26983dc..f7985a06 100644 --- a/librapid/bindings/generators/file.py +++ b/librapid/bindings/generators/file.py @@ -12,20 +12,28 @@ def addModule(self, module): def genInterface(self): interfaceFunctions = [] + includes = [] ret = "" + + root = self.path[:self.path.rfind("/")] + for module in self.modules: - ret += module.genInterface() + moduleInterface, moduleIncludes = module.genInterface(root=root) + ret += moduleInterface ret += "\n" interfaceFunctions.append((module.genInterfaceDefinition, module.genInterfaceCall)) + includes += moduleIncludes - return ret, interfaceFunctions + return ret, interfaceFunctions, includes def write(self, path=None): interfaceFunctions = [] with open(path if path is not None else self.path, "w") as f: f.write("#include \"librapidPython.hpp\"\n\n") - interface, interfaceFunctionsTmp = self.genInterface() + interface, interfaceFunctionsTmp, includes = self.genInterface() + for include in includes: + f.write(f"#include \"{include.strip('../python/generated/')}.hpp\"\n") f.write(interface.strip()) interfaceFunctions.extend(interfaceFunctionsTmp) diff --git a/librapid/bindings/generators/generalArrayViewGenerator.py b/librapid/bindings/generators/generalArrayViewGenerator.py new file mode 100644 index 00000000..90d1a817 --- /dev/null +++ b/librapid/bindings/generators/generalArrayViewGenerator.py @@ -0,0 +1,337 @@ +import argument +import function +import class_ +import module +import file + +import itertools + +# The set of Array types we support in Python +arrayTypes = [] + +for scalar in [("int32_t", "Int32"), + ("int64_t", "Int64"), + ("uint32_t", "UInt32"), + ("uint64_t", "UInt64"), + ("float", "Float"), + 
("double", "Double"), + ("lrc::Complex", "ComplexFloat"), + ("lrc::Complex", "ComplexDouble")]: + for backend in ["CPU"]: # ["CPU", "OpenCL", "CUDA"]: + arrayTypes.append({ + "scalar": scalar[0], + "backend": backend, + "name": f"GeneralArrayView{scalar[1]}{backend}" + }) + + +def generateCppArrayType(config): + return f"lrc::Array<{config['scalar']}, lrc::backend::{config['backend']}>" + + +def generateCppArrayViewType(config): + return f"lrc::array::GeneralArrayView<{generateCppArrayType(config)} &, lrc::Shape>" + + +def generateFunctionsForGeneralArrayView(config): + methods = [ + # Create a new GeneralArrayView + function.Function( + name="createFromArray", + args=[ + argument.Argument( + name="array", + type=generateCppArrayType(config), + const=False, + ref=True + ) + ], + op=f""" + return lrc::createGeneralArrayView(array); + """, + static=True + ), + + # Get item + function.Function( + name="__getitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=True, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ) + ], + op=""" + return self[index]; + """ + ), + + # Set item (GeneralArrayView) + function.Function( + name="__setitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ), + argument.Argument( + name="other", + type=generateCppArrayViewType(config), + const=True, + ref=True + ) + ], + op=""" + self[index] = other; + return self; + """ + ), + + # Set item (Array) + function.Function( + name="__setitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ), + argument.Argument( + name="other", + type=generateCppArrayType(config), + const=True, + ref=True + ) + ], + op=""" + self[index] = other; + return self; + """ + ), + + # Set item 
(Scalar) + function.Function( + name="__setitem__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=False, + ref=True + ), + argument.Argument( + name="index", + type="int64_t" + ), + argument.Argument( + name="other", + type=config["scalar"], + const=True, + ref=True + ) + ], + op=""" + self[index] = other; + return self; + """ + ) + ] + + for operation in [("add", "+"), ("sub", "-"), ("mul", "*"), ("div", "/")]: + for dtype in [generateCppArrayViewType(config), generateCppArrayType(config), config["scalar"]]: + methods.append( + function.Function( + name=f"__i{operation[0]}__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=False, + ref=True + ), + argument.Argument( + name="other", + type=dtype, + const=True, + ref=True + ) + ], + op=f""" + self += other; + return self; + """ + ) + ) + + methods.append( + function.Function( + name=f"__{operation[0]}__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=True, + ref=True + ), + argument.Argument( + name="other", + type=dtype, + const=True, + ref=True + ) + ], + op=f""" + // Release the GIL to improve performance + py::gil_scoped_release release; + return (self {operation[1]} other).eval(); + """ + ) + ) + + methods.append( + function.Function( + name=f"__r{operation[0]}__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=True, + ref=True + ), + argument.Argument( + name="other", + type=dtype, + const=True, + ref=True + ) + ], + op=f""" + return (other {operation[1]} self).eval(); + """ + ) + ) + + methods += [ + # String representation + function.Function( + name="__str__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=True, + ref=True + ) + ], + op=""" + return fmt::format("{}", self); + """ + ), + + # String representation + function.Function( + name="__repr__", + args=[ + 
argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=True, + ref=True + ) + ], + op=f""" + std::string thisStr = fmt::format("{{}}", self); + std::string padded; + for (const auto &c : thisStr) {{ + padded += c; + if (c == '\\n') {{ + padded += std::string(27, ' '); + }} + }} + return fmt::format("", padded); + """ + ), + + # Format (__format__) + function.Function( + name="__format__", + args=[ + argument.Argument( + name="self", + type=generateCppArrayViewType(config), + const=True, + ref=True + ), + argument.Argument( + name="formatSpec", + type="std::string", + const=True, + ref=True + ) + ], + op=""" + std::string format = fmt::format("{{:{}}}", formatSpec); + return fmt::format(fmt::runtime(format), self); + """ + ) + ] + + return methods, [] + + +def generateGeneralArrayViewModule(config): + generalArrayViewClass = class_.Class( + name=config["name"], + type=generateCppArrayViewType(config) + ) + + methods, functions = generateFunctionsForGeneralArrayView(config) + generalArrayViewClass.functions.extend(methods) + + includeGuard = None + if config["backend"] == "CUDA": + includeGuard = "defined(LIBRAPID_HAS_CUDA)" + elif config["backend"] == "OpenCL": + includeGuard = "defined(LIBRAPID_HAS_OPENCL)" + + generalArrayViewModule = module.Module( + name=f"librapid.GeneralArrayView.{config['name']}", + includeGuard=includeGuard + ) + generalArrayViewModule.addClass(generalArrayViewClass) + generalArrayViewModule.functions.extend(functions) + + return generalArrayViewModule + + +def writeGeneralArrayView(root, config): + fileType = file.File( + path=f"{root}/GeneralArrayView_{config['name']}.cpp" + ) + fileType.modules.append(generateGeneralArrayViewModule(config)) + interfaceFunctions = fileType.write() + return interfaceFunctions + + +def write(root): + interfaces = [] + for config in arrayTypes: + interfaces.extend(writeGeneralArrayView(root, config)) + return interfaces diff --git a/librapid/bindings/generators/main.py 
b/librapid/bindings/generators/main.py index 6c623b46..bb0dd27e 100644 --- a/librapid/bindings/generators/main.py +++ b/librapid/bindings/generators/main.py @@ -3,22 +3,57 @@ import shapeGenerator import arrayGenerator +import generalArrayViewGenerator +import boilerplate outputDir = "../python/generated" -boilerplate = textwrap.dedent(f""" - #pragma once +postBoilerplate = textwrap.dedent(f""" +#if defined(LIBRAPID_HAS_OPENCL) + module.def("configureOpenCL", [](bool verbose, bool ask) {{ + lrc::configureOpenCL(verbose, ask); + }}, py::arg("verbose") = false, py::arg("ask") = false); +#else + module.def("configureOpenCL", [](bool verbose, bool ask) {{ + throw std::runtime_error("OpenCL is not supported in this build " + "of LibRapid. Please ensure OpenCL is " + "installed on your system and reinstall " + "LibRapid from source."); + }}, py::arg("verbose") = false, py::arg("ask") = false); +#endif - #define LIBRAPID_ASSERT - - #include - #include - #include - #include + module.def("hasOpenCL", []() {{ + #if defined(LIBRAPID_HAS_OPENCL) + return true; + #else + return false; + #endif + }}); + + module.def("hasCUDA", []() {{ + #if defined(LIBRAPID_HAS_CUDA) + return true; + #else + return false; + #endif + }}); + + module.def("setNumThreads", [](size_t numThreads) {{ + lrc::setNumThreads(numThreads); + }}, py::arg("numThreads")); - namespace py = pybind11; - namespace lrc = librapid; - """).strip() + module.def("getNumThreads", []() {{ + return lrc::getNumThreads(); + }}); + + module.def("setSeed", [](size_t seed) {{ + lrc::setSeed(seed); + }}, py::arg("seed")); + + module.def("getSeed", []() {{ + return lrc::getSeed(); + }}); +""").strip() def main(): @@ -30,9 +65,10 @@ def main(): interfaceFunctions += shapeGenerator.write(outputDir) interfaceFunctions += arrayGenerator.write(outputDir) + interfaceFunctions += generalArrayViewGenerator.write(outputDir) with open(f"{outputDir}/librapidPython.hpp", "w") as f: - f.write(boilerplate) + f.write(boilerplate.boilerplate()) 
f.write("\n\n") for interfaceDef, _ in interfaceFunctions: f.write(f"{interfaceDef()};\n") @@ -43,8 +79,40 @@ def main(): f.write(" module.doc() = \"Python bindings for librapid\";\n") for _, interfaceCall in interfaceFunctions: f.write(f" {interfaceCall('module')};\n") + + f.write("\n") + f.write(postBoilerplate) + f.write("\n") + f.write("}\n") + # Apply clang-format to the generated files + # import subprocess + # for file in os.listdir("../python/generated"): + # if file.endswith(".hpp") or file.endswith(".cpp"): + # try: + # subprocess.run(["clang-format", "-i", "-style=llvm", f"librapid/bindings/python/generated/{file}"], cwd="../../../") + # except Exception as e: + # print("Unable to run clang-format:", e) + + # Apply clang-format to the generated files (recursive) + import subprocess + prevChars = 0 + for root, dirs, files in os.walk("../python/generated"): + for file in files: + + print(" " * prevChars, end='\r') + text = f"Formatting {root}/{file}" + print(text, end='\r') + prevChars = len(text) + + + if file.endswith(".hpp") or file.endswith(".cpp"): + try: + subprocess.run(["clang-format", "-i", "-style=llvm", f"{root}/{file}"], cwd="./") + except Exception as e: + print(f"Unable to run clang-format on {root}/{file}:", e) + if __name__ == "__main__": main() diff --git a/librapid/bindings/generators/module.py b/librapid/bindings/generators/module.py index cc5c47e8..abc4985a 100644 --- a/librapid/bindings/generators/module.py +++ b/librapid/bindings/generators/module.py @@ -27,8 +27,9 @@ def genInterfaceCall(self, moduleName): tmpName = self.name.replace(".", "_") return f"genInterface_{tmpName}({moduleName})" - def genInterface(self): + def genInterface(self, root="./"): ret = f"{self.genInterfaceDefinition()} {{\n" + includes = [] if self.parent is None: moduleName = "module" @@ -40,7 +41,9 @@ def genInterface(self): ret += f"{moduleName}.doc() = \"{self.docstring}\";\n\n" for class_ in self.classes: - ret += class_.genInterface(moduleName) + 
classInterface, classIncludes = class_.genInterface(moduleName, root=root, includeGuard=self.includeGuard) + includes += classIncludes + ret += classInterface ret += "\n" for func in self.functions: @@ -50,15 +53,15 @@ def genInterface(self): ret += "}\n" if self.includeGuard is None: - return ret + return ret, includes else: - return textwrap.dedent(f""" + guardedInterface = textwrap.dedent(f""" #if {self.includeGuard} {ret} #else {self.genInterfaceDefinition()} {{}} #endif - """) + """), includes if __name__ == "__main__": diff --git a/librapid/bindings/generators/shapeGenerator.py b/librapid/bindings/generators/shapeGenerator.py index 703f8ccf..aab69fcf 100644 --- a/librapid/bindings/generators/shapeGenerator.py +++ b/librapid/bindings/generators/shapeGenerator.py @@ -184,6 +184,21 @@ """ ), + function.Function( + name="__len__", + args=[ + argument.Argument( + name="self", + type="lrc::Shape", + const=True, + ref=True + ) + ], + op=""" + return self.ndim(); + """ + ), + # Subshape function.Function( name="subshape", diff --git a/librapid/bindings/python/__init__.py b/librapid/bindings/python/__init__.py deleted file mode 100644 index fb026308..00000000 --- a/librapid/bindings/python/__init__.py +++ /dev/null @@ -1 +0,0 @@ -print("Hello from _librapid") \ No newline at end of file diff --git a/librapid/include/librapid/array/arrayContainer.hpp b/librapid/include/librapid/array/arrayContainer.hpp index 5ee80f21..bf2fbe5a 100644 --- a/librapid/include/librapid/array/arrayContainer.hpp +++ b/librapid/include/librapid/array/arrayContainer.hpp @@ -98,21 +98,19 @@ namespace librapid { /// Default constructor ArrayContainer(); - template - LIBRAPID_ALWAYS_INLINE ArrayContainer(const std::initializer_list &data); + // template + // LIBRAPID_ALWAYS_INLINE ArrayContainer(const std::initializer_list &data); - template - explicit LIBRAPID_ALWAYS_INLINE ArrayContainer(const std::vector &data); + // template + // explicit LIBRAPID_ALWAYS_INLINE ArrayContainer(const 
std::vector &data); // clang-format off #define SINIT(SUB_TYPE) std::initializer_list #define SVEC(SUB_TYPE) std::vector -#define ARRAY_FROM_DATA_DEF(TYPE_INIT, TYPE_VEC) \ - LIBRAPID_NODISCARD static LIBRAPID_ALWAYS_INLINE auto fromData(const TYPE_INIT &data) \ - -> ArrayContainer; \ - LIBRAPID_NODISCARD static LIBRAPID_ALWAYS_INLINE auto fromData(const TYPE_VEC &data) \ - -> ArrayContainer +#define ARRAY_FROM_DATA_DEF(TYPE_INIT, TYPE_VEC) \ + LIBRAPID_ALWAYS_INLINE ArrayContainer(const TYPE_INIT & data); \ + explicit LIBRAPID_ALWAYS_INLINE ArrayContainer(const TYPE_VEC &data) ; ARRAY_FROM_DATA_DEF(SINIT(Scalar), SVEC(Scalar)); ARRAY_FROM_DATA_DEF(SINIT(SINIT(Scalar)), SVEC(SVEC(Scalar))); @@ -182,8 +180,8 @@ namespace librapid { /// \tparam Args The argument types of the function /// \param function The function to assign template - LIBRAPID_ALWAYS_INLINE ArrayContainer( - const detail::Function &function) LIBRAPID_RELEASE_NOEXCEPT; + LIBRAPID_ALWAYS_INLINE + ArrayContainer(const detail::Function &function); /// \brief Reference an existing array container /// @@ -195,6 +193,10 @@ namespace librapid { /// \param other The array container to reference LIBRAPID_ALWAYS_INLINE ArrayContainer &operator=(const ArrayContainer &other) = default; + template + LIBRAPID_ALWAYS_INLINE ArrayContainer & + operator=(const array::GeneralArrayView &view); + LIBRAPID_ALWAYS_INLINE ArrayContainer &operator=(const Scalar &value); /// Assign a temporary array container to this array container. 
@@ -341,9 +343,9 @@ namespace librapid { /// \return Iterator LIBRAPID_ALWAYS_INLINE auto end(); - template + template void str(const fmt::formatter &format, char bracket, char separator, - Ctx &ctx) const; + const char (&formatString)[N], Ctx &ctx) const; private: ShapeType m_shape; // The shape type of the array @@ -355,20 +357,6 @@ namespace librapid { LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer() : m_shape(StorageType_::template defaultShape()), m_size(0) {} - template - template - LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer( - const std::initializer_list &data) : - m_shape({data.size()}), - m_size(data.size()), m_storage(StorageType::fromData(data)) {} - - template - template - LIBRAPID_ALWAYS_INLINE - ArrayContainer::ArrayContainer(const std::vector &data) : - m_shape({data.size()}), - m_size(data.size()), m_storage(StorageType::fromData(data)) {} - template LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer(const Shape &shape) : @@ -515,10 +503,9 @@ namespace librapid { template template LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer( - const detail::Function &function) LIBRAPID_RELEASE_NOEXCEPT - : m_shape(function.shape()), - m_size(function.size()), - m_storage(m_shape.size()) { + const detail::Function &function) : + m_shape(function.shape()), + m_size(function.size()), m_storage(m_shape.size()) { assign(function); } @@ -553,11 +540,23 @@ namespace librapid { return *this; } + template + template + LIBRAPID_ALWAYS_INLINE auto ArrayContainer::operator=( + const array::GeneralArrayView &view) -> ArrayContainer & { + m_shape = view.shape(); + m_size = view.size(); + m_storage.resize(m_shape.size(), 0); + for (int64_t i = 0; i < m_size; ++i) { m_storage[i] = view.scalar(i); } + return *this; + } + template LIBRAPID_ALWAYS_INLINE auto ArrayContainer::operator=(const Scalar &value) -> ArrayContainer & { - LIBRAPID_ASSERT(m_shape.ndim() == 0, "Cannot assign a scalar to an array"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + 
std::invalid_argument, m_shape.ndim() == 0, "Cannot assign a scalar to an array with {} dimensions", m_shape.ndim()); m_storage[0] = value; return *this; } @@ -581,7 +580,8 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto ArrayContainer::operator[](int64_t index) const { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, index >= 0 && index < static_cast(m_shape[0]), "Index {} out of bounds in ArrayContainer::operator[] with leading dimension={}", index, @@ -628,7 +628,8 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto ArrayContainer::operator[](int64_t index) { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, index >= 0 && index < static_cast(m_shape[0]), "Index {} out of bounds in ArrayContainer::operator[] with leading dimension={}", index, @@ -677,7 +678,8 @@ namespace librapid { LIBRAPID_ALWAYS_INLINE auto ArrayContainer::operator()(Indices... indices) const -> DirectSubscriptType { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, m_shape.ndim() == sizeof...(Indices), "ArrayContainer::operator() called with {} indices, but array has {} dimensions", sizeof...(Indices), @@ -686,7 +688,8 @@ namespace librapid { int dim = 0; int64_t index = 0; for (int64_t i : {indices...}) { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, i >= 0 && i < static_cast(m_shape[dim]), "Index {} out of bounds in ArrayContainer::operator() with dimension={}", i, @@ -701,7 +704,8 @@ namespace librapid { LIBRAPID_ALWAYS_INLINE auto ArrayContainer::operator()(Indices... 
indices) -> DirectRefSubscriptType { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, m_shape.ndim() == sizeof...(Indices), "ArrayContainer::operator() called with {} indices, but array has {} dimensions", sizeof...(Indices), @@ -710,11 +714,12 @@ namespace librapid { int64_t index = 0; int64_t count = 0; for (int64_t i : {indices...}) { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, i >= 0 && i < static_cast(m_shape[count]), "Index {} out of bounds in ArrayContainer::operator() with dimension={}", i, - m_shape[index]); + m_shape[count]); index = index * m_shape[count++] + i; } return m_storage[index]; @@ -723,8 +728,9 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto ArrayContainer::get() const -> Scalar { - LIBRAPID_ASSERT(m_shape.ndim() == 0, - "Can only cast a scalar ArrayView to a salar object"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + m_shape.ndim() == 0, + "Can only cast a scalar ArrayView to a salar object. 
Array has {}", m_shape); return scalar(0); } @@ -764,10 +770,6 @@ namespace librapid { auto ptr = LIBRAPID_ASSUME_ALIGNED(m_storage.begin()); #if defined(LIBRAPID_NATIVE_ARCH) - LIBRAPID_ASSERT( - reinterpret_cast(ptr) % typetraits::TypeInfo::packetWidth == 0, - "ArrayContainer::packet called on unaligned storage"); - return xsimd::load_aligned(ptr + index); #else return xsimd::load_unaligned(ptr + index); @@ -786,9 +788,6 @@ namespace librapid { auto ptr = LIBRAPID_ASSUME_ALIGNED(m_storage.begin()); #if defined(LIBRAPID_NATIVE_ARCH) - LIBRAPID_ASSERT( - reinterpret_cast(ptr) % typetraits::TypeInfo::packetWidth == 0, - "ArrayContainer::packet called on unaligned storage"); value.store_aligned(ptr + index); #else value.store_unaligned(ptr + index); @@ -904,10 +903,11 @@ namespace librapid { } template - template + template LIBRAPID_ALWAYS_INLINE void ArrayContainer::str( - const fmt::formatter &format, char bracket, char separator, Ctx &ctx) const { - createGeneralArrayView(*this).str(format, bracket, separator, ctx); + const fmt::formatter &format, char bracket, char separator, + const char (&formatString)[N], Ctx &ctx) const { + createGeneralArrayView(*this).str(format, bracket, separator, formatString, ctx); } } // namespace array diff --git a/librapid/include/librapid/array/arrayFromData.hpp b/librapid/include/librapid/array/arrayFromData.hpp index 4f53cb9f..0fec65cc 100644 --- a/librapid/include/librapid/array/arrayFromData.hpp +++ b/librapid/include/librapid/array/arrayFromData.hpp @@ -1,104 +1,139 @@ #ifndef LIBRAPID_ARRAY_FROM_DATA_HPP #define LIBRAPID_ARRAY_FROM_DATA_HPP -namespace librapid { +namespace librapid::array { template - LIBRAPID_ALWAYS_INLINE auto array::ArrayContainer::fromData( - const std::initializer_list &data) -> ArrayContainer { - static_assert(!std::is_same_v, - "Cannot create a matrix from a 1D array"); - LIBRAPID_ASSERT(data.size() > 0, "Array must have at least one element"); - return ArrayContainer(data); + LIBRAPID_ALWAYS_INLINE 
ArrayContainer::ArrayContainer( + const std::initializer_list &data) : + m_shape({data.size()}), + m_size(data.size()), m_storage(StorageType::fromData(data)) { + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, data.size() > 0, "Array must have at least one element"); } template - LIBRAPID_ALWAYS_INLINE auto - array::ArrayContainer::fromData(const std::vector &data) - -> ArrayContainer { - static_assert(!std::is_same_v, - "Cannot create a matrix from a 1D array"); - LIBRAPID_ASSERT(data.size() > 0, "Array must have at least one element"); - return ArrayContainer(data); + LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer::ArrayContainer( + const std::vector &data) : + m_shape({data.size()}), + m_size(data.size()), m_storage(StorageType::fromData(data)) { + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, data.size() > 0, "Array must have at least one element"); } template - LIBRAPID_ALWAYS_INLINE auto array::ArrayContainer::fromData( - const std::initializer_list> &data) -> ArrayContainer { - LIBRAPID_ASSERT(data.size() > 0, "Cannot create a zero-sized array"); + LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer( + const std::initializer_list> &data) { + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, data.size() > 0, "Cannot create a zero-sized array"); if constexpr (std::is_same_v) { auto newShape = ShapeType({data.size(), data.begin()->size()}); auto res = ArrayContainer(newShape); for (size_t i = 0; i < data.size(); ++i) { - LIBRAPID_ASSERT(data.begin()[i].size() == newShape[1], - "Arrays must have consistent shapes"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, + data.begin()[i].size() == newShape[1], + "Arrays must have consistent shapes. 
{}th dimension had size {}, expected {}", + i, + data.begin()[i].size(), + newShape[1]); for (size_t j = 0; j < data.begin()[i].size(); ++j) { res(i, j) = data.begin()[i].begin()[j]; } } - return res; + + // return res; + *this = res; } else { auto newShape = ShapeType({data.size(), data.begin()->size()}); #if defined(LIBRAPID_ENABLE_ASSERT) for (size_t i = 0; i < data.size(); ++i) { - LIBRAPID_ASSERT(data.begin()[i].size() == newShape[1], - "Arrays must have consistent shapes"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::length_error, + data.begin()[i].size() == newShape[1], + "Arrays must have consistent shapes. {}th dimension had size {}, expected {}", + i, + data.begin()[i].size(), + newShape[1]); } #endif auto res = ArrayContainer(newShape); int64_t index = 0; - for (const auto &item : data) res[index++] = fromData(item); - return res; + for (const auto &item : data) res[index++] = ArrayContainer(item); + + // return res; + *this = res; } } template - LIBRAPID_ALWAYS_INLINE auto array::ArrayContainer::fromData( - const std::vector> &data) -> ArrayContainer { - LIBRAPID_ASSERT(data.size() > 0, "Cannot create a zero-sized array"); + LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer( + const std::vector> &data) { + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, data.size() > 0, "Cannot create a zero-sized array"); if constexpr (std::is_same_v) { auto newShape = ShapeType({data.size(), data[0].size()}); auto res = ArrayContainer(newShape); for (size_t i = 0; i < data.size(); ++i) { - LIBRAPID_ASSERT(data[i].size() == newShape[1], - "Arrays must have consistent shapes"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, + data[i].size() == newShape[1], + "Arrays must have consistent shapes. 
{}th dimension had size {}, expected {}", + i, + data[i].size(), + newShape[1]); for (size_t j = 0; j < data[i].size(); ++j) { res(i, j) = data[i][j]; } } - return res; + + // return res; + *this = res; } else { auto newShape = ShapeType({data.size(), data.begin()->size()}); #if defined(LIBRAPID_ENABLE_ASSERT) for (size_t i = 0; i < data.size(); ++i) { - LIBRAPID_ASSERT(data.begin()[i].size() == newShape[1], - "Arrays must have consistent shapes"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, + data.begin()[i].size() == newShape[1], + "Arrays must have consistent shapes. {}th dimension had size {}, expected {}", + i, + data.begin()[i].size(), + newShape[1]); } #endif auto res = ArrayContainer(newShape); int64_t index = 0; - for (const auto &item : data) res[index++] = fromData(item); - return res; + for (const auto &item : data) res[index++] = ArrayContainer(item); + + // return res; + *this = res; } } #define HIGHER_DIMENSIONAL_FROM_DATA(TYPE) \ template \ - LIBRAPID_ALWAYS_INLINE auto array::ArrayContainer::fromData( \ - const TYPE &data) -> ArrayContainer { \ - LIBRAPID_ASSERT(data.size() > 0, "Cannot create a zero-sized array"); \ + LIBRAPID_ALWAYS_INLINE ArrayContainer::ArrayContainer( \ + const TYPE &data) { \ + LIBRAPID_ASSERT_WITH_EXCEPTION( \ + std::invalid_argument, data.size() > 0, "Cannot create a zero-sized array"); \ std::vector tmp(data.size()); \ int64_t index = 0; \ - for (const auto &item : data) tmp[index++] = std::move(fromData(item)); \ + for (const auto &item : data) tmp[index++] = std::move(ArrayContainer(item)); \ auto zeroShape = tmp[0].shape(); \ for (int64_t i = 0; i < data.size(); ++i) \ - LIBRAPID_ASSERT(tmp[i].shape().operator==(zeroShape), \ - "Arrays must have consistent shapes"); \ + LIBRAPID_ASSERT_WITH_EXCEPTION( \ + std::range_error, \ + tmp[i].shape().operator==(zeroShape), \ + "Arrays must have consistent shapes. {}th dimension had {}. 
Expected {}", \ + i, \ + tmp[i].shape(), \ + zeroShape); \ auto newShape = ShapeType::zeros(zeroShape.ndim() + 1); \ newShape[0] = data.size(); \ for (size_t i = 0; i < zeroShape.ndim(); ++i) { newShape[i + 1] = zeroShape[i]; } \ auto res = Array(newShape); \ for (int64_t i = 0; i < data.size(); ++i) res[i] = tmp[i]; \ - return res; \ + *this = res; \ } #define SINIT(SUB_TYPE) std::initializer_list @@ -120,6 +155,6 @@ namespace librapid { #undef SINIT #undef HIGHER_DIMENSIONAL_FROM_DATA -} // namespace librapid +} // namespace librapid::array #endif // LIBRAPID_ARRAY_FROM_DATA_HPP \ No newline at end of file diff --git a/librapid/include/librapid/array/arrayTypeDef.hpp b/librapid/include/librapid/array/arrayTypeDef.hpp index 30e830ee..9867df81 100644 --- a/librapid/include/librapid/array/arrayTypeDef.hpp +++ b/librapid/include/librapid/array/arrayTypeDef.hpp @@ -106,6 +106,7 @@ namespace librapid { Formatter m_formatter; \ char m_bracket = 's'; \ char m_separator = ' '; \ + char m_formatString[16] {}; \ \ template \ FMT_CONSTEXPR auto parse(ParseContext &ctx) -> const char * { \ @@ -121,29 +122,17 @@ namespace librapid { /* - "-|" for pipe separator */ \ /* - "-_" for underscore separator */ \ \ - auto it = ctx.begin(), end = ctx.end(); \ - if (it != end && *it == '~') { \ - ++it; \ - if (it != end && \ - (*it == 'r' || *it == 's' || *it == 'c' || *it == 'a' || *it == 'p')) { \ - m_bracket = *it++; \ - } \ - } \ - \ - if (it != end && *it == '-') { \ - ++it; \ - if (it != end) { m_separator = *it++; } \ - } \ - \ - ctx.advance_to(it); \ - \ - return m_formatter.parse(ctx); \ + auto it = ctx.begin(); \ + const char *ret = m_formatter.parse(ctx); \ + int index = 0; \ + for (auto itt = it; itt != ret; itt++) { m_formatString[index++] = *itt; } \ + return ret; \ } \ \ template \ FMT_CONSTEXPR auto format(const Type &val, FormatContext &ctx) const \ -> decltype(ctx.out()) { \ - val.str(m_formatter, m_bracket, m_separator, ctx); \ + val.str(m_formatter, m_bracket, 
m_separator, m_formatString, ctx); \ return ctx.out(); \ } \ }; \ diff --git a/librapid/include/librapid/array/assignOps.hpp b/librapid/include/librapid/array/assignOps.hpp index ba95ae6c..1b61d76f 100644 --- a/librapid/include/librapid/array/assignOps.hpp +++ b/librapid/include/librapid/array/assignOps.hpp @@ -43,7 +43,11 @@ namespace librapid { // static_assert( // typetraits::IsSame::Scalar>, // "Function return type must be the same as the array container's scalar type"); - LIBRAPID_ASSERT(lhs.shape() == function.shape(), "Shapes must be equal"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape() == function.shape(), + "Shapes must be equal. Expected {}, received {}", + lhs.shape(), + function.shape()); if constexpr (allowVectorisation) { for (int64_t index = 0; index < vectorSize; index += packetWidth) { @@ -100,7 +104,11 @@ namespace librapid { static_assert( typetraits::IsSame::Scalar>, "Function return type must be the same as the array container's scalar type"); - LIBRAPID_ASSERT(lhs.shape() == function.shape(), "Shapes must be equal"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape() == function.shape(), + "Shapes must be equal. Expected {}, received {}", + lhs.shape(), + function.shape()); if constexpr (allowVectorisation) { for (int64_t index = 0; index < vectorSize; index += packetWidth) { @@ -160,7 +168,11 @@ namespace librapid { // static_assert( // typetraits::IsSame::Scalar>, // "Function return type must be the same as the array container's scalar type"); - LIBRAPID_ASSERT(lhs.shape() == function.shape(), "Shapes must be equal"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape() == function.shape(), + "Shapes must be equal. 
Expected {}, received {}", + lhs.shape(), + function.shape()); if constexpr (allowVectorisation) { #pragma omp parallel for shared(vectorSize, lhs, function) default(none) \ @@ -220,7 +232,11 @@ namespace librapid { static_assert( typetraits::IsSame::Scalar>, "Function return type must be the same as the array container's scalar type"); - LIBRAPID_ASSERT(lhs.shape() == function.shape(), "Shapes must be equal"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape() == function.shape(), + "Shapes must be equal. Expected {}, received {}", + lhs.shape(), + function.shape()); if constexpr (allowVectorisation) { #pragma omp parallel for shared(vectorSize, lhs, function) default(none) \ @@ -266,7 +282,6 @@ namespace librapid { */ #if defined(LIBRAPID_HAS_OPENCL) - namespace opencl { template::type != ::librapid::detail::LibRapidType::Scalar, @@ -347,7 +362,6 @@ namespace librapid { #endif // LIBRAPID_HAS_OPENCL #if defined(LIBRAPID_HAS_CUDA) - namespace cuda { template::type != ::librapid::detail::LibRapidType::Scalar, diff --git a/librapid/include/librapid/array/function.hpp b/librapid/include/librapid/array/function.hpp index 8d92ab87..2a07ac4d 100644 --- a/librapid/include/librapid/array/function.hpp +++ b/librapid/include/librapid/array/function.hpp @@ -227,8 +227,8 @@ namespace librapid { LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Iterator begin() const; LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Iterator end() const; - template - void str(const fmt::formatter &format, char bracket, char separator, + template + void str(const fmt::formatter &format, char bracket, char separator, const char (&formatString)[N], Ctx &ctx) const; private: @@ -336,11 +336,11 @@ namespace librapid { } template - template + template LIBRAPID_ALWAYS_INLINE void Function::str(const fmt::formatter &format, char bracket, - char separator, Ctx &ctx) const { - createGeneralArrayView(*this).str(format, bracket, separator, ctx); + char separator, const char (&formatString)[N], Ctx &ctx) 
const { + createGeneralArrayView(*this).str(format, bracket, separator, formatString, ctx); } } // namespace detail } // namespace librapid diff --git a/librapid/include/librapid/array/generalArrayView.hpp b/librapid/include/librapid/array/generalArrayView.hpp index 32f4a56a..0f188445 100644 --- a/librapid/include/librapid/array/generalArrayView.hpp +++ b/librapid/include/librapid/array/generalArrayView.hpp @@ -41,8 +41,8 @@ namespace librapid { using ShapeType = ArrayViewShapeType; using StrideType = Stride; using StorageType = typename typetraits::TypeInfo::StorageType; - using ArrayType = array::ArrayContainer; - using Iterator = detail::ArrayIterator; + using ArrayType = array::ArrayContainer; + using Iterator = detail::ArrayIterator; /// Default constructor should never be used GeneralArrayView() = delete; @@ -57,17 +57,16 @@ namespace librapid { /// Copy an ArrayView object (const) /// \param other The array to copy - LIBRAPID_ALWAYS_INLINE GeneralArrayView(const GeneralArrayView &other) = default; + LIBRAPID_ALWAYS_INLINE GeneralArrayView(const GeneralArrayView &other); /// Constructs an ArrayView from a temporary instance /// \param other The ArrayView to move - LIBRAPID_ALWAYS_INLINE GeneralArrayView(GeneralArrayView &&other) = default; + LIBRAPID_ALWAYS_INLINE GeneralArrayView(GeneralArrayView &&other); /// Assigns another ArrayView object to this ArrayView. /// \param other The ArrayView to assign. /// \return A reference to this - LIBRAPID_ALWAYS_INLINE GeneralArrayView & - operator=(const GeneralArrayView &other) = default; + LIBRAPID_ALWAYS_INLINE GeneralArrayView &operator=(const GeneralArrayView &other); /// Assigns a temporary ArrayView to this ArrayView. /// \param other The ArrayView to move. 
@@ -85,6 +84,20 @@ namespace librapid { LIBRAPID_ALWAYS_INLINE GeneralArrayView & operator=(const ArrayContainer &other); + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + operator=(const detail::Function &function); + + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + operator=(const array::Transpose &transpose); + + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + operator=(const linalg::ArrayMultiply &matmul); + /// Access a sub-array of this ArrayView. /// \param index The index of the sub-array. /// \return An ArrayView from this @@ -106,6 +119,8 @@ namespace librapid { template LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE explicit operator CAST() const; + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE int64_t size() const; + /// Access the underlying shape of this ArrayView /// \return Shape object LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE ShapeType shape() const; @@ -141,6 +156,18 @@ namespace librapid { /// \return Scalar at the given index LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto scalar(int64_t index) const; + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView &operator+=(const T &other); + + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView &operator-=(const T &other); + + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView &operator*=(const T &other); + + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView &operator/=(const T &other); + /// Evaluate the contents of this ArrayView object and return an Array instance from /// it. Depending on your use case, this may result in more performant code, but the new /// Array will not reference the original data in the ArrayView. 
@@ -150,9 +177,9 @@ namespace librapid { LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Iterator begin() const; LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Iterator end() const; - template + template void str(const fmt::formatter &format, char bracket, char separator, - Ctx &ctx) const; + const char (&formatString)[N], Ctx &ctx) const; private: ArrayViewType m_ref; @@ -161,12 +188,6 @@ namespace librapid { int64_t m_offset = 0; }; - // template - // LIBRAPID_ALWAYS_INLINE - // GeneralArrayView::GeneralArrayView(ArrayViewType &array) : - // m_ref(array), - // m_shape(array.shape()), m_stride(array.shape()) {} - template LIBRAPID_ALWAYS_INLINE GeneralArrayView::GeneralArrayView( @@ -174,21 +195,74 @@ namespace librapid { m_ref(array), m_shape(array.shape()), m_stride(array.shape()) {} + template + LIBRAPID_ALWAYS_INLINE + GeneralArrayView::GeneralArrayView( + const GeneralArrayView &other) : + m_ref(other.m_ref), + m_shape(other.m_shape), m_stride(other.m_stride), m_offset(other.m_offset) {} /* carry m_offset over as the move constructor does; otherwise it resets to its default of 0 */ + + template + LIBRAPID_ALWAYS_INLINE + GeneralArrayView::GeneralArrayView( + GeneralArrayView &&other) : + m_ref(other.m_ref), + m_shape(other.m_shape), m_stride(other.m_stride), m_offset(other.m_offset) {} + template LIBRAPID_ALWAYS_INLINE GeneralArrayView & GeneralArrayView::operator=(const Scalar &scalar) { - LIBRAPID_ASSERT(m_shape.ndim() == 0, "Cannot assign to a non-scalar ArrayView."); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + m_shape.ndim() == 0, + "Cannot assign to a non-scalar ArrayView with {}", + m_shape); m_ref.storage()[m_offset] = static_cast(scalar); return *this; } + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + GeneralArrayView::operator=( + const GeneralArrayView &other) { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + m_shape.operator==(other.shape()), + "GeneralArrayView assignment shape mismatch. 
{} vs {}", + m_shape, + other.shape()); + + ShapeType coord = ShapeType::zeros(m_shape.ndim()); + int64_t d = 0, p = 0; + int64_t idim = 0, adim = 0; + const int64_t ndim = m_shape.ndim(); + + do { + m_ref.storage()[p + m_offset] = other.scalar(d++); + + for (idim = 0; idim < ndim; ++idim) { + adim = ndim - idim - 1; + if (++coord[adim] == m_shape[adim]) { + coord[adim] = 0; + p = p - (m_shape[adim] - 1) * m_stride[adim]; + } else { + p = p + m_stride[adim]; + break; + } + } + } while (idim < ndim); + + return *this; + } + template template LIBRAPID_ALWAYS_INLINE GeneralArrayView & GeneralArrayView::operator=( const ArrayContainer &other) { - LIBRAPID_ASSERT(m_shape.operator==(other.shape()), - "Cannot assign to a non-scalar ArrayView."); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + m_shape.operator==(other.shape()), + "GeneralArrayView assignment shape mismatch. {} vs {}", + m_shape, + other.shape()); ShapeType coord = ShapeType::zeros(m_shape.ndim()); int64_t d = 0, p = 0; @@ -213,10 +287,112 @@ namespace librapid { return *this; } + template + template + LIBRAPID_ALWAYS_INLINE auto GeneralArrayView::operator=( + const detail::Function &function) -> GeneralArrayView & { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + m_shape.operator==(function.shape()), + "GeneralArrayView assignment shape mismatch. 
{} vs {}", + m_shape, + function.shape()); + + ShapeType coord = ShapeType::zeros(m_shape.ndim()); + int64_t d = 0, p = 0; + int64_t idim = 0, adim = 0; + const int64_t ndim = m_shape.ndim(); + + do { + m_ref.storage()[p + m_offset] = function.scalar(d++); + + for (idim = 0; idim < ndim; ++idim) { + adim = ndim - idim - 1; + if (++coord[adim] == m_shape[adim]) { + coord[adim] = 0; + p = p - (m_shape[adim] - 1) * m_stride[adim]; + } else { + p = p + m_stride[adim]; + break; + } + } + } while (idim < ndim); + + return *this; + } + + template + template + LIBRAPID_ALWAYS_INLINE auto GeneralArrayView::operator=( + const array::Transpose &transpose) -> GeneralArrayView & { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + m_shape.operator==(transpose.shape()), + "GeneralArrayView assignment shape mismatch. {} vs {}", + m_shape, + transpose.shape()); + + ShapeType coord = ShapeType::zeros(m_shape.ndim()); + int64_t d = 0, p = 0; + int64_t idim = 0, adim = 0; + const int64_t ndim = m_shape.ndim(); + + do { + m_ref.storage()[p + m_offset] = transpose.scalar(d++); + + for (idim = 0; idim < ndim; ++idim) { + adim = ndim - idim - 1; + if (++coord[adim] == m_shape[adim]) { + coord[adim] = 0; + p = p - (m_shape[adim] - 1) * m_stride[adim]; + } else { + p = p + m_stride[adim]; + break; + } + } + } while (idim < ndim); + + return *this; + } + + template + template + LIBRAPID_ALWAYS_INLINE auto GeneralArrayView::operator=( + const linalg::ArrayMultiply &matmul) -> GeneralArrayView & { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + m_shape.operator==(matmul.shape()), + "GeneralArrayView assignment shape mismatch. 
{} vs {}", + m_shape, + matmul.shape()); + + ShapeType coord = ShapeType::zeros(m_shape.ndim()); + int64_t d = 0, p = 0; + int64_t idim = 0, adim = 0; + const int64_t ndim = m_shape.ndim(); + + do { + m_ref.storage()[p + m_offset] = matmul.scalar(d++); + + for (idim = 0; idim < ndim; ++idim) { + adim = ndim - idim - 1; + if (++coord[adim] == m_shape[adim]) { + coord[adim] = 0; + p = p - (m_shape[adim] - 1) * m_stride[adim]; + } else { + p = p + m_stride[adim]; + break; + } + } + } while (idim < ndim); + + return *this; + } + template LIBRAPID_ALWAYS_INLINE const auto GeneralArrayView::operator[](int64_t index) const { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, index >= 0 && index < static_cast(m_shape[0]), "Index {} out of bounds in ArrayContainer::operator[] with leading dimension={}", index, @@ -235,7 +411,8 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto GeneralArrayView::operator[](int64_t index) { - LIBRAPID_ASSERT( + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, index >= 0 && index < static_cast(m_shape[0]), "Index {} out of bounds in ArrayContainer::operator[] with leading dimension={}", index, @@ -255,8 +432,11 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE CAST GeneralArrayView::get() const { - LIBRAPID_ASSERT(m_shape.ndim() == 0, - "Can only cast a scalar ArrayView to a salar object"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, + m_shape.ndim() == 0, + "Can only cast a scalar ArrayView to a salar object. 
ArrayView had {}", + m_shape); return scalar(0); } @@ -267,6 +447,12 @@ namespace librapid { return get(); } + template + LIBRAPID_ALWAYS_INLINE int64_t + GeneralArrayView::size() const { + return m_shape.size(); + } + template LIBRAPID_ALWAYS_INLINE auto GeneralArrayView::shape() const -> ShapeType { @@ -325,6 +511,38 @@ namespace librapid { return m_ref.scalar(m_offset + offset); } + template + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + GeneralArrayView::operator+=(const T &other) { + *this = *this + other; + return *this; + } + + template + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + GeneralArrayView::operator-=(const T &other) { + *this = *this - other; + return *this; + } + + template + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + GeneralArrayView::operator*=(const T &other) { + *this = *this * other; + return *this; + } + + template + template + LIBRAPID_ALWAYS_INLINE GeneralArrayView & + GeneralArrayView::operator/=(const T &other) { + *this = *this / other; + return *this; + } + template LIBRAPID_ALWAYS_INLINE auto GeneralArrayView::eval() const -> ArrayType { diff --git a/librapid/include/librapid/array/generalArrayViewToString.hpp b/librapid/include/librapid/array/generalArrayViewToString.hpp index 844c9b27..0cbde6c8 100644 --- a/librapid/include/librapid/array/generalArrayViewToString.hpp +++ b/librapid/include/librapid/array/generalArrayViewToString.hpp @@ -3,12 +3,55 @@ namespace librapid { namespace detail { + template + std::pair alignment(const char (&formatString)[N], const Val &value) { + std::string tmpFormat = fmt::format("{{:{}}}", formatString); + std::string formatted = fmt::vformat(tmpFormat, fmt::make_format_args(value)); + + if constexpr (std::is_integral_v>) { + return std::make_pair(formatted.length(), 0); + } else if constexpr (std::is_floating_point_v>) { + auto point = formatted.find('.'); + if (point == std::string::npos) { + return std::make_pair(formatted.length(), 0); + } else { + return 
std::make_pair(point, formatted.length() - point); + } + } + + return std::make_pair(0, 0); + } + + template + void generalArrayViewToStringColWidthFinder( + const array::GeneralArrayView &view, + const char (&formatString)[N], std::vector> &alignments) { + if (view.ndim() == 1) { + for (int64_t i = 0; i < static_cast(view.shape()[0]); i++) { + auto alignmentPair = alignment(formatString, view.scalar(i)); + if (i >= static_cast(alignments.size())) { + alignments.push_back(alignmentPair); + } else { + alignments[i].first = + ::librapid::max(alignments[i].first, alignmentPair.first); + alignments[i].second = + ::librapid::max(alignments[i].second, alignmentPair.second); + } + } + } else if (view.ndim() > 1) { + for (int64_t i = 0; i < static_cast(view.shape()[0]); i++) { + generalArrayViewToStringColWidthFinder(view[i], formatString, alignments); + } + } + } + template - void generalArrayViewToString( + size_t N, typename Ctx> + void generalArrayViewToStringImpl( const array::GeneralArrayView &view, - const fmt::formatter &formatter, char bracket, char separator, int64_t indent, - Ctx &ctx) { + const fmt::formatter &formatter, char bracket, char separator, + const char (&formatString)[N], int64_t indent, Ctx &ctx, + const std::vector> &alignments) { char bracketCharOpen, bracketCharClose; switch (bracket) { @@ -45,7 +88,15 @@ namespace librapid { } else if (view.ndim() == 1) { fmt::format_to(ctx.out(), "{}", bracketCharOpen); for (int64_t i = 0; i < static_cast(view.shape()[0]); i++) { + auto columnAlignment = alignments[i]; + auto valueSize = alignment(formatString, view.scalar(i)); + int64_t pre = max(columnAlignment.first - valueSize.first, 0), + post = max(columnAlignment.second - valueSize.second, 0); + + fmt::format_to(ctx.out(), "{}", std::string(pre, ' ')); formatter.format(view.scalar(i), ctx); + fmt::format_to(ctx.out(), "{}", std::string(post, ' ')); + if (i != view.shape()[0] - 1) { if (separator == ' ') { fmt::format_to(ctx.out(), " "); @@ -59,8 +110,14 
@@ namespace librapid { fmt::format_to(ctx.out(), "{}", bracketCharOpen); for (int64_t i = 0; i < static_cast(view.shape()[0]); i++) { if (i > 0) fmt::format_to(ctx.out(), "{}", std::string(indent + 1, ' ')); - generalArrayViewToString( - view[i], formatter, bracket, separator, indent + 1, ctx); + generalArrayViewToStringImpl(view[i], + formatter, + bracket, + separator, + formatString, + indent + 1, + ctx, + alignments); if (i != view.shape()[0] - 1) { if (separator == ' ') { fmt::format_to(ctx.out(), "\n"); @@ -73,14 +130,28 @@ namespace librapid { fmt::format_to(ctx.out(), "{}", bracketCharClose); } } + + template + void generalArrayViewToString( + const array::GeneralArrayView &view, + const fmt::formatter &formatter, char bracket, char separator, + const char (&formatString)[N], int64_t indent, Ctx &ctx) { + std::vector> alignments; + generalArrayViewToStringColWidthFinder(view, formatString, alignments); + generalArrayViewToStringImpl( + view, formatter, bracket, separator, formatString, indent, ctx, alignments); + } } // namespace detail namespace array { template - template + template void GeneralArrayView::str( - const fmt::formatter &format, char bracket, char separator, Ctx &ctx) const { - detail::generalArrayViewToString(*this, format, bracket, separator, 0, ctx); + const fmt::formatter &format, char bracket, char separator, + const char (&formatString)[N], Ctx &ctx) const { + detail::generalArrayViewToString( + *this, format, bracket, separator, formatString, 0, ctx); } } // namespace array } // namespace librapid diff --git a/librapid/include/librapid/array/linalg/arrayMultiply.hpp b/librapid/include/librapid/array/linalg/arrayMultiply.hpp index 6090cc04..d286ec5b 100644 --- a/librapid/include/librapid/array/linalg/arrayMultiply.hpp +++ b/librapid/include/librapid/array/linalg/arrayMultiply.hpp @@ -166,9 +166,9 @@ namespace librapid { template void applyTo(array::ArrayContainer &out) const; - template + template void str(const fmt::formatter &format, 
char bracket, char separator, - Ctx &ctx) const; + const char (&formatString)[N], Ctx &ctx) const; private: bool m_transA; // Transpose state of A @@ -475,10 +475,11 @@ namespace librapid { template - template + template void ArrayMultiply::str( - const fmt::formatter &format, char bracket, char separator, Ctx &ctx) const { - eval().str(format, bracket, separator, ctx); + const fmt::formatter &format, char bracket, char separator, + const char (&formatString)[N], Ctx &ctx) const { + eval().str(format, bracket, separator, formatString, ctx); } } // namespace linalg diff --git a/librapid/include/librapid/array/linalg/transpose.hpp b/librapid/include/librapid/array/linalg/transpose.hpp index 3a0bf66d..179bea70 100644 --- a/librapid/include/librapid/array/linalg/transpose.hpp +++ b/librapid/include/librapid/array/linalg/transpose.hpp @@ -515,9 +515,9 @@ namespace librapid { /// \return Evaluated expression LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto eval() const; - template + template LIBRAPID_ALWAYS_INLINE void str(const fmt::formatter &format, char bracket, - char separator, Ctx &ctx) const; + char separator, const char (&formatString)[N], Ctx &ctx) const; private: ArrayType m_array; @@ -652,10 +652,10 @@ namespace librapid { }; template - template + template void Transpose::str(const fmt::formatter &format, char bracket, - char separator, Ctx &ctx) const { - eval().str(format, bracket, separator, ctx); + char separator, const char (&formatString)[N], Ctx &ctx) const { + eval().str(format, bracket, separator, formatString, ctx); } }; // namespace array diff --git a/librapid/include/librapid/array/operations.hpp b/librapid/include/librapid/array/operations.hpp index d17c2fc9..894f5c9c 100644 --- a/librapid/include/librapid/array/operations.hpp +++ b/librapid/include/librapid/array/operations.hpp @@ -91,8 +91,12 @@ const std::tuple &tup) { \ if constexpr (IsArrayType>::value) { \ if constexpr (IsArrayType>::value) { \ - LIBRAPID_ASSERT(std::get<0>(tup).shape() == 
std::get<1>(tup).shape(), \ - "Shapes must match for binary operations"); \ + LIBRAPID_ASSERT_WITH_EXCEPTION( \ + std::range_error, \ + std::get<0>(tup).shape() == std::get<1>(tup).shape(), \ + "Shapes must match for binary operations. {} vs {}", \ + std::get<0>(tup).shape(), \ + std::get<1>(tup).shape()); \ return std::get<0>(tup).shape(); \ } \ return std::get<0>(tup).shape(); \ @@ -626,18 +630,20 @@ namespace librapid { /// \param rhs The second array /// \return The element-wise sum of the two arrays template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator+(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Plus, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Plus, LHS, RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::Plus>( std::forward(lhs), std::forward(rhs)); } template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator+(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Plus, LHS, RHS> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Plus, LHS, RHS> { return detail::makeFunction, detail::Plus>( std::forward(lhs), std::forward(rhs)); } @@ -653,18 +659,20 @@ namespace librapid { /// \param rhs The second array /// \return The element-wise difference of the two arrays template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator-(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Minus, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Minus, LHS, RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::Minus>( std::forward(lhs), std::forward(rhs)); } template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator-(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Minus, LHS, RHS> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Minus, LHS, RHS> { return detail::makeFunction, detail::Minus>( std::forward(lhs), std::forward(rhs)); } @@ -680,18 +688,20 @@ namespace librapid { /// \param rhs The second array /// \return The element-wise product of the two arrays template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator*(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Multiply, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Multiply, LHS, RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::Multiply>( std::forward(lhs), std::forward(rhs)); } template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator*(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Multiply, LHS, RHS> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Multiply, LHS, RHS> { return detail::makeFunction, detail::Multiply>( std::forward(lhs), std::forward(rhs)); } @@ -707,18 +717,20 @@ namespace librapid { /// \param rhs The second array /// \return The element-wise division of the two arrays template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator/(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Divide, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Divide, LHS, RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::Divide>( std::forward(lhs), std::forward(rhs)); } template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator/(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Divide, LHS, RHS> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::Divide, LHS, RHS> { return detail::makeFunction, detail::Divide>( std::forward(lhs), std::forward(rhs)); } @@ -736,18 +748,20 @@ namespace librapid { /// \param rhs The second array /// \return The element-wise comparison of the two arrays template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator<(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::LessThan, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator<(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::LessThan, LHS, RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::LessThan>( std::forward(lhs), std::forward(rhs)); } template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator<(LHS &&lhs, RHS &&rhs) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::LessThan, LHS, RHS> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator<(LHS &&lhs, RHS &&rhs) + -> detail::Function, detail::LessThan, LHS, RHS> { return detail::makeFunction, detail::LessThan>( std::forward(lhs), std::forward(rhs)); } @@ -766,9 +780,13 @@ namespace librapid { /// \return The element-wise comparison of the two arrays template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator>(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::GreaterThan, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + -> detail::Function, detail::GreaterThan, LHS, + RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::GreaterThan>(std::forward(lhs), std::forward(rhs)); @@ -776,8 +794,8 @@ namespace librapid { template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator>(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::GreaterThan, LHS, RHS> { + -> detail::Function, detail::GreaterThan, LHS, + RHS> { return detail::makeFunction, detail::GreaterThan>(std::forward(lhs), std::forward(rhs)); @@ -797,9 +815,13 @@ namespace librapid { /// \return The element-wise comparison of the two arrays template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator<=(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::LessThanEqual, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + -> detail::Function, detail::LessThanEqual, LHS, + RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::LessThanEqual>(std::forward(lhs), std::forward(rhs)); @@ -807,8 +829,8 @@ namespace librapid { template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator<=(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::LessThanEqual, LHS, RHS> { + -> detail::Function, detail::LessThanEqual, LHS, + RHS> { return detail::makeFunction, detail::LessThanEqual>(std::forward(lhs), std::forward(rhs)); @@ -828,9 +850,13 @@ namespace librapid { /// \return The element-wise comparison of the two arrays template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator>=(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::GreaterThanEqual, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + -> detail::Function, detail::GreaterThanEqual, LHS, + RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::GreaterThanEqual>(std::forward(lhs), std::forward(rhs)); @@ -838,8 +864,8 @@ namespace librapid { template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator>=(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::GreaterThanEqual, LHS, RHS> { + -> detail::Function, detail::GreaterThanEqual, LHS, + RHS> { return detail::makeFunction, detail::GreaterThanEqual>(std::forward(lhs), std::forward(rhs)); @@ -859,9 +885,13 @@ namespace librapid { /// \return The element-wise comparison of the two arrays template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator==(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::ElementWiseEqual, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + -> detail::Function, detail::ElementWiseEqual, LHS, + RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::ElementWiseEqual>(std::forward(lhs), std::forward(rhs)); @@ -869,8 +899,8 @@ namespace librapid { template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator==(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::ElementWiseEqual, LHS, RHS> { + -> detail::Function, detail::ElementWiseEqual, LHS, + RHS> { return detail::makeFunction, detail::ElementWiseEqual>(std::forward(lhs), std::forward(rhs)); @@ -890,9 +920,13 @@ namespace librapid { /// \return The element-wise comparison of the two arrays template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator!=(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::ElementWiseNotEqual, LHS, RHS> { - LIBRAPID_ASSERT(lhs.shape().operator==(rhs.shape()), "Shapes must be equal"); + -> detail::Function, detail::ElementWiseNotEqual, + LHS, RHS> { + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + lhs.shape().operator==(rhs.shape()), + "Shapes must be equal. 
{} vs {}", + lhs.shape(), + rhs.shape()); return detail::makeFunction, detail::ElementWiseNotEqual>(std::forward(lhs), std::forward(rhs)); @@ -900,8 +934,8 @@ namespace librapid { template = 0> LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator!=(LHS &&lhs, RHS &&rhs) - LIBRAPID_RELEASE_NOEXCEPT->detail::Function, - detail::ElementWiseNotEqual, LHS, RHS> { + -> detail::Function, detail::ElementWiseNotEqual, + LHS, RHS> { return detail::makeFunction, detail::ElementWiseNotEqual>(std::forward(lhs), std::forward(rhs)); @@ -912,9 +946,8 @@ namespace librapid { /// \param val The input array or function /// \return Negation function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto - operator-(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Neg, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(VAL &&val) + -> detail::Function, detail::Neg, VAL> { return detail::makeFunction, detail::Neg>( std::forward(val)); } @@ -928,8 +961,8 @@ namespace librapid { /// \param val The input array or function /// \return Sine function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sin(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Sin, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sin(VAL &&val) + -> detail::Function, detail::Sin, VAL> { return detail::makeFunction, detail::Sin>( std::forward(val)); } @@ -942,8 +975,8 @@ namespace librapid { /// \param val The input array or function /// \return Cosine function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto cos(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Cos, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto cos(VAL &&val) + -> detail::Function, detail::Cos, VAL> { return detail::makeFunction, detail::Cos>( std::forward(val)); } @@ -956,8 +989,8 @@ namespace librapid { /// \param val The input array or function /// \return Tangent function object template = 0> - 
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto tan(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Tan, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto tan(VAL &&val) + -> detail::Function, detail::Tan, VAL> { return detail::makeFunction, detail::Tan>( std::forward(val)); } @@ -970,8 +1003,8 @@ namespace librapid { /// \param val The input array or function /// \return Arcsine function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto asin(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Asin, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto asin(VAL &&val) + -> detail::Function, detail::Asin, VAL> { return detail::makeFunction, detail::Asin>( std::forward(val)); } @@ -984,8 +1017,8 @@ namespace librapid { /// \param val The input array or function /// \return Arccosine function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto acos(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Acos, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto acos(VAL &&val) + -> detail::Function, detail::Acos, VAL> { return detail::makeFunction, detail::Acos>( std::forward(val)); } @@ -998,8 +1031,8 @@ namespace librapid { /// \param val The input array or function /// \return Arctangent function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto atan(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Atan, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto atan(VAL &&val) + -> detail::Function, detail::Atan, VAL> { return detail::makeFunction, detail::Atan>( std::forward(val)); } @@ -1012,8 +1045,8 @@ namespace librapid { /// \param val The input array or function /// \return Hyperbolic sine function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sinh(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Sinh, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sinh(VAL &&val) + -> 
detail::Function, detail::Sinh, VAL> { return detail::makeFunction, detail::Sinh>( std::forward(val)); } @@ -1026,8 +1059,8 @@ namespace librapid { /// \param val The input array or function /// \return Hyperbolic cosine function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto cosh(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Cosh, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto cosh(VAL &&val) + -> detail::Function, detail::Cosh, VAL> { return detail::makeFunction, detail::Cosh>( std::forward(val)); } @@ -1040,8 +1073,8 @@ namespace librapid { /// \param val The input array or function /// \return Hyperbolic tangent function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto tanh(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Tanh, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto tanh(VAL &&val) + -> detail::Function, detail::Tanh, VAL> { return detail::makeFunction, detail::Tanh>( std::forward(val)); } @@ -1054,8 +1087,8 @@ namespace librapid { /// \param val The input array or function /// \return Exponential function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto exp(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Exp, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto exp(VAL &&val) + -> detail::Function, detail::Exp, VAL> { return detail::makeFunction, detail::Exp>( std::forward(val)); } @@ -1068,8 +1101,8 @@ namespace librapid { /// \param val The input array or function /// \return Natural logarithm function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto log(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Log, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto log(VAL &&val) + -> detail::Function, detail::Log, VAL> { return detail::makeFunction, detail::Log>( std::forward(val)); } @@ -1082,8 +1115,8 @@ namespace librapid { /// \param val The input array or function /// 
\return Base 10 logarithm function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto log10(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Log10, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto log10(VAL &&val) + -> detail::Function, detail::Log10, VAL> { return detail::makeFunction, detail::Log10>( std::forward(val)); } @@ -1096,8 +1129,8 @@ namespace librapid { /// \param val The input array or function /// \return Base 2 logarithm function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto log2(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Log2, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto log2(VAL &&val) + -> detail::Function, detail::Log2, VAL> { return detail::makeFunction, detail::Log2>( std::forward(val)); } @@ -1110,8 +1143,8 @@ namespace librapid { /// \param val The input array or function /// \return Square root function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrt(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Sqrt, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrt(VAL &&val) + -> detail::Function, detail::Sqrt, VAL> { return detail::makeFunction, detail::Sqrt>( std::forward(val)); } @@ -1124,8 +1157,8 @@ namespace librapid { /// \param val The input array or function /// \return Cube root function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto cbrt(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Cbrt, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto cbrt(VAL &&val) + -> detail::Function, detail::Cbrt, VAL> { return detail::makeFunction, detail::Cbrt>( std::forward(val)); } @@ -1138,8 +1171,8 @@ namespace librapid { /// \param val The input array or function /// \return Absolute value function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto abs(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Abs, VAL> { + 
LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto abs(VAL &&val) + -> detail::Function, detail::Abs, VAL> { return detail::makeFunction, detail::Abs>( std::forward(val)); } @@ -1152,8 +1185,8 @@ namespace librapid { /// \param val The input array or function /// \return Floor function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto floor(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Floor, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto floor(VAL &&val) + -> detail::Function, detail::Floor, VAL> { return detail::makeFunction, detail::Floor>( std::forward(val)); } @@ -1166,8 +1199,8 @@ namespace librapid { /// \param val The input array or function /// \return Ceiling function object template = 0> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto ceil(VAL &&val) LIBRAPID_RELEASE_NOEXCEPT - ->detail::Function, detail::Ceil, VAL> { + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto ceil(VAL &&val) + -> detail::Function, detail::Ceil, VAL> { return detail::makeFunction, detail::Ceil>( std::forward(val)); } diff --git a/librapid/include/librapid/array/pseudoConstructors.hpp b/librapid/include/librapid/array/pseudoConstructors.hpp index b6d11df4..e4214960 100644 --- a/librapid/include/librapid/array/pseudoConstructors.hpp +++ b/librapid/include/librapid/array/pseudoConstructors.hpp @@ -135,8 +135,11 @@ namespace librapid { template Array arange(Start start, Stop stop, Step step) { - LIBRAPID_ASSERT(step != 0, "Step size cannot be zero"); - LIBRAPID_ASSERT((stop - start) / step > 0, "Step size is invalid for the specified range"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, step != 0, "Step size cannot be zero"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + (stop - start) / step > 0, + "Step size is invalid for the specified range"); Shape shape = {(int64_t)::librapid::abs((stop - start) / step)}; Array result(shape); @@ -148,7 +151,9 @@ namespace librapid { template Array arange(T start, T stop) { - 
LIBRAPID_ASSERT((stop - start) > 0, "Step size is invalid for the specified range"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + (stop - start) > 0, + "Step size is invalid for the specified range"); Shape shape = {(int64_t)::librapid::abs(stop - start)}; Array result(shape); @@ -183,7 +188,8 @@ namespace librapid { template Array linspace(Start start, Stop stop, int64_t num, bool includeEnd = true) { - LIBRAPID_ASSERT(num > 0, "Number of samples must be greater than zero"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, num > 0, "Number of samples must be greater than zero"); auto startCast = static_cast(start); auto stopCast = static_cast(stop); @@ -199,7 +205,8 @@ namespace librapid { template Array logspace(Start start, Stop stop, int64_t num, bool includeEnd = true) { - LIBRAPID_ASSERT(num > 0, "Number of samples must be greater than zero"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, num > 0, "Number of samples must be greater than zero"); auto logLower = ::librapid::log(static_cast(start)); auto logUpper = ::librapid::log(static_cast(stop)); @@ -216,8 +223,8 @@ namespace librapid { return result; } - template + template Array random(const Shape &shape, Lower lower = 0, Upper upper = 1) { Array result(shape); fillRandom(result, static_cast(lower), static_cast(upper)); diff --git a/librapid/include/librapid/array/shape.hpp b/librapid/include/librapid/array/shape.hpp index 061e296b..207ccae9 100644 --- a/librapid/include/librapid/array/shape.hpp +++ b/librapid/include/librapid/array/shape.hpp @@ -327,16 +327,28 @@ namespace librapid { LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto Shape::operator[](Index index) const -> const SizeType & { static_assert(std::is_integral_v, "Index must be an integral type"); - LIBRAPID_ASSERT(index < m_dims, "Index out of bounds"); - LIBRAPID_ASSERT(index >= 0, "Index out of bounds"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + index < m_dims, + "Index {} out of bounds for Shape 
with {} dimensions", + index, + m_dims); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + index >= 0, + "Index out of bounds. Must be greater than 0. Received {}", + index); return m_data[index]; } template LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto Shape::operator[](Index index) -> SizeType & { static_assert(std::is_integral_v, "Index must be an integral type"); - LIBRAPID_ASSERT(index < m_dims, "Index out of bounds"); - LIBRAPID_ASSERT(index >= 0, "Index out of bounds"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + index < m_dims, + "Index {} out of bounds for Shape with {} dimensions", + index, + m_dims); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, index >= 0, "Index {} out of bounds. Must be greater than 0", index); return m_data[index]; } @@ -355,10 +367,21 @@ namespace librapid { LIBRAPID_NODISCARD auto Shape::ndim() const -> int { return m_dims; } LIBRAPID_NODISCARD auto Shape::subshape(int start, int end) const -> Shape { - LIBRAPID_ASSERT(start <= end, "Start index must be less than end index"); - LIBRAPID_ASSERT(end <= m_dims, - "End index must be less than or equal to the number of dimensions"); - LIBRAPID_ASSERT(start >= 0, "Start index must be greater than or equal to 0"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + start <= end, + "Start index ({}) must not be greater than end index ({})", + start, + end); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, + end <= m_dims, + "End index ({}) must be less than or equal to the number of dimensions ({}).", + end, + m_dims); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + start >= 0, + "Start index ({}) must be greater than or equal to 0", + start); Shape res; res.m_dims = end - start; @@ -388,7 +411,10 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE MatrixShape::MatrixShape(const std::initializer_list &vals) { - LIBRAPID_ASSERT(vals.size() <= 2, "MatrixShape must be initialized with 2 values"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + 
vals.size() <= 2, + "MatrixShape must be initialized with 2 values. Received {}", + vals.size()); if (vals.size() == 2) { m_rows = *(vals.begin()); m_cols = *(vals.begin() + 1); @@ -403,7 +429,10 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE MatrixShape::MatrixShape(const std::vector &vals) { - LIBRAPID_ASSERT(vals.size() <= 2, "MatrixShape must be initialized with 2 values"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + vals.size() <= 2, + "MatrixShape must be initialized with 2 values. Received {}", + vals.size()); if (vals.size() == 2) { m_rows = vals[0]; m_cols = vals[1]; @@ -417,9 +446,11 @@ namespace librapid { } LIBRAPID_ALWAYS_INLINE MatrixShape::MatrixShape(const Shape &other) { - LIBRAPID_ASSERT(other.ndim() <= 2, - "MatrixShape must be initialized with 2 dimension, but received {}", - other.ndim()); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, + other.ndim() <= 2, + "MatrixShape must be initialized with 2 dimension, but received {}", + other.ndim()); if (other.ndim() == 2) { m_rows = other[0]; m_cols = other[1]; @@ -435,7 +466,11 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto MatrixShape::operator=(const std::initializer_list &vals) -> MatrixShape & { - LIBRAPID_ASSERT(vals.size() <= 2, "MatrixShape must be initialized with 2 values"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, + vals.size() <= 2, + "MatrixShape must be initialized with 2 values, but received {}", + vals.size()); if (vals.size() == 2) { m_rows = *(vals.begin()); m_cols = *(vals.begin() + 1); @@ -452,7 +487,11 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto MatrixShape::operator=(const std::vector &vals) -> MatrixShape & { - LIBRAPID_ASSERT(vals.size() <= 2, "MatrixShape must be initialized with 2 values"); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, + vals.size() <= 2, + "MatrixShape must be initialized with 2 values, but received {}", + vals.size()); if (vals.size() == 2) { m_rows = vals[0]; 
m_cols = vals[1]; @@ -480,8 +519,12 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto MatrixShape::operator[](Index index) const -> const SizeType & { static_assert(std::is_integral_v, "Index must be an integral type"); - LIBRAPID_ASSERT(index < 2, "Index out of bounds"); - LIBRAPID_ASSERT(index >= 0, "Index out of bounds"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + index < 2, + "Index {} out of bounds for MatrixShape with 2 dimensions", + index); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, index >= 0, "Index {} out of bounds. Must be greater than 0", index); return index == 0 ? m_rows : m_cols; } @@ -489,8 +532,12 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto MatrixShape::operator[](Index index) -> SizeType & { static_assert(std::is_integral_v, "Index must be an integral type"); - LIBRAPID_ASSERT(index < 2, "Index out of bounds"); - LIBRAPID_ASSERT(index >= 0, "Index out of bounds"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + index < 2, + "Index {} out of bounds for MatrixShape with 2 dimensions", + index); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, index >= 0, "Index {} out of bounds. Must be greater than 0", index); return index == 0 ? 
m_rows : m_cols; } @@ -498,10 +545,20 @@ namespace librapid { LIBRAPID_ALWAYS_INLINE constexpr auto MatrixShape::ndim() const -> int { return 2; } LIBRAPID_ALWAYS_INLINE auto MatrixShape::subshape(int start, int end) const -> Shape { - LIBRAPID_ASSERT(start <= end, "Start index must be less than end index"); - LIBRAPID_ASSERT(end <= 2, - "End index must be less than or equal to the number of dimensions"); - LIBRAPID_ASSERT(start >= 0, "Start index must be greater than or equal to 0"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + start <= end, + "Start index ({}) must not be greater than end index ({})", + start, + end); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::out_of_range, + end <= 2, + "End index ({}) must be less than or equal to the number of dimensions (2).", + end); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::out_of_range, + start >= 0, + "Start index ({}) must be greater than or equal to 0", + start); Shape res = Shape::zeros(2); res[0] = m_rows; @@ -527,27 +584,38 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE VectorShape::VectorShape(const std::initializer_list &vals) { - LIBRAPID_ASSERT(vals.size() == 1, "MatrixShape must be initialized with 1 value"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + vals.size() == 1, + "MatrixShape must be initialized with 1 value. Received {}", + vals.size()); m_elements = *(vals.begin()); } template LIBRAPID_ALWAYS_INLINE VectorShape::VectorShape(const std::vector &vals) { - LIBRAPID_ASSERT(vals.size() == 1, "MatrixShape must be initialized with 1 value"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + vals.size() == 1, + "MatrixShape must be initialized with 1 value. 
Received {}", + vals.size()); m_elements = vals[0]; } LIBRAPID_ALWAYS_INLINE VectorShape::VectorShape(const Shape &other) { - LIBRAPID_ASSERT(other.ndim() == 1, - "VectorShape must be initialized with 1 dimension, but received {}", - other.ndim()); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::invalid_argument, + other.ndim() == 1, + "VectorShape must be initialized with 1 dimension, but received {}", + other.ndim()); m_elements = other[0]; } template LIBRAPID_ALWAYS_INLINE auto VectorShape::operator=(const std::initializer_list &vals) -> VectorShape & { - LIBRAPID_ASSERT(vals.size() == 1, "MatrixShape must be initialized with 1 value"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + vals.size() == 1, + "MatrixShape must be initialized with 1 value. Received {}", + vals.size()); m_elements = *(vals.begin()); return *this; } @@ -555,7 +623,10 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto VectorShape::operator=(const std::vector &vals) -> VectorShape & { - LIBRAPID_ASSERT(vals.size() == 1, "MatrixShape must be initialized with 1 value"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::runtime_error, + vals.size() == 1, + "MatrixShape must be initialized with 1 value. Received {}", + vals.size()); m_elements = vals[0]; return *this; } @@ -574,8 +645,12 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto VectorShape::operator[](Index index) const -> const SizeType & { static_assert(std::is_integral_v, "Index must be an integral type"); - LIBRAPID_ASSERT(index < 1, "Index out of bounds"); - LIBRAPID_ASSERT(index >= 0, "Index out of bounds"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + index < 1, + "Index {} out of bounds for VectorShape with 1 dimension", + index); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, index >= 0, "Index {} out of bounds. 
Must be greater than 0", index); return m_elements; } @@ -583,8 +658,12 @@ namespace librapid { template LIBRAPID_ALWAYS_INLINE auto VectorShape::operator[](Index index) -> SizeType & { static_assert(std::is_integral_v, "Index must be an integral type"); - LIBRAPID_ASSERT(index < 1, "Index out of bounds"); - LIBRAPID_ASSERT(index >= 0, "Index out of bounds"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + index < 1, + "Index {} out of bounds for VectorShape with 1 dimension", + index); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, index >= 0, "Index {} out of bounds. Must be greater than 0", index); return m_elements; } @@ -592,10 +671,20 @@ namespace librapid { LIBRAPID_ALWAYS_INLINE constexpr auto VectorShape::ndim() const -> int { return 1; } LIBRAPID_ALWAYS_INLINE auto VectorShape::subshape(int start, int end) const -> Shape { - LIBRAPID_ASSERT(start <= end, "Start index must be less than end index"); - LIBRAPID_ASSERT(end <= 1, - "End index must be less than or equal to the number of dimensions"); - LIBRAPID_ASSERT(start >= 0, "Start index must be greater than or equal to 0"); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::invalid_argument, + start <= end, + "Start index ({}) must not be greater than end index ({})", + start, + end); + LIBRAPID_ASSERT_WITH_EXCEPTION( + std::range_error, + end <= 1, + "End index ({}) must be less than or equal to the number of dimensions (1).", + end); + LIBRAPID_ASSERT_WITH_EXCEPTION(std::range_error, + start >= 0, + "Start index ({}) must be greater than or equal to 0", + start); return Shape::zeros(1); } diff --git a/librapid/include/librapid/core/config.hpp b/librapid/include/librapid/core/config.hpp index 45cb5621..3ed090fc 100644 --- a/librapid/include/librapid/core/config.hpp +++ b/librapid/include/librapid/core/config.hpp @@ -12,10 +12,8 @@ // Detect Release vs Debug builds #if !defined(NDEBUG) # define LIBRAPID_DEBUG -# define LIBRAPID_RELEASE_NOEXCEPT #else # define LIBRAPID_RELEASE -# define 
LIBRAPID_RELEASE_NOEXCEPT noexcept #endif // Detect the operating system diff --git a/librapid/include/librapid/core/core.hpp b/librapid/include/librapid/core/core.hpp index 23edcc92..835e0a25 100644 --- a/librapid/include/librapid/core/core.hpp +++ b/librapid/include/librapid/core/core.hpp @@ -1,6 +1,12 @@ #ifndef LIBRAPID_CORE #define LIBRAPID_CORE +#if defined(LIBRAPID_PYTHON) +# include +# include +# include +#endif // LIBRAPID_PYTHON + #include "warningSuppress.hpp" #include "librapidPch.hpp" #include "debugTrap.hpp" @@ -9,6 +15,7 @@ #include "traits.hpp" #include "typetraits.hpp" #include "helperMacros.hpp" +#include "log.hpp" #include "forward.hpp" @@ -19,30 +26,30 @@ // Fourier Transform #if defined(LIBRAPID_HAS_FFTW) && !defined(LIBRAPID_HAS_CUDA) // If CUDA is enabled, we use cuFFT -# include +# include #endif // LIBRAPID_HAS_CUDA #if defined(LIBRAPID_MSVC) -# pragma warning(push) -# pragma warning(disable : 4324) -# pragma warning(disable : 4458) -# pragma warning(disable : 4456) +# pragma warning(push) +# pragma warning(disable : 4324) +# pragma warning(disable : 4458) +# pragma warning(disable : 4456) #endif // LIBRAPID_MSVC #include #if defined(LIBRAPID_MSVC) -# pragma warning(pop) +# pragma warning(pop) #endif // LIBRAPID_MSVC #if defined(LIBRAPID_HAS_OPENCL) -# include "../opencl/openclErrorIdentifier.hpp" -# include "../opencl/openclConfigure.hpp" -# include "../opencl/openclKernelProcessor.hpp" +# include "../opencl/openclErrorIdentifier.hpp" +# include "../opencl/openclConfigure.hpp" +# include "../opencl/openclKernelProcessor.hpp" #endif // LIBRAPID_HAS_OPENCL #if defined(LIBRAPID_HAS_CUDA) -# include "../cuda/cudaKernelProcesor.hpp" +# include "../cuda/cudaKernelProcesor.hpp" #endif // LIBRAPID_HAS_CUDA #endif // LIBRAPID_CORE \ No newline at end of file diff --git a/librapid/include/librapid/core/genericConfig.hpp b/librapid/include/librapid/core/genericConfig.hpp index 38003b20..bf276d17 100644 --- 
a/librapid/include/librapid/core/genericConfig.hpp +++ b/librapid/include/librapid/core/genericConfig.hpp @@ -4,167 +4,38 @@ #define LIBRAPID_INLINE inline #define LIBRAPID_ALWAYS_INLINE inline -#define LIBRAPID_ASSERT_ALWAYS(cond, msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - if (!(cond)) { \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)), \ - (int)strlen(FILENAME), \ - (int)funcName.length(), \ - (int)strlen(#cond), \ - (int)strlen("ASSERTION FAILED")); \ - std::string formatted = fmt::format( \ - "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " \ - "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " \ - "{4:>{10}}]\n{5}\n", \ - "ASSERTION FAILED", \ - FILENAME, \ - funcName, \ - __LINE__, \ - #cond, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 14, \ - maxLen + 9, \ - maxLen + 5, \ - maxLen + 9, \ - maxLen + 4); \ - if (librapid::global::throwOnAssert) { \ - throw std::runtime_error(formatted); \ - } else { \ - fmt::print(fmt::fg(fmt::color::red), formatted); \ - psnip_trap(); \ - } \ - } \ - } while (0) - #if defined(LIBRAPID_ENABLE_ASSERT) -# define LIBRAPID_STATUS(msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::green), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "STATUS", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - } while (0) - -# define LIBRAPID_WARN(msg, ...) 
\ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::yellow), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "WARNING", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - } while (0) -# define LIBRAPID_ERROR(msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapiod::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::red), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "ERROR", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - if (librapid::global::throwOnAssert) { \ - throw std::runtime_error(formatted); \ - } else { \ - fmt::print(fmt::fg(fmt::color::red), formatted); \ - psnip_trap(); \ - } \ - } while (0) +# define LIBRAPID_ASSERT(condition, message, ...) \ + librapid::assert::librapidAssert(condition, \ + message, \ + __LINE__, \ + FUNCTION, \ + FILENAME, \ + STRINGIFY(condition) __VA_OPT__(, ) \ + __VA_ARGS__) + +# define LIBRAPID_ASSERT_WITH_EXCEPTION(raiseType, condition, message, ...) \ + librapid::assert::librapidAssert(condition, \ + message, \ + __LINE__, \ + FUNCTION, \ + FILENAME, \ + STRINGIFY(condition) __VA_OPT__(, ) \ + __VA_ARGS__) + +# define LIBRAPID_STATUS(message, ...) 
\ + librapid::assert::librapidStatus( \ + message, __LINE__, FUNCTION, FILENAME __VA_OPT__(, ) __VA_ARGS__) + +# define LIBRAPID_WARN(message, ...) \ + librapid::assert::librapidWarn( \ + message, __LINE__, FUNCTION, FILENAME __VA_OPT__(, ) __VA_ARGS__) + +# define LIBRAPID_ERROR(message, ...) \ + librapid::assert::librapidError( \ + message, __LINE__, FUNCTION, FILENAME __VA_OPT__(, ) __VA_ARGS__) -# define LIBRAPID_WASSERT(cond, msg, ...) \ - do { \ - if (!(cond)) { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen(#cond) + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::yellow), \ - "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " \ - "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " \ - "{4:>{10}}]\n{5}\n", \ - "WARN ASSERTION FAILED", \ - FILENAME, \ - funcName, \ - __LINE__, \ - #cond, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen + 0, \ - maxLen - 5); \ - } \ - } while (0) - -# define LIBRAPID_ASSERT(cond, msg, ...) \ - LIBRAPID_ASSERT_ALWAYS(cond, msg __VA_OPT__(, ) __VA_ARGS__) -#else -# define LIBRAPID_WARN_ONCE(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_STATUS(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_WARN(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_ERROR(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_LOG(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_WASSERT(cond, ...) \ - do { \ - } while (0) -# define LIBRAPID_ASSERT(cond, ...) 
\ - do { \ - } while (0) #endif // LIBRAPID_ENABLE_ASSERT -#define PURE_FUNCTION [[nodiscard]] constexpr - #endif // LIBRAPID_CORE_GNU_CONFIG_HPP \ No newline at end of file diff --git a/librapid/include/librapid/core/global.hpp b/librapid/include/librapid/core/global.hpp index e3dba898..c43e63c1 100644 --- a/librapid/include/librapid/core/global.hpp +++ b/librapid/include/librapid/core/global.hpp @@ -8,8 +8,8 @@ namespace librapid { namespace global { - // Should ASSERT functions error or throw exceptions? - extern bool throwOnAssert; + // Should ASSERT functions print their message to stdout? + extern bool printOnAssert; /// Arrays with more elements than this will run with multithreaded implementations extern size_t multithreadThreshold; diff --git a/librapid/include/librapid/core/gnuConfig.hpp b/librapid/include/librapid/core/gnuConfig.hpp index be214525..d6afc046 100644 --- a/librapid/include/librapid/core/gnuConfig.hpp +++ b/librapid/include/librapid/core/gnuConfig.hpp @@ -2,169 +2,45 @@ #define LIBRAPID_CORE_GNU_CONFIG_HPP #define LIBRAPID_INLINE inline -#define LIBRAPID_ALWAYS_INLINE inline __attribute__((always_inline)) -#define LIBRAPID_ASSERT_ALWAYS(cond, msg, ...) 
\ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - if (!(cond)) { \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)), \ - (int)strlen(FILENAME), \ - (int)funcName.length(), \ - (int)strlen(#cond), \ - (int)strlen("ASSERTION FAILED")); \ - std::string formatted = fmt::format( \ - "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " \ - "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " \ - "{4:>{10}}]\n{5}\n", \ - "ASSERTION FAILED", \ - FILENAME, \ - funcName, \ - __LINE__, \ - #cond, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 14, \ - maxLen + 9, \ - maxLen + 5, \ - maxLen + 9, \ - maxLen + 4); \ - if (librapid::global::throwOnAssert) { \ - throw std::runtime_error(formatted); \ - } else { \ - fmt::print(fmt::fg(fmt::color::red), formatted); \ - psnip_trap(); \ - } \ - } \ - } while (0) +#if defined(LIBRAPID_NO_ALWAYS_INLINE) +# define LIBRAPID_ALWAYS_INLINE inline +#else +# define LIBRAPID_ALWAYS_INLINE inline __attribute__((always_inline)) +#endif #if defined(LIBRAPID_ENABLE_ASSERT) -# define LIBRAPID_STATUS(msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::green), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "STATUS", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - } while (0) - -# define LIBRAPID_WARN(msg, ...) 
\ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::yellow), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "WARNING", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - } while (0) - -# define LIBRAPID_ERROR(msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - std::string formatted = fmt::format(fmt::fg(fmt::color::red), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "ERROR", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - if (librapid::global::throwOnAssert) { \ - throw std::runtime_error(formatted); \ - } else { \ - fmt::print(fmt::fg(fmt::color::red), formatted); \ - psnip_trap(); \ - } \ - } while (0) -# define LIBRAPID_WASSERT(cond, msg, ...) 
\ - do { \ - if (!(cond)) { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen(#cond) + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::yellow), \ - "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " \ - "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " \ - "{4:>{10}}]\n{5}\n", \ - "WARN ASSERTION FAILED", \ - FILENAME, \ - funcName, \ - __LINE__, \ - #cond, \ - fmt::format(msg __VA_OPT__(, ) __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen + 0, \ - maxLen - 5); \ - } \ - } while (0) +# define LIBRAPID_ASSERT(condition, message, ...) \ + librapid::assert::librapidAssert(condition, \ + message, \ + __LINE__, \ + FUNCTION, \ + FILENAME, \ + STRINGIFY(condition) __VA_OPT__(, ) \ + __VA_ARGS__) + +# define LIBRAPID_ASSERT_WITH_EXCEPTION(raiseType, condition, message, ...) \ + librapid::assert::librapidAssert(condition, \ + message, \ + __LINE__, \ + FUNCTION, \ + FILENAME, \ + STRINGIFY(condition) __VA_OPT__(, ) \ + __VA_ARGS__) + +# define LIBRAPID_STATUS(message, ...) \ + librapid::assert::librapidStatus( \ + message, __LINE__, FUNCTION, FILENAME __VA_OPT__(, ) __VA_ARGS__) + +# define LIBRAPID_WARN(message, ...) \ + librapid::assert::librapidWarn( \ + message, __LINE__, FUNCTION, FILENAME __VA_OPT__(, ) __VA_ARGS__) + +# define LIBRAPID_ERROR(message, ...) \ + librapid::assert::librapidError( \ + message, __LINE__, FUNCTION, FILENAME __VA_OPT__(, ) __VA_ARGS__) -# define LIBRAPID_ASSERT(cond, msg, ...) \ - LIBRAPID_ASSERT_ALWAYS(cond, msg __VA_OPT__(, ) __VA_ARGS__) -#else -# define LIBRAPID_WARN_ONCE(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_STATUS(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_WARN(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_ERROR(msg, ...) 
\ - do { \ - } while (0) -# define LIBRAPID_LOG(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_WASSERT(cond, ...) \ - do { \ - } while (0) -# define LIBRAPID_ASSERT(cond, ...) \ - do { \ - } while (0) #endif // LIBRAPID_ENABLE_ASSERT -#define PURE_FUNCTION __attribute__((const)) - #endif // LIBRAPID_CORE_GNU_CONFIG_HPP \ No newline at end of file diff --git a/librapid/include/librapid/core/helperMacros.hpp b/librapid/include/librapid/core/helperMacros.hpp index 2bd93caf..34ac2c69 100644 --- a/librapid/include/librapid/core/helperMacros.hpp +++ b/librapid/include/librapid/core/helperMacros.hpp @@ -67,9 +67,13 @@ return os; \ } -#define LIBRAPID_SIMPLE_IO_NORANGE(TEMPLATE, TYPE) \ - template \ - struct fmt::is_range : std::false_type {}; +#if defined(FMT_RANGES_H_) +# define LIBRAPID_SIMPLE_IO_NORANGE(TEMPLATE, TYPE) \ + template \ + struct fmt::is_range : std::false_type {}; +#else +# define LIBRAPID_SIMPLE_IO_NORANGE(TEMPLATE, TYPE) +#endif // FMT_RANGES_H_ namespace librapid::typetraits { template diff --git a/librapid/include/librapid/core/librapidPch.hpp b/librapid/include/librapid/core/librapidPch.hpp index a75cbca5..87c4b5c9 100644 --- a/librapid/include/librapid/core/librapidPch.hpp +++ b/librapid/include/librapid/core/librapidPch.hpp @@ -9,28 +9,16 @@ */ // Standard Library -#include #include -#include -#include #include #include #include #include -#include -#include #include -#include -#include -#include -#include -#include -#include #include #include #include #include -#include #if defined(LIBRAPID_HAS_OMP) # include @@ -48,15 +36,7 @@ // fmtlib #include #include -#include -#include -#include -#include #include -#include -#include -#include -#include #include diff --git a/librapid/include/librapid/core/log.hpp b/librapid/include/librapid/core/log.hpp new file mode 100644 index 00000000..2d73eea2 --- /dev/null +++ b/librapid/include/librapid/core/log.hpp @@ -0,0 +1,134 @@ +#pragma once + +namespace librapid::assert { + template + void 
librapidAssert(bool condition, const std::string &message, uint64_t line, + std::string signature, const std::string &filename, + const std::string &conditionString, const Args &...args) { + if (!condition) { + std::string formattedMessage = fmt::vformat(message, fmt::make_format_args(args...)); + + if (global::printOnAssert) { + if (signature.length() > 70) { + // Truncate the signature + signature = signature.substr(0, 67) + "..."; + } + + int maxLen = detail::internalMax((int)std::ceil(std::log(line)), + (int)filename.length(), + (int)conditionString.length(), + (int)signature.length(), + (int)strlen("ASSERTION FAILED")); + + std::string formatted = fmt::format( + "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " + "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " + "{4:>{10}}]\n{5}\n", + "ASSERTION FAILED", + filename, + signature, + line, + conditionString, + formattedMessage, + maxLen + 14, + maxLen + 9, + maxLen + 5, + maxLen + 9, + maxLen + 4); + + fmt::print(fmt::fg(fmt::color::red), formatted); + } + + throw RaiseType(formattedMessage); + } + } + + template + void librapidStatusGeneric(const std::string &message, uint64_t line, std::string signature, + const std::string &filename, const std::string warningType, + const Format &format, Args &&...args) { + if (signature.length() > 70) { + // Truncate the signature + signature = signature.substr(0, 67) + "..."; + } + + int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(line)) + 6, + (int)filename.length() + 6, + (int)signature.length() + 6, + (int)warningType.length()); + + std::string formattedMessage = fmt::vformat(message, fmt::make_format_args(args...)); + + fmt::print(format, + "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " + "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", + "STATUS", + filename, + signature, + line, + formattedMessage, + maxLen + 5, + maxLen + 0, + maxLen - 4, + maxLen); + } + + template + void librapidStatus(const std::string &message, uint64_t line, std::string signature, + const std::string 
&filename, Args &&...args) { + librapidStatusGeneric(message, + line, + signature, + filename, + "STATUS", + fmt::fg(fmt::color::green), + std::forward(args)...); + } + + template + void librapidWarn(const std::string &message, uint64_t line, std::string signature, + const std::string &filename, Args &&...args) { + librapidStatusGeneric(message, + line, + signature, + filename, + "WARNING", + fmt::fg(fmt::color::yellow), + std::forward(args)...); + } + + template + void librapidError(const std::string &message, uint64_t line, std::string signature, + const std::string &filename, Args &&...args) { + librapidStatusGeneric(message, + line, + signature, + filename, + "ERROR", + fmt::fg(fmt::color::red), + std::forward(args)...); + throw RaiseType(message); + } +} // namespace librapid::assert + +#if !defined(LIBRAPID_ENABLE_ASSERT) +# define LIBRAPID_ASSERT(condition, message, ...) \ + do { \ + } while (false) + +# define LIBRAPID_ASSERT_WITH_EXCEPTION(raiseType, condition, message, ...) \ + do { \ + } while (false) + +# define LIBRAPID_STATUS(message, ...) \ + do { \ + } while (false) + +# define LIBRAPID_WARN(message, ...) \ + do { \ + } while (false) + +# define LIBRAPID_ERROR(message, ...) \ + do { \ + } while (false) +#endif // LIBRAPID_ENABLE_ASSERT diff --git a/librapid/include/librapid/core/msvcConfig.hpp b/librapid/include/librapid/core/msvcConfig.hpp index 86b05764..f60cacb0 100644 --- a/librapid/include/librapid/core/msvcConfig.hpp +++ b/librapid/include/librapid/core/msvcConfig.hpp @@ -2,169 +2,45 @@ #define LIBRAPID_CORE_MSVC_CONFIG_HPP #define LIBRAPID_INLINE inline -#define LIBRAPID_ALWAYS_INLINE inline __forceinline -#define LIBRAPID_ASSERT_ALWAYS(cond, msg, ...) 
\ - do { \ - if (!(cond)) { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)), \ - (int)strlen(FILENAME), \ - (int)funcName.length(), \ - (int)strlen(#cond), \ - (int)strlen("ASSERTION FAILED")); \ - std::string formatted = fmt::format( \ - "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " \ - "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " \ - "{4:>{10}}]\n{5}\n", \ - "ASSERTION FAILED", \ - FILENAME, \ - funcName, \ - __LINE__, \ - #cond, \ - fmt::format(msg, __VA_ARGS__), \ - maxLen + 14, \ - maxLen + 9, \ - maxLen + 5, \ - maxLen + 9, \ - maxLen + 4); \ - if (librapid::global::throwOnAssert) { \ - throw std::runtime_error(formatted); \ - } else { \ - fmt::print(fmt::fg(fmt::color::red), formatted); \ - psnip_trap(); \ - } \ - } \ - } while (0) +#if defined(LIBRAPID_NO_ALWAYS_INLINE) +# define LIBRAPID_ALWAYS_INLINE inline +#else +# define LIBRAPID_ALWAYS_INLINE inline __forceinline +#endif #if defined(LIBRAPID_ENABLE_ASSERT) -# define LIBRAPID_STATUS(msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::green), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "STATUS", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg, __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - } while (0) - -# define LIBRAPID_WARN(msg, ...) 
\ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::yellow), \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "WARNING", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg, __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - } while (0) - -# define LIBRAPID_ERROR(msg, ...) \ - do { \ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - std::string formatted = fmt::format( \ - "[{0:-^{5}}]\n[File {1:>{6}}]\n[Function " \ - "{2:>{7}}]\n[Line {3:>{8}}]\n{4}\n", \ - "ERROR", \ - FILENAME, \ - funcName, \ - __LINE__, \ - fmt::format(msg, __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen); \ - if (librapid::global::throwOnAssert) { \ - throw std::runtime_error(formatted); \ - } else { \ - fmt::print(fmt::fg(fmt::color::red), formatted); \ - psnip_trap(); \ - } \ - } while (0) -# define LIBRAPID_WASSERT(cond, msg, ...) 
\ - std::string funcName = FUNCTION; \ - if (funcName.length() > 75) funcName = ""; \ - do { \ - if (!(cond)) { \ - int maxLen = librapid::detail::internalMax((int)std::ceil(std::log(__LINE__)) + 6, \ - (int)strlen(FILENAME) + 6, \ - (int)funcName.length() + 6, \ - (int)strlen(#cond) + 6, \ - (int)strlen("WARN ASSERTION FAILED")); \ - fmt::print(fmt::fg(fmt::color::yellow), \ - "[{0:-^{6}}]\n[File {1:>{7}}]\n[Function " \ - "{2:>{8}}]\n[Line {3:>{9}}]\n[Condition " \ - "{4:>{10}}]\n{5}\n", \ - "WARN ASSERTION FAILED", \ - FILENAME, \ - funcName, \ - __LINE__, \ - #cond, \ - fmt::format(msg, __VA_ARGS__), \ - maxLen + 5, \ - maxLen + 0, \ - maxLen - 4, \ - maxLen + 0, \ - maxLen - 5); \ - } \ - } while (0) +# define LIBRAPID_ASSERT(condition, message, ...) \ + librapid::assert::librapidAssert(condition, \ + message, \ + __LINE__, \ + FUNCTION, \ + FILENAME, \ + STRINGIFY(condition), \ + __VA_ARGS__) + +# define LIBRAPID_ASSERT_WITH_EXCEPTION(raiseType, condition, message, ...) \ + librapid::assert::librapidAssert(condition, \ + message, \ + __LINE__, \ + FUNCTION, \ + FILENAME, \ + STRINGIFY(condition), \ + __VA_ARGS__) + +# define LIBRAPID_STATUS(message, ...) \ + librapid::assert::librapidStatus( \ + message, __LINE__, FUNCTION, FILENAME, __VA_ARGS__) + +# define LIBRAPID_WARN(message, ...) \ + librapid::assert::librapidWarn( \ + message, __LINE__, FUNCTION, FILENAME, __VA_ARGS__) + +# define LIBRAPID_ERROR(message, ...) \ + librapid::assert::librapidError( \ + message, __LINE__, FUNCTION, FILENAME, __VA_ARGS__) -# define LIBRAPID_ASSERT(cond, msg, ...) LIBRAPID_ASSERT_ALWAYS(cond, msg, __VA_ARGS__) - -#else -# define LIBRAPID_WARN_ONCE(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_STATUS(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_WARN(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_ERROR(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_LOG(msg, ...) \ - do { \ - } while (0) -# define LIBRAPID_WASSERT(cond, ...) 
\ - do { \ - } while (0) -# define LIBRAPID_ASSERT(cond, ...) \ - do { \ - } while (0) #endif // LIBRAPID_ENABLE_ASSERT -#define PURE_FUNCTION [[nodiscard]] constexpr - -#endif // LIBRAPID_CORE_MSVC_CONFIG_HPP \ No newline at end of file +#endif // LIBRAPID_CORE_MSVC_CONFIG_HPP diff --git a/librapid/include/librapid/core/traits.hpp b/librapid/include/librapid/core/traits.hpp index 9a4e760d..fc2deba5 100644 --- a/librapid/include/librapid/core/traits.hpp +++ b/librapid/include/librapid/core/traits.hpp @@ -116,14 +116,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template @@ -200,14 +200,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return 
NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -232,14 +232,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -264,14 
+264,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -296,14 +296,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return 
NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -328,14 +328,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -360,14 +360,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { 
return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -392,14 +392,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -424,14 +424,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - 
LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -456,14 +456,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // 
LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -488,14 +488,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -520,14 +520,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // 
LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template @@ -552,14 +552,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = true; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; #if defined(LIBRAPID_HAS_CUDA) @@ -773,14 +773,14 @@ namespace librapid { static constexpr bool canAlign = true; static constexpr bool canMemcpy = false; - LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } - LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } - LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } - LIMIT_IMPL_CONSTEXPR(roundError) { return 
NUM_LIM(round_error); } - LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } - LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } - LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } - LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + // LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + // LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + // LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + // LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + // LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + // LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + // LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + // LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } }; template<> @@ -805,6 +805,19 @@ namespace librapid { }; #endif + template + struct NumericInfo { + using Scalar = typename TypeInfo::Scalar; + LIMIT_IMPL_CONSTEXPR(min) { return NUM_LIM(min); } + LIMIT_IMPL_CONSTEXPR(max) { return NUM_LIM(max); } + LIMIT_IMPL_CONSTEXPR(epsilon) { return NUM_LIM(epsilon); } + LIMIT_IMPL_CONSTEXPR(roundError) { return NUM_LIM(round_error); } + LIMIT_IMPL_CONSTEXPR(denormMin) { return NUM_LIM(denorm_min); } + LIMIT_IMPL_CONSTEXPR(infinity) { return NUM_LIM(infinity); } + LIMIT_IMPL_CONSTEXPR(quietNaN) { return NUM_LIM(quiet_NaN); } + LIMIT_IMPL_CONSTEXPR(signalingNaN) { return NUM_LIM(signaling_NaN); } + }; + template using ScalarReturnType = typename TypeInfo::Scalar; }; // namespace typetraits diff --git a/librapid/include/librapid/librapid.hpp b/librapid/include/librapid/librapid.hpp index c6fbeb16..56dcb81f 100644 --- a/librapid/include/librapid/librapid.hpp +++ b/librapid/include/librapid/librapid.hpp @@ -11,4 +11,22 @@ #include "core/literals.hpp" #include "ml/ml.hpp" +namespace librapid { + LIBRAPID_ALWAYS_INLINE void anotherTestFunction(int x) { + if (x % 3 == 0) { + throw std::runtime_error("Divisible by 3"); + } else { + 
fmt::print("Not divisible by 3\n"); + } + } + + LIBRAPID_ALWAYS_INLINE void testFunction(int x) { + if (x % 5 == 0) { + anotherTestFunction(x); + } else { + fmt::print("Not divisible by 5\n"); + } + } +} + #endif // LIBRAPID_HPP \ No newline at end of file diff --git a/librapid/include/librapid/math/complex.hpp b/librapid/include/librapid/math/complex.hpp index 50e197f7..48fea7cd 100644 --- a/librapid/include/librapid/math/complex.hpp +++ b/librapid/include/librapid/math/complex.hpp @@ -13,2068 +13,2070 @@ */ #if defined(_M_IX86) || (defined(_M_X64) && !defined(_M_ARM64EC)) -# define USE_X86_X64_INTRINSICS -# include +# define USE_X86_X64_INTRINSICS +# include #elif defined(_M_ARM64) || defined(_M_ARM64EC) -# define USE_ARM64_INTRINSICS -# include +# define USE_ARM64_INTRINSICS +# include #endif +#define REQUIRE_SCALAR(T) \ + typename std::enable_if_t<::librapid::typetraits::TypeInfo::type == \ + ::librapid::detail::LibRapidType::Scalar, \ + int> = 0 + namespace librapid { - namespace detail { - // Implements floating-point arithmetic for numeric algorithms - namespace multiprec { - template - struct Fmp { - Scalar val0; // Most significant numeric_limits::precision bits - Scalar val1; // Least significant numeric_limits::precision bits - }; - - /// \brief Summarizes two 1x precision values combined into a 2x precision result - /// - /// This function is exact when: - /// 1. The result doesn't overflow - /// 2. Either underflow is gradual, or no internal underflow occurs - /// 3. Intermediate precision is either the same as T, or greater than twice the - /// precision of T - /// 4. Parameters and local variables do not retain extra intermediate precision - /// 5. Rounding mode is rounding to nearest. - /// - /// Violation of condition 3 or 5 could lead to relative error on the order of - /// epsilon^2. 
- /// - /// Violation of other conditions could lead to worse results - /// - /// \tparam T Template type - /// \param x First value - /// \param y Second value - /// \return Sum of x and y - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto addX2(const T &x, - const T &y) noexcept - -> Fmp { - const T sum0 = x + y; - const T yMod = sum0 - x; - const T xMod = sum0 - yMod; - const T yErr = y - yMod; - const T xErr = x - xMod; - return {sum0, xErr + yErr}; - } - - /// \brief Combines two 1x precision values into a 2x precision result with the - /// requirement of specific exponent relationship - /// - /// Requires: exponent(x) + countr_zero(significand(x)) >= exponent(y) or x == 0 - /// - /// The result is exact when: - /// 1. The requirement above is satisfied - /// 2. No internal overflow occurs - /// 3. Either underflow is gradual, or no internal underflow occurs - /// 4. Intermediate precision is either the same as T, or greater than twice the - /// precision of T - /// 5. Parameters and local variables do not retain extra intermediate precision - /// 6. Rounding mode is rounding to nearest - /// - /// Violation of condition 3 or 5 could lead to relative error on the order of - /// epsilon^2. 
- /// - /// Violation of other conditions could lead to worse results - /// - /// \tparam T Template type - /// \param x First value - /// \param y Second value - /// \return Sum of x and y - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto addSmallX2(const T x, - const T y) noexcept - -> Fmp { - const T sum0 = x + y; - const T yMod = sum0 - x; - const T yErr = y - yMod; - return {sum0, yErr}; - } - - /// \brief Combines a 1x precision value with a 2x precision value - /// - /// Requires: exponent(x) + countr_zero(significand(x)) >= exponent(y.val0) or x == 0 - /// - /// \tparam T Template type - /// \param x First value - /// \param y Second value - /// \return Sum of x and y - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto - addSmallX2(const T &x, const Fmp &y) noexcept -> Fmp { - const Fmp sum0 = addSmallX2(x, y.val0); - return addSmallX2(sum0.val0, sum0.val1 + y.val1); - } - - /// \brief Combines two 2x precision values into a 1x precision result - /// \tparam T Template type - /// \param x First value - /// \param y Second value - /// \return Sum of x and y - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto addX1(const Fmp &x, - const Fmp &y) noexcept - -> T { - const Fmp sum0 = addX2(x.val0, y.val0); - return sum0.val0 + (sum0.val1 + (x.val1 + y.val1)); - } - - /// \brief Rounds a 2x precision value to 26 significant bits - /// \param x Value to round - /// \return Rounded value - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto - highHalf(const double x) noexcept -> double { - const auto bits = bitCast(x); - const auto highHalfBits = (bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL; - return bitCast(highHalfBits); - } + namespace detail { + // Implements floating-point arithmetic for numeric algorithms + namespace multiprec { + template + struct Fmp { + Scalar val0; // Most significant numeric_limits::precision bits + Scalar val1; // Least significant numeric_limits::precision bits + }; + + 
/// \brief Summarizes two 1x precision values combined into a 2x precision result + /// + /// This function is exact when: + /// 1. The result doesn't overflow + /// 2. Either underflow is gradual, or no internal underflow occurs + /// 3. Intermediate precision is either the same as T, or greater than twice the + /// precision of T + /// 4. Parameters and local variables do not retain extra intermediate precision + /// 5. Rounding mode is rounding to nearest. + /// + /// Violation of condition 3 or 5 could lead to relative error on the order of + /// epsilon^2. + /// + /// Violation of other conditions could lead to worse results + /// + /// \tparam T Template type + /// \param x First value + /// \param y Second value + /// \return Sum of x and y + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto addX2(const T &x, + const T &y) noexcept + -> Fmp { + const T sum0 = x + y; + const T yMod = sum0 - x; + const T xMod = sum0 - yMod; + const T yErr = y - yMod; + const T xErr = x - xMod; + return {sum0, xErr + yErr}; + } + + /// \brief Combines two 1x precision values into a 2x precision result with the + /// requirement of specific exponent relationship + /// + /// Requires: exponent(x) + countr_zero(significand(x)) >= exponent(y) or x == 0 + /// + /// The result is exact when: + /// 1. The requirement above is satisfied + /// 2. No internal overflow occurs + /// 3. Either underflow is gradual, or no internal underflow occurs + /// 4. Intermediate precision is either the same as T, or greater than twice the + /// precision of T + /// 5. Parameters and local variables do not retain extra intermediate precision + /// 6. Rounding mode is rounding to nearest + /// + /// Violation of condition 3 or 5 could lead to relative error on the order of + /// epsilon^2. 
+ /// + /// Violation of other conditions could lead to worse results + /// + /// \tparam T Template type + /// \param x First value + /// \param y Second value + /// \return Sum of x and y + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto addSmallX2(const T x, + const T y) noexcept + -> Fmp { + const T sum0 = x + y; + const T yMod = sum0 - x; + const T yErr = y - yMod; + return {sum0, yErr}; + } + + /// \brief Combines a 1x precision value with a 2x precision value + /// + /// Requires: exponent(x) + countr_zero(significand(x)) >= exponent(y.val0) or x == 0 + /// + /// \tparam T Template type + /// \param x First value + /// \param y Second value + /// \return Sum of x and y + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto + addSmallX2(const T &x, const Fmp &y) noexcept -> Fmp { + const Fmp sum0 = addSmallX2(x, y.val0); + return addSmallX2(sum0.val0, sum0.val1 + y.val1); + } + + /// \brief Combines two 2x precision values into a 1x precision result + /// \tparam T Template type + /// \param x First value + /// \param y Second value + /// \return Sum of x and y + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto addX1(const Fmp &x, + const Fmp &y) noexcept + -> T { + const Fmp sum0 = addX2(x.val0, y.val0); + return sum0.val0 + (sum0.val1 + (x.val1 + y.val1)); + } + + /// \brief Rounds a 2x precision value to 26 significant bits + /// \param x Value to round + /// \return Rounded value + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr auto + highHalf(const double x) noexcept -> double { + const auto bits = bitCast(x); + const auto highHalfBits = (bits + 0x3ff'ffffULL) & 0xffff'ffff'f800'0000ULL; + return bitCast(highHalfBits); + } #if defined(USE_X86_X64_INTRINSICS) || defined(USE_ARM64_INTRINSICS) // SIMD method - /// \brief Calculates the error between x^2 and its faithfully rounded product prod0 - /// - /// The result is exact when: - /// 1. prod0 is x^2 faithfully rounded - /// 2. 
No internal overflow or underflow occurs - /// - /// Violation of condition 1 could lead to relative error on the order of epsilon. - /// - /// \param x Input value - /// \param prod0 Faithfully rounded product of x^2 - /// \return Error between x^2 and prod0 - LIBRAPID_NODISCARD - LIBRAPID_ALWAYS_INLINE auto sqrError(const double x, const double prod0) noexcept - -> double { -# if defined(USE_X86_X64_INTRINSICS) - const __m128d xVec = _mm_set_sd(x); - const __m128d prodVec = _mm_set_sd(prod0); - const __m128d resultVec = _mm_fmsub_sd(xVec, xVec, prodVec); - double result; - _mm_store_sd(&result, resultVec); - return result; -# else // Only two options, so this is fine - const float64x1_t xVec = vld1_double(&x); - const float64x1_t prod0Vec = vld1_double(&prod0); - const float64x1_t resultVec = vfma_double(vneg_double(prod0Vec), xVec, xVec); - double result; - vst1_double(&result, resultVec); - return result; -# endif - } + /// \brief Calculates the error between x^2 and its faithfully rounded product prod0 + /// + /// The result is exact when: + /// 1. prod0 is x^2 faithfully rounded + /// 2. No internal overflow or underflow occurs + /// + /// Violation of condition 1 could lead to relative error on the order of epsilon. 
+ /// + /// \param x Input value + /// \param prod0 Faithfully rounded product of x^2 + /// \return Error between x^2 and prod0 + LIBRAPID_NODISCARD + LIBRAPID_ALWAYS_INLINE auto sqrError(const double x, const double prod0) noexcept + -> double { +# if defined(USE_X86_X64_INTRINSICS) + const __m128d xVec = _mm_set_sd(x); + const __m128d prodVec = _mm_set_sd(prod0); + const __m128d resultVec = _mm_fmsub_sd(xVec, xVec, prodVec); + double result; + _mm_store_sd(&result, resultVec); + return result; +# else // Only two options, so this is fine + const float64x1_t xVec = vld1_double(&x); + const float64x1_t prod0Vec = vld1_double(&prod0); + const float64x1_t resultVec = vfma_double(vneg_double(prod0Vec), xVec, xVec); + double result; + vst1_double(&result, resultVec); + return result; +# endif + } #else - /// \brief Fallback method for sqrError(const double, const double) when SIMD is not - /// available. - LIBRAPID_NODISCARD - LIBRAPID_ALWAYS_INLINE constexpr double sqrError(const double x, - const double prod0) noexcept { - const double xHigh = highHalf(x); - const double xLow = x - xHigh; - return ((xHigh * xHigh - prod0) + 2.0 * xHigh * xLow) + xLow * xLow; - } + /// \brief Fallback method for sqrError(const double, const double) when SIMD is not + /// available. 
+ LIBRAPID_NODISCARD + LIBRAPID_ALWAYS_INLINE constexpr double sqrError(const double x, + const double prod0) noexcept { + const double xHigh = highHalf(x); + const double xLow = x - xHigh; + return ((xHigh * xHigh - prod0) + 2.0 * xHigh * xLow) + xLow * xLow; + } #endif - /// \brief Type-agnostic version of sqrError(const double, const double) - /// \tparam T Template type - /// \param x Input value - /// \param prod0 Faithfully rounded product of x^2 - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrError(const T x, - const T prod0) noexcept -> T { - const T xHigh = static_cast(highHalf(x)); - const T xLow = x - xHigh; - return ((xHigh * xHigh - prod0) + static_cast(2.0) * xHigh * xLow) + xLow * xLow; - } - - /// \brief Calculates the square of a 1x precision value and returns a 2x precision - /// result - /// - /// The result is exact when no internal overflow or underflow occurs. - /// - /// \param x Input value - /// \return 2x precision square of x - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrX2(const double x) noexcept - -> Fmp { - const double prod0 = x * x; - return {prod0, sqrError(x, prod0)}; - } - - /// \brief Type-agnostic version of sqrX2(const double) - /// \tparam T Template type - /// \param x Input value - /// \return 2x precision square of x - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrX2(const T x) noexcept -> Fmp { - const T prod0 = x * x; - return {prod0, static_cast(sqrError(x, prod0))}; - } - } // namespace multiprec - - namespace algorithm { - // HypotLegHuge = T{0.5} * sqrt((numeric_limits::max())); - // HypotLegTiny = sqrt(T{2.0} * (numeric_limits::min)() / - // numeric_limits::epsilon()); - - template - struct HypotLegHugeHelper { - // If is an integer type, divide by two rather than multiplying by 0.5, as - // 0.5 gets truncated to zero - static inline T val = - (std::is_integral_v) - ? 
(::librapid::sqrt(typetraits::TypeInfo::max()) / T(2)) - : (T(0.5) * ::librapid::sqrt(typetraits::TypeInfo::max())); - }; - - template<> - struct HypotLegHugeHelper { - static constexpr double val = 6.703903964971298e+153; - }; - - template<> - struct HypotLegHugeHelper { - static constexpr double val = 9.2233715e+18f; - }; - - template - struct HypotLegTinyHelper { - // If is an integer type, divide by two rather than multiplying by 0.5, as - // 0.5 gets truncated to zero - static inline T val = ::librapid::sqrt(T(2) * typetraits::TypeInfo::min() / - typetraits::TypeInfo::epsilon()); - }; - - template<> - struct HypotLegTinyHelper { - static constexpr double val = 1.4156865331029228e-146; - }; - - template<> - struct HypotLegTinyHelper { - static constexpr double val = 4.440892e-16f; - }; - - template - static inline T HypotLegHuge = HypotLegHugeHelper::val; - template - static inline T HypotLegTiny = HypotLegTinyHelper::val; - - /// \brief Calculates \f$ x^2 + y^2 - 1 \f$ for - /// \f$ |x| \geq |y| \f$ and \f$ 0.5 \leq |x| < 2^{12} \f$ - /// \tparam T Template type \param x First value \param y Second value - /// \return x * x + y * y - 1 - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto normMinusOne(const T x, - const T y) noexcept -> T { - const multiprec::Fmp xSqr = multiprec::sqrX2(x); - const multiprec::Fmp ySqr = multiprec::sqrX2(y); - const multiprec::Fmp xSqrM1 = multiprec::addSmallX2(T(-1), xSqr); - return multiprec::addX1(xSqrM1, ySqr); - } - - /// \brief Calculates \f$ \log(1 + x) \f$ - /// - /// May be inaccurate for small inputs - /// - /// \tparam safe If true, will check for NaNs and overflow - /// \tparam T Template type - /// \param x Input value - /// \return \f$ \log(1 + x) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto logP1(const T x) -> T { - if constexpr (!safe) return ::librapid::log(x + 1.0); + /// \brief Type-agnostic version of sqrError(const double, const double) + /// \tparam T Template type + /// \param x 
Input value + /// \param prod0 Faithfully rounded product of x^2 + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrError(const T x, + const T prod0) noexcept -> T { + const T xHigh = static_cast(highHalf(x)); + const T xLow = x - xHigh; + return ((xHigh * xHigh - prod0) + static_cast(2.0) * xHigh * xLow) + xLow * xLow; + } + + /// \brief Calculates the square of a 1x precision value and returns a 2x precision + /// result + /// + /// The result is exact when no internal overflow or underflow occurs. + /// + /// \param x Input value + /// \return 2x precision square of x + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrX2(const double x) noexcept + -> Fmp { + const double prod0 = x * x; + return {prod0, sqrError(x, prod0)}; + } + + /// \brief Type-agnostic version of sqrX2(const double) + /// \tparam T Template type + /// \param x Input value + /// \return 2x precision square of x + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto sqrX2(const T x) noexcept -> Fmp { + const T prod0 = x * x; + return {prod0, static_cast(sqrError(x, prod0))}; + } + } // namespace multiprec + + namespace algorithm { + // HypotLegHuge = T{0.5} * sqrt((numeric_limits::max())); + // HypotLegTiny = sqrt(T{2.0} * (numeric_limits::min)() / + // numeric_limits::epsilon()); + + template + struct HypotLegHugeHelper { + // If is an integer type, divide by two rather than multiplying by 0.5, as + // 0.5 gets truncated to zero + static inline T val = + (std::is_integral_v) + ? 
(::librapid::sqrt(typetraits::NumericInfo::max()) / T(2)) + : (T(0.5) * ::librapid::sqrt(typetraits::NumericInfo::max())); + }; + + template<> + struct HypotLegHugeHelper { + static constexpr double val = 6.703903964971298e+153; + }; + + template<> + struct HypotLegHugeHelper { + static constexpr double val = 9.2233715e+18f; + }; + + template + struct HypotLegTinyHelper { + // If is an integer type, divide by two rather than multiplying by 0.5, as + // 0.5 gets truncated to zero + static inline T val = ::librapid::sqrt(T(2) * typetraits::NumericInfo::min() / + typetraits::NumericInfo::epsilon()); + }; + + template<> + struct HypotLegTinyHelper { + static constexpr double val = 1.4156865331029228e-146; + }; + + template<> + struct HypotLegTinyHelper { + static constexpr double val = 4.440892e-16f; + }; + + template + static inline T HypotLegHuge = HypotLegHugeHelper::val; + template + static inline T HypotLegTiny = HypotLegTinyHelper::val; + + /// \brief Calculates \f$ x^2 + y^2 - 1 \f$ for + /// \f$ |x| \geq |y| \f$ and \f$ 0.5 \leq |x| < 2^{12} \f$ + /// \tparam T Template type \param x First value \param y Second value + /// \return x * x + y * y - 1 + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto normMinusOne(const T x, + const T y) noexcept -> T { + const multiprec::Fmp xSqr = multiprec::sqrX2(x); + const multiprec::Fmp ySqr = multiprec::sqrX2(y); + const multiprec::Fmp xSqrM1 = multiprec::addSmallX2(T(-1), xSqr); + return multiprec::addX1(xSqrM1, ySqr); + } + + /// \brief Calculates \f$ \log(1 + x) \f$ + /// + /// May be inaccurate for small inputs + /// + /// \tparam safe If true, will check for NaNs and overflow + /// \tparam T Template type + /// \param x Input value + /// \return \f$ \log(1 + x) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto logP1(const T x) -> T { + if constexpr (!safe) return ::librapid::log(x + 1.0); #if defined(LIBRAPID_USE_MULTIPREC) - // No point doing anything shown below if we're using multiprec - if 
constexpr (std::is_same_v) return ::librapid::log(x + 1.0); + // No point doing anything shown below if we're using multiprec + if constexpr (std::is_same_v) return ::librapid::log(x + 1.0); #endif - if (::librapid::isNaN(x)) return x + x; // Trigger a signaling NaN - - // Naive formula - if (x <= T(-0.5) || T(2) <= x) { - // To avoid overflow - if (x == typetraits::TypeInfo::max()) return ::librapid::log(x); - return ::librapid::log(T(1) + x); - } - - const T absX = ::librapid::abs(x); - if (absX < typetraits::TypeInfo::epsilon()) { - if (x == T(0)) return x; - return x - T(0.5) * x * x; // Honour rounding - } - - // log(1 + x) with fix for small x - const multiprec::Fmp tmp = multiprec::addSmallX2(T(1), x); - return ::librapid::log(tmp.val0) + tmp.val1 / tmp.val0; - } - - // Return log(hypot(x, y)) - - /// \brief Calculates \f$ \log(\sqrt{x^2 + y^2}) \f$ - /// \tparam safe If true, will check for NaNs and overflow - /// \tparam T Template type - /// \param x Horizontal component - /// \param y Vertical component - /// \return \f$ \log(\sqrt{x^2 + y^2}) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto logHypot(const T x, const T y) noexcept - -> T { - if constexpr (!safe) return ::librapid::log(::librapid::sqrt(x * x + y * y)); + if (::librapid::isNaN(x)) return x + x; // Trigger a signaling NaN + + // Naive formula + if (x <= T(-0.5) || T(2) <= x) { + // To avoid overflow + if (x == typetraits::NumericInfo::max()) return ::librapid::log(x); + return ::librapid::log(T(1) + x); + } + + const T absX = ::librapid::abs(x); + if (absX < typetraits::NumericInfo::epsilon()) { + if (x == T(0)) return x; + return x - T(0.5) * x * x; // Honour rounding + } + + // log(1 + x) with fix for small x + const multiprec::Fmp tmp = multiprec::addSmallX2(T(1), x); + return ::librapid::log(tmp.val0) + tmp.val1 / tmp.val0; + } + + // Return log(hypot(x, y)) + + /// \brief Calculates \f$ \log(\sqrt{x^2 + y^2}) \f$ + /// \tparam safe If true, will check for NaNs and 
overflow + /// \tparam T Template type + /// \param x Horizontal component + /// \param y Vertical component + /// \return \f$ \log(\sqrt{x^2 + y^2}) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto logHypot(const T x, const T y) noexcept + -> T { + if constexpr (!safe) return ::librapid::log(::librapid::sqrt(x * x + y * y)); #if defined(LIBRAPID_USE_MULTIPREC) - // No point doing anything shown below if we're using multiprec - if constexpr (std::is_same_v) - return ::librapid::log(::mpfr::hypot(x, y)); - else { + // No point doing anything shown below if we're using multiprec + if constexpr (std::is_same_v) + return ::librapid::log(::mpfr::hypot(x, y)); + else { #endif - if (!::librapid::isFinite(x) || !::librapid::isFinite(y)) { // Inf or NaN - // Return NaN and raise FE_INVALID if either x or y is NaN - if (::librapid::isNaN(x) || ::librapid::isNaN(y)) return x + y; - - // Return Inf if either of them is infinity - if (::librapid::isInf(x)) return x; - if (::librapid::isInf(y)) return y; - - return x + y; // Fallback - } - - T absX = ::librapid::abs(x); - T absY = ::librapid::abs(y); - - if (absX < absY) std::swap(absX, absY); // Ensure absX > absY - if (absY == 0) return ::librapid::log(absX); // One side has zero length - - // Avoid overflow and underflow - if (HypotLegTiny < absX && absX < HypotLegHuge) { - constexpr auto normSmall = T(0.5); - constexpr auto normBig = T(3.0); - - const T absYSqr = absY * absY; - - if (absX == T(1)) return logP1(absYSqr) * T(0.5); - - const T norm = absX * absX + absYSqr; - if (normSmall < norm && norm < normBig) // Avoid cancellation - return logP1(normMinusOne(absX, absY)) * T(0.5); - return ::librapid::log(norm) * T(0.5); - } else { // Use 1 1/2 precision to preserve bits - constexpr T cm = T(22713.0L / 32768.0L); // Not sure where this came from - constexpr T cl = T(1.4286068203094172321214581765680755e-6L); // Or this... 
- - const int exp = std::ilogb(absX); - const T absXScaled = std::scalbn(absX, -exp); - const T absYScaled = std::scalbn(absY, -exp); - const T absYScaledSqr = absYScaled * absYScaled; - const T normScaled = absXScaled * absXScaled + absYScaledSqr; - const T realShifted = ::librapid::log(normScaled) * T(0.5); - const auto fExp = static_cast(exp); - return (realShifted + fExp * cl) + fExp * cm; - } + if (!::librapid::isFinite(x) || !::librapid::isFinite(y)) { // Inf or NaN + // Return NaN and raise FE_INVALID if either x or y is NaN + if (::librapid::isNaN(x) || ::librapid::isNaN(y)) return x + y; + + // Return Inf if either of them is infinity + if (::librapid::isInf(x)) return x; + if (::librapid::isInf(y)) return y; + + return x + y; // Fallback + } + + T absX = ::librapid::abs(x); + T absY = ::librapid::abs(y); + + if (absX < absY) std::swap(absX, absY); // Ensure absX > absY + if (absY == 0) return ::librapid::log(absX); // One side has zero length + + // Avoid overflow and underflow + if (HypotLegTiny < absX && absX < HypotLegHuge) { + constexpr auto normSmall = T(0.5); + constexpr auto normBig = T(3.0); + + const T absYSqr = absY * absY; + + if (absX == T(1)) return logP1(absYSqr) * T(0.5); + + const T norm = absX * absX + absYSqr; + if (normSmall < norm && norm < normBig) // Avoid cancellation + return logP1(normMinusOne(absX, absY)) * T(0.5); + return ::librapid::log(norm) * T(0.5); + } else { // Use 1 1/2 precision to preserve bits + constexpr T cm = T(22713.0L / 32768.0L); // Not sure where this came from + constexpr T cl = T(1.4286068203094172321214581765680755e-6L); // Or this... 
+ + const int exp = std::ilogb(absX); + const T absXScaled = std::scalbn(absX, -exp); + const T absYScaled = std::scalbn(absY, -exp); + const T absYScaledSqr = absYScaled * absYScaled; + const T normScaled = absXScaled * absXScaled + absYScaledSqr; + const T realShifted = ::librapid::log(normScaled) * T(0.5); + const auto fExp = static_cast(exp); + return (realShifted + fExp * cl) + fExp * cm; + } #if defined(LIBRAPID_USE_MULTIPREC) - } // This ensures the "if constexpr" above actually stops compiler errors + } // This ensures the "if constexpr" above actually stops compiler errors #endif - } - - /// \brief Compute \f$e^{\text{pleft}} \times \text{right} \times 2^{\text{exponent}}\f$ - /// - /// \tparam T Template type - /// \param pleft Pointer to the value to be exponentiated - /// \param right Multiplier for the exponentiated value - /// \param exponent Exponent for the power of 2 multiplication - /// \return 1 if the result is NaN or Inf, -1 otherwise - template - auto expMul(T *pleft, T right, short exponent) -> short { + } + + /// \brief Compute \f$e^{\text{pleft}} \times \text{right} \times 2^{\text{exponent}}\f$ + /// + /// \tparam T Template type + /// \param pleft Pointer to the value to be exponentiated + /// \param right Multiplier for the exponentiated value + /// \param exponent Exponent for the power of 2 multiplication + /// \return 1 if the result is NaN or Inf, -1 otherwise + template + auto expMul(T *pleft, T right, short exponent) -> short { #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) { - *pleft = ::mpfr::exp(*pleft) * right * ::mpfr::exp2(exponent); - return (::librapid::isNaN(*pleft) || ::librapid::isInf(*pleft)) ? 1 : -1; - } else { + if constexpr (std::is_same_v) { + *pleft = ::mpfr::exp(*pleft) * right * ::mpfr::exp2(exponent); + return (::librapid::isNaN(*pleft) || ::librapid::isInf(*pleft)) ? 
1 : -1; + } else { #endif #if defined(LIBRAPID_MSVC) - auto tmp = static_cast(*pleft); - short ans = _CSTD _Exp(&tmp, static_cast(right), exponent); - *pleft = static_cast(tmp); - return ans; + auto tmp = static_cast(*pleft); + short ans = _CSTD _Exp(&tmp, static_cast(right), exponent); + *pleft = static_cast(tmp); + return ans; #else - *pleft = ::librapid::exp(*pleft) * right * ::librapid::exp2(exponent); - return (::librapid::isNaN(*pleft) || ::librapid::isInf(*pleft)) ? 1 : -1; + *pleft = ::librapid::exp(*pleft) * right * ::librapid::exp2(exponent); + return (::librapid::isNaN(*pleft) || ::librapid::isInf(*pleft)) ? 1 : -1; #endif #if defined(LIBRAPID_USE_MULTIPREC) - } // This ensures the "if constexpr" above actually stops compiler errors + } // This ensures the "if constexpr" above actually stops compiler errors #endif - } - } // namespace algorithm - } // namespace detail - - /// \brief A class representing a complex number of the form \f$a + bi\f$, where \f$a\f$ and - /// \f$b\f$ are real numbers - /// - /// This class represents a complex number of the form \f$a + bi\f$, where \f$a\f$ and - /// \f$b\f$ are real numbers. The class is templated, allowing the user to specify the type - /// of the real and imaginary components. The default type is ``double``. - /// - /// \tparam T The type of the real and imaginary components - template - class Complex { - public: - using Scalar = typename typetraits::TypeInfo::Scalar; - - /// \brief Default constructor - /// - /// Create a new complex number. Both the real and imaginary components are set to zero - Complex() : m_val {T(0), T(0)} {} - - /// \brief Construct a complex number from a real number - /// - /// Create a complex number, setting only the real component. 
The imaginary component is - /// initialized to zero - /// - /// \tparam R The type of the real component - /// \param realVal The real component - template - explicit Complex(const R &realVal) : m_val {T(realVal), T(0)} {} - - /// \brief Construct a complex number from real and imaginary components - /// - /// Create a new complex number where both the real and imaginary parts are set from the - /// passed parameters - /// - /// \tparam R The type of the real component - /// \tparam I The type of the imaginary component - /// \param realVal The real component - /// \param imagVal The imaginary component - template - Complex(const R &realVal, const I &imagVal) : m_val {T(realVal), T(imagVal)} {} - - /// \brief Complex number copy constructor - /// \param other The complex number to copy - Complex(const Complex &other) : m_val {other.real(), other.imag()} {} - - /// \brief Complex number move constructor - /// \param other The complex number to move - Complex(Complex &&other) noexcept : m_val {other.real(), other.imag()} {} - - /// \brief Construct a complex number from another complex number with a different type - /// \tparam Other Type of the components of the other complex number - /// \param other The complex number to copy - template - Complex(const Complex &other) : m_val {T(other.real()), T(other.imag())} {} - - /// \brief Construct a complex number from a std::complex - /// \param other The std::complex value to copy - explicit Complex(const std::complex &other) : m_val {other.real(), other.imag()} {} - - static constexpr auto size() -> size_t { - return typetraits::TypeInfo::packetWidth; - } - - /// \brief Complex number assignment operator - /// \param other The value to assign - /// \return *this - auto operator=(const Complex &other) -> Complex & { - if (this == &other) return *this; - m_val[RE] = other.real(); - m_val[IM] = other.imag(); - return *this; - } - - // template - // LIBRAPID_ALWAYS_INLINE void store(P *ptr) const { - // auto casted = 
reinterpret_cast(ptr); - // auto ret = Vc::interleave(m_val[RE], m_val[IM]); - // ret.first.store(casted); - // ret.second.store(casted + size()); - // } - - // template - // LIBRAPID_ALWAYS_INLINE void load(const P *ptr) { - // auto casted = reinterpret_cast(ptr); - // Vc::deinterleave(&m_val[RE], &m_val[IM], casted, Vc::Aligned); - // } - - /// \brief Assign to the real component - /// - /// Set the real component of this complex number to \p val - /// - /// \param val The value to assign - LIBRAPID_ALWAYS_INLINE void real(const T &val) { m_val[RE] = val; } - - /// \brief Assign to the imaginary component - /// - /// Set the imaginary component of this complex number to \p val - /// - /// \param val The value to assign - LIBRAPID_ALWAYS_INLINE void imag(const T &val) { m_val[IM] = val; } - - /// \brief Access the real component - /// - /// Returns a const reference to the real component of this complex number - /// - /// \return Real component - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto real() const -> const T & { - return m_val[RE]; - } - - /// \brief Access the imaginary component - /// - /// Returns a const reference to the imaginary component of this complex number - /// - /// \return Imaginary component - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto imag() const -> const T & { - return m_val[IM]; - } - - /// \brief Access the real component - /// - /// Returns a reference to the real component of this complex number. Since this is a - /// reference type, it can be assigned to - /// - /// \return Real component - LIBRAPID_ALWAYS_INLINE auto real() -> T & { return m_val[RE]; } - - /// \brief Access the imaginary component - /// - /// Returns a reference to the imaginary component of this complex number. 
Since this is a - /// reference type, it can be assigned to - /// - /// \return imaginary component - LIBRAPID_ALWAYS_INLINE auto imag() -> T & { return m_val[IM]; } - - /// \brief Complex number assigment operator - /// - /// Set the real component of this complex number to \p other, and the imaginary component - /// to 0 - /// - /// \param other - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator=(const T &other) -> Complex & { - m_val[RE] = other; - m_val[IM] = 0; - return *this; - } - - /// \brief Complex number assigment operator - /// - /// Assign another complex number to this one, copying the real and imaginary components - /// - /// \tparam Other The type of the other complex number - /// \param other Complex number to assign - /// \return *this - template - LIBRAPID_ALWAYS_INLINE auto operator=(const Complex &other) -> Complex & { - m_val[RE] = static_cast(other.real()); - m_val[IM] = static_cast(other.real()); - return *this; - } - - /// \brief Inplace addition - /// - /// Add a scalar value to the real component of this imaginary number - /// - /// \param other Scalar value to add - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator+=(const T &other) -> Complex & { - m_val[RE] = m_val[RE] + other; - return *this; - } - - /// \brief Inplace subtraction - /// - /// Subtract a scalar value from the real component of this imaginary number - /// - /// \param other Scalar value to subtract - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator-=(const T &other) -> Complex & { - m_val[RE] = m_val[RE] - other; - return *this; - } - - /// \brief Inplace multiplication - /// - /// Multiply both the real and imaginary components of this complex number by a scalar - /// - /// \param other Scalar value to multiply by - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator*=(const T &other) -> Complex & { - m_val[RE] = m_val[RE] * other; - m_val[IM] = m_val[IM] * other; - return *this; - } - - /// \brief Inplace division - /// - /// Divide both 
the real and imaginary components of this complex number by a scalar - /// - /// \param other Scalar value to divide by - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator/=(const T &other) -> Complex & { - m_val[RE] = m_val[RE] / other; - m_val[IM] = m_val[IM] / other; - return *this; - } - - /// \brief Inplace addition - /// - /// Add a complex number to this one - /// - /// \param other Complex number to add - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator+=(const Complex &other) -> Complex & { - this->_add(other); - return *this; - } - - /// \brief Inplace subtraction - /// - /// Subtract a complex number from this one - /// - /// \param other Complex number to subtract - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator-=(const Complex &other) -> Complex & { - this->_sub(other); - return *this; - } - - /// \brief Inplace multiplication - /// - /// Multiply this complex number by another one - /// - /// \param other Complex number to multiply by - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator*=(const Complex &other) -> Complex & { - this->_mul(other); - return *this; - } - - /// \brief Inplace division - /// - /// Divide this complex number by another one - /// - /// \param other Complex number to divide by - /// \return *this - LIBRAPID_ALWAYS_INLINE auto operator/=(const Complex &other) -> Complex & { - this->_div(other); - return *this; - } - - /// \brief Cast to scalar types - /// - /// Cast this complex number to a scalar type. This will extract only the real component. 
- /// - /// \tparam To Type to cast to - /// \return Scalar - template - LIBRAPID_ALWAYS_INLINE explicit operator To() const { - return static_cast(m_val[RE]); - } - - /// \brief Cast to a complex number with a different scalar type - /// - /// Cast the real and imaginary components of this complex number to a different type and - /// return the result as a new complex number - /// - /// \tparam To Scalar type to cast to - /// \return Complex number - template - LIBRAPID_ALWAYS_INLINE explicit operator Complex() const { - return Complex(static_cast(m_val[RE]), static_cast(m_val[IM])); - } - - template - void str(const fmt::formatter &format, Ctx &ctx) const { - // Complex numbers are printed as (a +- bi) - - fmt::format_to(ctx.out(), "("); - format.format(m_val[RE], ctx); - if (m_val[IM] < 0) { - fmt::format_to(ctx.out(), "-"); - format.format(-m_val[IM], ctx); - } else { - fmt::format_to(ctx.out(), "+"); - format.format(m_val[IM], ctx); - } - fmt::format_to(ctx.out(), "i)"); - } - - protected: - /// \brief Add a complex number to this one - /// \tparam Other Scalar type of the other complex number - /// \param other Other complex number - template - LIBRAPID_ALWAYS_INLINE void _add(const Complex &other) { - m_val[RE] = m_val[RE] + other.real(); - m_val[IM] = m_val[IM] + other.imag(); - } - - /// \brief Subtract a complex number from this one - /// \tparam Other Scalar type of the other complex number - /// \param other Other complex number - template - LIBRAPID_ALWAYS_INLINE void _sub(const Complex &other) { - m_val[RE] = m_val[RE] - other.real(); - m_val[IM] = m_val[IM] - other.imag(); - } - - /// \brief Multiply this complex number by another one - /// \tparam Other Scalar type of the other complex number - /// \param other Other complex number - template - LIBRAPID_ALWAYS_INLINE void _mul(const Complex &other) { - T otherReal = static_cast(other.real()); - T otherImag = static_cast(other.imag()); - - T tmp = m_val[RE] * otherReal - m_val[IM] * otherImag; - 
m_val[IM] = m_val[RE] * otherImag + m_val[IM] * otherReal; - m_val[RE] = tmp; - } - - /// \brief Divide this complex number by another one - /// \tparam Other Scalar type of the other complex number - /// \param other Other complex number - template - LIBRAPID_ALWAYS_INLINE void _div(const Complex &other) { - T otherReal = static_cast(other.real()); - T otherImag = static_cast(other.imag()); - - if (::librapid::isNaN(otherReal) || ::librapid::isNaN(otherImag)) { // Set result to NaN - m_val[RE] = typetraits::TypeInfo::quietNaN(); - m_val[IM] = m_val[RE]; - } else if ((otherImag < 0 ? T(-otherImag) - : T(+otherImag)) < // |other.imag()| < |other.real()| - (otherReal < 0 ? T(-otherReal) : T(+otherReal))) { - T wr = otherImag / otherReal; - T wd = otherReal + wr * otherImag; - - if (::librapid::isNaN(wd) || wd == 0) { // NaN result - m_val[RE] = typetraits::TypeInfo::quietNaN(); - m_val[IM] = m_val[RE]; - } else { // Valid result - T tmp = (m_val[RE] + m_val[IM] * wr) / wd; - m_val[IM] = (m_val[IM] - m_val[RE] * wr) / wd; - m_val[RE] = tmp; - } - } else if (otherImag == 0) { // Set NaN - m_val[RE] = typetraits::TypeInfo::quietNaN(); - m_val[IM] = m_val[RE]; - } else { // 0 < |other.real()| <= |other.imag()| - T wr = otherReal / otherImag; - T wd = otherImag + wr * otherReal; - - if (::librapid::isNaN(wd) || wd == 0) { // NaN result - m_val[RE] = typetraits::TypeInfo::quietNaN(); - m_val[IM] = m_val[RE]; - } else { - T tmp = (m_val[RE] * wr + m_val[IM]) / wd; - m_val[IM] = (m_val[IM] * wr - m_val[RE]) / wd; - m_val[RE] = tmp; - } - } - } - - private: - T m_val[2]; - static constexpr size_t RE = 0; - static constexpr size_t IM = 1; - }; - - /// \brief Negate a complex number - /// \tparam T Scalar type of the complex number - /// \param other Complex number to negate - /// \return Negated complex number - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const Complex &other) - -> Complex { - return {-other.real(), -other.imag()}; - } - - /// \brief 
Add two complex numbers - /// - /// Add two complex numbers together, returning the result - /// - /// \tparam L Scalar type of LHS - /// \tparam R Scalar type of RHS - /// \param left LHS complex number - /// \param right RHS complex number - /// \return Sum of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(const Complex &left, - const Complex &right) { - using Scalar = typename std::common_type_t; - Complex tmp(left.real(), left.imag()); - tmp += Complex(right.real(), right.imag()); - return tmp; - } - - /// \brief Add a complex number and a scalar - /// - /// Add a real number to the real component of a complex number, returning the result - /// - /// \tparam T Scalar type of the complex number - /// \tparam R Type of the real number - /// \param left LHS complex number - /// \param right RHS scalar - /// \return Sum of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(const Complex &left, - const R &right) { - Complex tmp(left); - tmp.real(tmp.real() + right); - return tmp; - } - - /// \brief Add a scalar to a complex number - /// - /// Add a real number to the real component of a complex number, returning the result - /// - /// \tparam R Type of the real number - /// \tparam T Scalar type of the complex number - /// \param left LHS scalar - /// \param right RHS complex number - /// \return Sum of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(const R &left, - const Complex &right) { - Complex tmp(left); - tmp += right; - return tmp; - } - - /// \brief Subtract a complex number from another complex number - /// - /// Subtract the real and imaginary components of the RHS complex number from the corresponding - /// components of the LHS complex number, returning the result - /// - /// \tparam L Scalar type of the LHS complex number - /// \tparam R Scalar type of the RHS complex number - /// \param left LHS complex number - /// \param right RHS complex number - 
/// \return Difference of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const Complex &left, - const Complex &right) { - using Scalar = typename std::common_type_t; - Complex tmp(left.real(), left.imag()); - tmp -= Complex(right.real(), right.imag()); - return tmp; - } - - /// \brief Subtract a scalar from a complex number - /// - /// Subtract a real number from the real component of a complex number, returning the result - /// - /// \tparam T Scalar type of the complex number - /// \tparam R Type of the real number - /// \param left LHS complex number - /// \param right RHS scalar - /// \return Difference of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const Complex &left, - const R &right) { - Complex tmp(left); - tmp.real(tmp.real() - right); - return tmp; - } - - /// \brief Subtract a complex number from a scalar - /// - /// Subtract the real and imaginary components of the RHS complex number from a real number, - /// returning the result - /// - /// \tparam T Scalar type of the complex number - /// \tparam R Type of the real number - /// \param left LHS scalar - /// \param right RHS complex number - /// \return Difference of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const R &left, - const Complex &right) { - Complex tmp(left); - tmp -= right; - return tmp; - } - - /// \brief Multiply two complex numbers - /// - /// Multiply the LHS and RHS complex numbers, returning the result - /// - /// \tparam L Scalar type of the LHS complex number - /// \tparam R Scalar type of the RHS complex number - /// \param left LHS complex number - /// \param right RHS complex number - /// \return Product of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(const Complex &left, - const Complex &right) { - using Scalar = typename std::common_type_t; - Complex tmp(left.real(), left.imag()); - tmp *= Complex(right.real(), right.imag()); - 
return tmp; - } - - /// \brief Multiply a complex number by a scalar - /// - /// Multiply the real and imaginary components of a complex number by a real number, returning - /// the result - /// - /// \tparam T Scalar type of the complex number - /// \tparam R Type of the real number - /// \param left LHS complex number - /// \param right RHS scalar - /// \return Product of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(const Complex &left, - const R &right) { - Complex tmp(left); - tmp.real(tmp.real() * right); - tmp.imag(tmp.imag() * right); - return tmp; - } - - /// \brief Multiply a scalar by a complex number - /// - /// Multiply a real number by the real and imaginary components of a complex number, returning - /// the result - /// - /// \tparam T Scalar type of the complex number - /// \tparam R Type of the real number - /// \param left LHS scalar - /// \param right RHS complex number - /// \return Product of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(const R &left, - const Complex &right) { - Complex tmp(left); - tmp *= right; - return tmp; - } - - /// \brief Divide two complex numbers - /// - /// Divide the LHS complex number by the RHS complex number, returning the result - /// - /// \tparam L Scalar type of the LHS complex number - /// \tparam R Scalar type of the RHS complex number - /// \param left LHS complex number - /// \param right RHS complex number - /// \return Quotient of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(const Complex &left, - const Complex &right) { - using Scalar = typename std::common_type_t; - Complex tmp(left.real(), left.imag()); - tmp /= Complex(right.real(), right.imag()); - return tmp; - } - - /// \brief Divide a complex number by a scalar - /// - /// Divide the real and imaginary components of a complex number by a real number, returning the - /// result - /// - /// \tparam T Scalar type of the complex number - /// 
\tparam R Type of the real number - /// \param left LHS complex number - /// \param right RHS scalar - /// \return Quotient of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(const Complex &left, - const R &right) { - Complex tmp(left); - tmp.real(tmp.real() / right); - tmp.imag(tmp.imag() / right); - return tmp; - } - - /// \brief Divide a scalar by a complex number - /// - /// Divide a real number by the real and imaginary components of a complex number, returning the - /// result - /// - /// \tparam T Scalar type of the complex number - /// \tparam R Type of the real number - /// \param left LHS scalar - /// \param right RHS complex number - /// \return Quotient of LHS and RHS - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(const R &left, - const Complex &right) { - Complex tmp(left); - tmp /= right; - return tmp; - } - - /// \brief Equality comparison of two complex numbers - /// \tparam L Scalar type of LHS complex number - /// \tparam R Scalar type of RHS complex number - /// \param left LHS complex number - /// \param right RHS complex number - /// \return true if equal, false otherwise - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator==(const Complex &left, - const Complex &right) { - return left.real() == right.real() && left.imag() == right.imag(); - } - - /// \brief Equality comparison of complex number and scalar - /// - /// Compares the real component of the complex number to the scalar, and the imaginary component - /// to zero. Returns true if and only if both comparisons are true. 
- /// - /// \tparam T Scalar type of complex number - /// \param left LHS complex number - /// \param right RHS scalar - /// \return true if equal, false otherwise - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator==(const Complex &left, - T &right) { - return left.real() == right && left.imag() == 0; - } + } + } // namespace algorithm + } // namespace detail + + /// \brief A class representing a complex number of the form \f$a + bi\f$, where \f$a\f$ and + /// \f$b\f$ are real numbers + /// + /// This class represents a complex number of the form \f$a + bi\f$, where \f$a\f$ and + /// \f$b\f$ are real numbers. The class is templated, allowing the user to specify the type + /// of the real and imaginary components. The default type is ``double``. + /// + /// \tparam T The type of the real and imaginary components + template + class Complex { + public: + using Scalar = typename typetraits::TypeInfo::Scalar; + + /// \brief Default constructor + /// + /// Create a new complex number. Both the real and imaginary components are set to zero + Complex() : m_val {T(0), T(0)} {} + + /// \brief Construct a complex number from a real number + /// + /// Create a complex number, setting only the real component. 
The imaginary component is + /// initialized to zero + /// + /// \tparam R The type of the real component + /// \param realVal The real component + template + explicit Complex(const R &realVal) : m_val {T(realVal), T(0)} {} + + /// \brief Construct a complex number from real and imaginary components + /// + /// Create a new complex number where both the real and imaginary parts are set from the + /// passed parameters + /// + /// \tparam R The type of the real component + /// \tparam I The type of the imaginary component + /// \param realVal The real component + /// \param imagVal The imaginary component + template + Complex(const R &realVal, const I &imagVal) : m_val {T(realVal), T(imagVal)} {} + + /// \brief Complex number copy constructor + /// \param other The complex number to copy + Complex(const Complex &other) : m_val {other.real(), other.imag()} {} + + /// \brief Complex number move constructor + /// \param other The complex number to move + Complex(Complex &&other) noexcept : m_val {other.real(), other.imag()} {} + + /// \brief Construct a complex number from another complex number with a different type + /// \tparam Other Type of the components of the other complex number + /// \param other The complex number to copy + template + Complex(const Complex &other) : m_val {T(other.real()), T(other.imag())} {} + + /// \brief Construct a complex number from a std::complex + /// \param other The std::complex value to copy + explicit Complex(const std::complex &other) : m_val {other.real(), other.imag()} {} + + static constexpr auto size() -> size_t { + return typetraits::TypeInfo::packetWidth; + } + + /// \brief Complex number assignment operator + /// \param other The value to assign + /// \return *this + auto operator=(const Complex &other) -> Complex & { + if (this == &other) return *this; + m_val[RE] = other.real(); + m_val[IM] = other.imag(); + return *this; + } + + // template + // LIBRAPID_ALWAYS_INLINE void store(P *ptr) const { + // auto casted = 
reinterpret_cast(ptr); + // auto ret = Vc::interleave(m_val[RE], m_val[IM]); + // ret.first.store(casted); + // ret.second.store(casted + size()); + // } + + // template + // LIBRAPID_ALWAYS_INLINE void load(const P *ptr) { + // auto casted = reinterpret_cast(ptr); + // Vc::deinterleave(&m_val[RE], &m_val[IM], casted, Vc::Aligned); + // } + + /// \brief Assign to the real component + /// + /// Set the real component of this complex number to \p val + /// + /// \param val The value to assign + LIBRAPID_ALWAYS_INLINE void real(const T &val) { m_val[RE] = val; } + + /// \brief Assign to the imaginary component + /// + /// Set the imaginary component of this complex number to \p val + /// + /// \param val The value to assign + LIBRAPID_ALWAYS_INLINE void imag(const T &val) { m_val[IM] = val; } + + /// \brief Access the real component + /// + /// Returns a const reference to the real component of this complex number + /// + /// \return Real component + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto real() const -> const T & { + return m_val[RE]; + } + + /// \brief Access the imaginary component + /// + /// Returns a const reference to the imaginary component of this complex number + /// + /// \return Imaginary component + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto imag() const -> const T & { + return m_val[IM]; + } + + /// \brief Access the real component + /// + /// Returns a reference to the real component of this complex number. Since this is a + /// reference type, it can be assigned to + /// + /// \return Real component + LIBRAPID_ALWAYS_INLINE auto real() -> T & { return m_val[RE]; } + + /// \brief Access the imaginary component + /// + /// Returns a reference to the imaginary component of this complex number. 
Since this is a + /// reference type, it can be assigned to + /// + /// \return imaginary component + LIBRAPID_ALWAYS_INLINE auto imag() -> T & { return m_val[IM]; } + + /// \brief Complex number assignment operator + /// + /// Set the real component of this complex number to \p other, and the imaginary component + /// to 0 + /// + /// \param other Scalar value to assign to the real component + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator=(const T &other) -> Complex & { + m_val[RE] = other; + m_val[IM] = 0; + return *this; + } + + /// \brief Complex number assignment operator + /// + /// Assign another complex number to this one, copying the real and imaginary components + /// + /// \tparam Other The type of the other complex number + /// \param other Complex number to assign + /// \return *this + template + LIBRAPID_ALWAYS_INLINE auto operator=(const Complex &other) -> Complex & { + m_val[RE] = static_cast(other.real()); + m_val[IM] = static_cast(other.imag()); + return *this; + } + + /// \brief Inplace addition + /// + /// Add a scalar value to the real component of this complex number + /// + /// \param other Scalar value to add + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator+=(const T &other) -> Complex & { + m_val[RE] = m_val[RE] + other; + return *this; + } + + /// \brief Inplace subtraction + /// + /// Subtract a scalar value from the real component of this complex number + /// + /// \param other Scalar value to subtract + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator-=(const T &other) -> Complex & { + m_val[RE] = m_val[RE] - other; + return *this; + } + + /// \brief Inplace multiplication + /// + /// Multiply both the real and imaginary components of this complex number by a scalar + /// + /// \param other Scalar value to multiply by + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator*=(const T &other) -> Complex & { + m_val[RE] = m_val[RE] * other; + m_val[IM] = m_val[IM] * other; + return *this; + } + + /// \brief Inplace division + /// + /// Divide both 
the real and imaginary components of this complex number by a scalar + /// + /// \param other Scalar value to divide by + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator/=(const T &other) -> Complex & { + m_val[RE] = m_val[RE] / other; + m_val[IM] = m_val[IM] / other; + return *this; + } + + /// \brief Inplace addition + /// + /// Add a complex number to this one + /// + /// \param other Complex number to add + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator+=(const Complex &other) -> Complex & { + this->_add(other); + return *this; + } + + /// \brief Inplace subtraction + /// + /// Subtract a complex number from this one + /// + /// \param other Complex number to subtract + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator-=(const Complex &other) -> Complex & { + this->_sub(other); + return *this; + } + + /// \brief Inplace multiplication + /// + /// Multiply this complex number by another one + /// + /// \param other Complex number to multiply by + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator*=(const Complex &other) -> Complex & { + this->_mul(other); + return *this; + } + + /// \brief Inplace division + /// + /// Divide this complex number by another one + /// + /// \param other Complex number to divide by + /// \return *this + LIBRAPID_ALWAYS_INLINE auto operator/=(const Complex &other) -> Complex & { + this->_div(other); + return *this; + } + + /// \brief Cast to scalar types + /// + /// Cast this complex number to a scalar type. This will extract only the real component. 
+ /// + /// \tparam To Type to cast to + /// \return Scalar + template + LIBRAPID_ALWAYS_INLINE explicit operator To() const { + return static_cast(m_val[RE]); + } + + /// \brief Cast to a complex number with a different scalar type + /// + /// Cast the real and imaginary components of this complex number to a different type and + /// return the result as a new complex number + /// + /// \tparam To Scalar type to cast to + /// \return Complex number + template + LIBRAPID_ALWAYS_INLINE explicit operator Complex() const { + return Complex(static_cast(m_val[RE]), static_cast(m_val[IM])); + } + + template + void str(const fmt::formatter &format, Ctx &ctx) const { + // Complex numbers are printed as (a +- bi) + + fmt::format_to(ctx.out(), "("); + format.format(m_val[RE], ctx); + if (m_val[IM] < 0) { + fmt::format_to(ctx.out(), "-"); + format.format(-m_val[IM], ctx); + } else { + fmt::format_to(ctx.out(), "+"); + format.format(m_val[IM], ctx); + } + fmt::format_to(ctx.out(), "i)"); + } + + protected: + /// \brief Add a complex number to this one + /// \tparam Other Scalar type of the other complex number + /// \param other Other complex number + template + LIBRAPID_ALWAYS_INLINE void _add(const Complex &other) { + m_val[RE] = m_val[RE] + other.real(); + m_val[IM] = m_val[IM] + other.imag(); + } + + /// \brief Subtract a complex number from this one + /// \tparam Other Scalar type of the other complex number + /// \param other Other complex number + template + LIBRAPID_ALWAYS_INLINE void _sub(const Complex &other) { + m_val[RE] = m_val[RE] - other.real(); + m_val[IM] = m_val[IM] - other.imag(); + } + + /// \brief Multiply this complex number by another one + /// \tparam Other Scalar type of the other complex number + /// \param other Other complex number + template + LIBRAPID_ALWAYS_INLINE void _mul(const Complex &other) { + T otherReal = static_cast(other.real()); + T otherImag = static_cast(other.imag()); + + T tmp = m_val[RE] * otherReal - m_val[IM] * otherImag; + 
m_val[IM] = m_val[RE] * otherImag + m_val[IM] * otherReal; + m_val[RE] = tmp; + } + + /// \brief Divide this complex number by another one + /// \tparam Other Scalar type of the other complex number + /// \param other Other complex number + template + LIBRAPID_ALWAYS_INLINE void _div(const Complex &other) { + T otherReal = static_cast(other.real()); + T otherImag = static_cast(other.imag()); + + if (::librapid::isNaN(otherReal) || ::librapid::isNaN(otherImag)) { // Set result to NaN + m_val[RE] = typetraits::NumericInfo::quietNaN(); + m_val[IM] = m_val[RE]; + } else if ((otherImag < 0 ? T(-otherImag) + : T(+otherImag)) < // |other.imag()| < |other.real()| + (otherReal < 0 ? T(-otherReal) : T(+otherReal))) { + T wr = otherImag / otherReal; + T wd = otherReal + wr * otherImag; + + if (::librapid::isNaN(wd) || wd == 0) { // NaN result + m_val[RE] = typetraits::NumericInfo::quietNaN(); + m_val[IM] = m_val[RE]; + } else { // Valid result + T tmp = (m_val[RE] + m_val[IM] * wr) / wd; + m_val[IM] = (m_val[IM] - m_val[RE] * wr) / wd; + m_val[RE] = tmp; + } + } else if (otherImag == 0) { // Set NaN + m_val[RE] = typetraits::NumericInfo::quietNaN(); + m_val[IM] = m_val[RE]; + } else { // 0 < |other.real()| <= |other.imag()| + T wr = otherReal / otherImag; + T wd = otherImag + wr * otherReal; + + if (::librapid::isNaN(wd) || wd == 0) { // NaN result + m_val[RE] = typetraits::NumericInfo::quietNaN(); + m_val[IM] = m_val[RE]; + } else { + T tmp = (m_val[RE] * wr + m_val[IM]) / wd; + m_val[IM] = (m_val[IM] * wr - m_val[RE]) / wd; + m_val[RE] = tmp; + } + } + } + + private: + T m_val[2]; + static constexpr size_t RE = 0; + static constexpr size_t IM = 1; + }; + + /// \brief Negate a complex number + /// \tparam T Scalar type of the complex number + /// \param other Complex number to negate + /// \return Negated complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const Complex &other) + -> Complex { + return {-other.real(), -other.imag()}; + } + + 
/// \brief Add two complex numbers + /// + /// Add two complex numbers together, returning the result + /// + /// \tparam L Scalar type of LHS + /// \tparam R Scalar type of RHS + /// \param left LHS complex number + /// \param right RHS complex number + /// \return Sum of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(const Complex &left, + const Complex &right) { + using Scalar = typename std::common_type_t; + Complex tmp(left.real(), left.imag()); + tmp += Complex(right.real(), right.imag()); + return tmp; + } + + /// \brief Add a complex number and a scalar + /// + /// Add a real number to the real component of a complex number, returning the result + /// + /// \tparam T Scalar type of the complex number + /// \tparam R Type of the real number + /// \param left LHS complex number + /// \param right RHS scalar + /// \return Sum of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(const Complex &left, + const R &right) { + Complex tmp(left); + tmp.real(tmp.real() + right); + return tmp; + } + + /// \brief Add a scalar to a complex number + /// + /// Add a real number to the real component of a complex number, returning the result + /// + /// \tparam R Type of the real number + /// \tparam T Scalar type of the complex number + /// \param real LHS scalar + /// \param right RHS complex number + /// \return Sum of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator+(const R &real, + const Complex &right) { + Complex tmp(real); + tmp += right; + return tmp; + } + + /// \brief Subtract a complex number from another complex number + /// + /// Subtract the real and imaginary components of the RHS complex number from the corresponding + /// components of the LHS complex number, returning the result + /// + /// \tparam L Scalar type of the LHS complex number + /// \tparam R Scalar type of the RHS complex number + /// \param left LHS complex number + /// \param right RHS complex 
number + /// \return Difference of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const Complex &left, + const Complex &right) { + using Scalar = typename std::common_type_t; + Complex tmp(left.real(), left.imag()); + tmp -= Complex(right.real(), right.imag()); + return tmp; + } + + /// \brief Subtract a scalar from a complex number + /// + /// Subtract a real number from the real component of a complex number, returning the result + /// + /// \tparam T Scalar type of the complex number + /// \tparam R Type of the real number + /// \param left LHS complex number + /// \param real RHS scalar + /// \return Difference of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const Complex &left, + const R &real) { + Complex tmp(left); + tmp -= real; + return tmp; + } + + /// \brief Subtract a complex number from a scalar + /// + /// Subtract the real and imaginary components of the RHS complex number from a real number, + /// returning the result + /// + /// \tparam T Scalar type of the complex number + /// \tparam R Type of the real number + /// \param real LHS scalar + /// \param right RHS complex number + /// \return Difference of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator-(const R &real, + const Complex &right) { + Complex tmp(real); + tmp -= right; + return tmp; + } + + /// \brief Multiply two complex numbers + /// + /// Multiply the LHS and RHS complex numbers, returning the result + /// + /// \tparam L Scalar type of the LHS complex number + /// \tparam R Scalar type of the RHS complex number + /// \param left LHS complex number + /// \param right RHS complex number + /// \return Product of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(const Complex &left, + const Complex &right) { + using Scalar = typename std::common_type_t; + Complex tmp(left.real(), left.imag()); + tmp *= Complex(right.real(), right.imag()); + return tmp; + 
} + + /// \brief Multiply a complex number by a scalar + /// + /// Multiply the real and imaginary components of a complex number by a real number, returning + /// the result + /// + /// \tparam T Scalar type of the complex number + /// \tparam R Type of the real number + /// \param left LHS complex number + /// \param real RHS scalar + /// \return Product of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(const Complex &left, + const R &real) { + Complex tmp(left); + tmp *= real; + return tmp; + } + + /// \brief Multiply a scalar by a complex number + /// + /// Multiply a real number by the real and imaginary components of a complex number, returning + /// the result + /// + /// \tparam R Type of the real number + /// \tparam T Scalar type of the complex number + /// \param real LHS scalar + /// \param right RHS complex number + /// \return Product of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator*(const R &real, + const Complex &right) { + Complex tmp(real); + tmp *= right; + return tmp; + } + + /// \brief Divide two complex numbers + /// + /// Divide the LHS complex number by the RHS complex number, returning the result + /// + /// \tparam L Scalar type of the LHS complex number + /// \tparam R Scalar type of the RHS complex number + /// \param left LHS complex number + /// \param right RHS complex number + /// \return Quotient of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(const Complex &left, + const Complex &right) { + using Scalar = typename std::common_type_t; + Complex tmp(left.real(), left.imag()); + tmp /= Complex(right.real(), right.imag()); + return tmp; + } + + /// \brief Divide a complex number by a scalar + /// + /// Divide the real and imaginary components of a complex number by a real number, returning the + /// result + /// + /// \tparam T Scalar type of the complex number + /// \tparam R Type of the real number + /// \param left LHS complex 
number + /// \param real RHS scalar + /// \return Quotient of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(const Complex &left, + const R &real) { + Complex tmp(left); + tmp /= real; + return tmp; + } + + /// \brief Divide a scalar by a complex number + /// + /// Divide a real number by the real and imaginary components of a complex number, returning the + /// result + /// + /// \tparam T Scalar type of the complex number + /// \tparam R Type of the real number + /// \param real LHS scalar + /// \param right RHS complex number + /// \return Quotient of LHS and RHS + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto operator/(const R &real, + const Complex &right) { + Complex tmp(real); + tmp /= right; + return tmp; + } + + /// \brief Equality comparison of two complex numbers + /// \tparam L Scalar type of LHS complex number + /// \tparam R Scalar type of RHS complex number + /// \param left LHS complex number + /// \param right RHS complex number + /// \return true if equal, false otherwise + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator==(const Complex &left, + const Complex &right) { + return left.real() == right.real() && left.imag() == right.imag(); + } + + /// \brief Equality comparison of complex number and scalar + /// + /// Compares the real component of the complex number to the scalar, and the imaginary component + /// to zero. Returns true if and only if both comparisons are true. 
+ /// + /// \tparam T Scalar type of complex number + /// \param left LHS complex number + /// \param right RHS scalar + /// \return true if equal, false otherwise + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator==(const Complex &left, + T &right) { + return left.real() == right && left.imag() == 0; + } #if !defined(LIBRAPID_CXX_20) - /// \brief Equality comparison of scalar and complex number - /// - /// Compares the real component of the complex number to the scalar, and the imaginary component - /// to zero. Returns true if and only if both comparisons are true. - /// - /// \tparam T Scalar type of complex number - /// \param left LHS scalar - /// \param right RHS complex number - /// \return true if equal, false otherwise - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator==(const T &left, - const Complex &right) { - return left == right.real() && 0 == right.imag(); - } + /// \brief Equality comparison of scalar and complex number + /// + /// Compares the real component of the complex number to the scalar, and the imaginary component + /// to zero. Returns true if and only if both comparisons are true. 
+ /// + /// \tparam T Scalar type of complex number + /// \param left LHS scalar + /// \param right RHS complex number + /// \return true if equal, false otherwise + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator==(const T &left, + const Complex &right) { + return left == right.real() && 0 == right.imag(); + } #endif #if !defined(LIBRAPID_CXX_20) - /// \brief Inequality comparison of two complex numbers - /// \tparam T Scalar type of complex number - /// \param left LHS complex number - /// \param right RHS complex number - /// \return true if ***not*** equal, false otherwise - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator!=(const Complex &left, - const Complex &right) { - return !(left == right); - } - - /// \brief Inequality comparison of complex number and scalar - /// \see operator==(const Complex &, T &) - /// \tparam T Scalar type of complex number - /// \param left LHS complex number - /// \param right RHS scalar - /// \return true if ***not*** equal, false otherwise - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator!=(const Complex &left, - T &right) { - return !(left == right); - } - - /// \brief Inequality comparison of scalar and complex number - /// \see operator==(const T &, const Complex &) - /// \tparam T Scalar type of complex number - /// \param left LHS scalar - /// \param right RHS complex number - /// \return true if ***not*** equal, false otherwise - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator!=(const T &left, - const Complex &right) { - return !(left == right); - } + /// \brief Inequality comparison of two complex numbers + /// \tparam T Scalar type of complex number + /// \param left LHS complex number + /// \param right RHS complex number + /// \return true if ***not*** equal, false otherwise + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator!=(const Complex &left, + const Complex &right) { + 
return !(left == right); + } + + /// \brief Inequality comparison of complex number and scalar + /// \see operator==(const Complex &, T &) + /// \tparam T Scalar type of complex number + /// \param left LHS complex number + /// \param right RHS scalar + /// \return true if ***not*** equal, false otherwise + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator!=(const Complex &left, + T &right) { + return !(left == right); + } + + /// \brief Inequality comparison of scalar and complex number + /// \see operator==(const T &, const Complex &) + /// \tparam T Scalar type of complex number + /// \param left LHS scalar + /// \param right RHS complex number + /// \return true if ***not*** equal, false otherwise + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE constexpr bool operator!=(const T &left, + const Complex &right) { + return !(left == right); + } #endif - /// \brief Return \f$ \mathrm{Re}(z) \f$ - /// \tparam T Scalar type of the complex number - /// \param val Complex number - /// \return Real component of the complex number - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T real(const Complex &val) { - return val.real(); - } - - /// \brief Return \f$ \mathrm{Im}(z) \f$ - /// \tparam T Scalar type of the complex number - /// \param val Complex number - /// \return Imaginary component of the complex number - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T imag(const Complex &val) { - return val.imag(); - } - - /// \brief Return \f$ \sqrt{z} \f$ - /// \tparam T Scalar type of the complex number - /// \param val Complex number - /// \return Square root of the complex number - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex - sqrt(const Complex &val); // Defined later - - /// \brief Return \f$ \sqrt{\mathrm{Re}(z)^2 + \mathrm{Im}(z)^2} \f$ - /// \tparam T Scalar type of the complex number - /// \param val Complex number - /// \return Absolute value of the complex number - template - LIBRAPID_NODISCARD 
LIBRAPID_ALWAYS_INLINE T abs(const Complex &val) { - return ::librapid::hypot(val.real(), val.imag()); - } - - /// \brief Returns \f$z^{*}\f$ - /// \tparam T Scalar type of the complex number - /// \param val Complex number - /// \return Complex conjugate of the complex number - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex conj(const Complex &val) { - return Complex(val.real(), -val.imag()); - } - - /// \brief Compute the complex arc cosine of a complex number - /// - /// This function computes the complex arc cosine of the input complex number, - /// \f$\text{acos}(z)\f$ - /// - /// The algorithm handles NaN and infinity values, and avoids overflow. - /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex arc cosine of the input complex number - template - LIBRAPID_NODISCARD Complex acos(const Complex &other) { - const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::TypeInfo::max()); - const T pi = []() { + /// \brief Return \f$ \mathrm{Re}(z) \f$ + /// \tparam T Scalar type of the complex number + /// \param val Complex number + /// \return Real component of the complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T real(const Complex &val) { + return val.real(); + } + + /// \brief Return \f$ \mathrm{Im}(z) \f$ + /// \tparam T Scalar type of the complex number + /// \param val Complex number + /// \return Imaginary component of the complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T imag(const Complex &val) { + return val.imag(); + } + + /// \brief Return \f$ \sqrt{z} \f$ + /// \tparam T Scalar type of the complex number + /// \param val Complex number + /// \return Square root of the complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex + sqrt(const Complex &val); // Defined later + + /// \brief Return \f$ \sqrt{\mathrm{Re}(z)^2 + \mathrm{Im}(z)^2} \f$ + /// \tparam T Scalar type of the complex number + /// \param val 
Complex number + /// \return Absolute value of the complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T abs(const Complex &val) { + return ::librapid::hypot(val.real(), val.imag()); + } + + /// \brief Returns \f$z^{*}\f$ + /// \tparam T Scalar type of the complex number + /// \param val Complex number + /// \return Complex conjugate of the complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex conj(const Complex &val) { + return Complex(val.real(), -val.imag()); + } + + /// \brief Compute the complex arc cosine of a complex number + /// + /// This function computes the complex arc cosine of the input complex number, + /// \f$\text{acos}(z)\f$ + /// + /// The algorithm handles NaN and infinity values, and avoids overflow. + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex arc cosine of the input complex number + template + LIBRAPID_NODISCARD Complex acos(const Complex &other) { + const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::NumericInfo::max()); + const T pi = []() { #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) - return ::librapid::constPi(); - else - return static_cast(3.1415926535897932384626433832795029L); + if constexpr (std::is_same_v) + return ::librapid::constPi(); + else + return static_cast(3.1415926535897932384626433832795029L); #else - return static_cast(3.1415926535897932384626433832795029L); + return static_cast(3.1415926535897932384626433832795029L); #endif - }(); - - const T re = real(other); - const T im = imag(other); - T ux, vx; - - if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN - ux = typetraits::TypeInfo::quietNaN(); - vx = ux; - } else if (::librapid::isInf(re)) { // +/- Inf - if (::librapid::isInf(im)) { - if (re < 0) - ux = T(0.75) * pi; // (-Inf, +/-Inf) - else - ux = T(0.25) * pi; // (-Inf, +/-Inf) - } else if (re < 0) { - ux = pi; // (-Inf, finite) - } else { - ux = 0; // 
(+Inf, finite) - } - vx = -::librapid::copySign(typetraits::TypeInfo::infinity(), im); - } else if (::librapid::isInf(im)) { // finite, Inf) - ux = T(0.5) * pi; // (finite, +/-Inf) - vx = -im; - } else { // (finite, finite) - const Complex wx = sqrt(Complex(1 + re, -im)); - const Complex zx = sqrt(Complex(1 - re, -im)); - const T wr = real(wx); - const T wi = imag(wx); - const T zr = real(zx); - const T zi = imag(zx); - T alpha, beta; - - ux = 2 * ::librapid::atan2(zr, wr); - - if (arcBig < wr) { // Real part is large - alpha = wr; - beta = zi + wi * (zr / alpha); - } else if (arcBig < wi) { // Imaginary part is large - alpha = wi; - beta = wr * (zi / alpha) + zr; - } else if (wi < -arcBig) { // Imaginary part of w is large negative - alpha = -wi; - beta = wr * (zi / alpha) - zr; - } else { // Shouldn't overflow (?) - alpha = 0; - beta = wr * zi + wi * zr; // Im(w * z) - } - - vx = ::librapid::asinh(beta); - if (alpha != 0) { - // asinh(a * b) = asinh(a) + log(b) - if (0 <= vx) - vx += ::librapid::log(alpha); - else - vx -= ::librapid::log(alpha); - } - } - return Complex(ux, vx); - } - - /// \brief Compute the complex hyperbolic arc cosine of a complex number - /// - /// - /// This function computes the complex area hyperbolic cosine of the input complex number, - /// \f$ \text{acosh}(z) \f$ - /// - /// The algorithm handles NaN and infinity values, and avoids overflow. 
- /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex area hyperbolic cosine of the input complex number - template - LIBRAPID_NODISCARD Complex acosh(const Complex &other) { - const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::TypeInfo::max()); - const T pi = []() { + }(); + + const T re = real(other); + const T im = imag(other); + T ux, vx; + + if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN + ux = typetraits::NumericInfo::quietNaN(); + vx = ux; + } else if (::librapid::isInf(re)) { // +/- Inf + if (::librapid::isInf(im)) { + if (re < 0) + ux = T(0.75) * pi; // (-Inf, +/-Inf) + else + ux = T(0.25) * pi; // (-Inf, +/-Inf) + } else if (re < 0) { + ux = pi; // (-Inf, finite) + } else { + ux = 0; // (+Inf, finite) + } + vx = -::librapid::copySign(typetraits::NumericInfo::infinity(), im); + } else if (::librapid::isInf(im)) { // finite, Inf) + ux = T(0.5) * pi; // (finite, +/-Inf) + vx = -im; + } else { // (finite, finite) + const Complex wx = sqrt(Complex(1 + re, -im)); + const Complex zx = sqrt(Complex(1 - re, -im)); + const T wr = real(wx); + const T wi = imag(wx); + const T zr = real(zx); + const T zi = imag(zx); + T alpha, beta; + + ux = 2 * ::librapid::atan2(zr, wr); + + if (arcBig < wr) { // Real part is large + alpha = wr; + beta = zi + wi * (zr / alpha); + } else if (arcBig < wi) { // Imaginary part is large + alpha = wi; + beta = wr * (zi / alpha) + zr; + } else if (wi < -arcBig) { // Imaginary part of w is large negative + alpha = -wi; + beta = wr * (zi / alpha) - zr; + } else { // Shouldn't overflow (?) 
+ alpha = 0; + beta = wr * zi + wi * zr; // Im(w * z) + } + + vx = ::librapid::asinh(beta); + if (alpha != 0) { + // asinh(a * b) = asinh(a) + log(b) + if (0 <= vx) + vx += ::librapid::log(alpha); + else + vx -= ::librapid::log(alpha); + } + } + return Complex(ux, vx); + } + + /// \brief Compute the complex hyperbolic arc cosine of a complex number + /// + /// + /// This function computes the complex area hyperbolic cosine of the input complex number, + /// \f$ \text{acosh}(z) \f$ + /// + /// The algorithm handles NaN and infinity values, and avoids overflow. + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex area hyperbolic cosine of the input complex number + template + LIBRAPID_NODISCARD Complex acosh(const Complex &other) { + const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::NumericInfo::max()); + const T pi = []() { #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) - return ::librapid::constPi(); - else - return static_cast(3.1415926535897932384626433832795029L); + if constexpr (std::is_same_v) + return ::librapid::constPi(); + else + return static_cast(3.1415926535897932384626433832795029L); #else - return static_cast(3.1415926535897932384626433832795029L); + return static_cast(3.1415926535897932384626433832795029L); #endif - }(); - - const T re = real(other); - T im = imag(other); - T ux, vx; - - if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN - ux = typetraits::TypeInfo::quietNaN(); - vx = ux; - } else if (::librapid::isInf(re)) { // (+/-Inf, not NaN) - ux = typetraits::TypeInfo::infinity(); - if (::librapid::isInf(im)) { - if (re < 0) - vx = T(0.75) * pi; // (-Inf, +/-Inf) - else - vx = T(0.25) * pi; // (+Inf, +/-Inf) - } else if (re < 0) { - vx = pi; // (-Inf, finite) - } else { - vx = 0; // (+Inf, finite) - } - vx = ::librapid::copySign(vx, im); - } else { // (finite, finite) - const Complex wx = sqrt(Complex(re - 1, -im)); - const Complex 
zx = sqrt(Complex(re + 1, im)); - const T wr = real(wx); - const T wi = imag(wx); - const T zr = real(zx); - const T zi = imag(zx); - T alpha, beta; - - if (arcBig < wr) { // Real parts large - alpha = wr; - beta = zr - wi * (zi / alpha); - } else if (arcBig < wi) { // Imaginary parts large - alpha = wi; - beta = wr * (zr / alpha) - zi; - } else { // Shouldn't overflow (?) - alpha = 0; - beta = wr * zr - wi * zi; // Re(w * z) - } - - ux = ::librapid::asinh(beta); - if (alpha != 0) { - if (0 <= ux) - ux += ::librapid::log(alpha); - else - ux -= ::librapid::log(alpha); - } - vx = 2 * ::librapid::atan2(imag(sqrt(Complex(re - 1, im))), zr); - } - return Complex(ux, vx); - } - - /// \brief Compute the complex arc hyperbolic sine of a complex number - /// - /// This function computes the complex arc hyperbolic sine of the input complex number, - /// \f$ \text{asinh}(z) \f$ - /// - /// The algorithm handles NaN and infinity values, and avoids overflow. - /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex arc hyperbolic sine of the input complex number - template - LIBRAPID_NODISCARD Complex asinh(const Complex &other) { - const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::TypeInfo::max()); - const T pi = []() { + }(); + + const T re = real(other); + T im = imag(other); + T ux, vx; + + if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN + ux = typetraits::NumericInfo::quietNaN(); + vx = ux; + } else if (::librapid::isInf(re)) { // (+/-Inf, not NaN) + ux = typetraits::NumericInfo::infinity(); + if (::librapid::isInf(im)) { + if (re < 0) + vx = T(0.75) * pi; // (-Inf, +/-Inf) + else + vx = T(0.25) * pi; // (+Inf, +/-Inf) + } else if (re < 0) { + vx = pi; // (-Inf, finite) + } else { + vx = 0; // (+Inf, finite) + } + vx = ::librapid::copySign(vx, im); + } else { // (finite, finite) + const Complex wx = sqrt(Complex(re - 1, -im)); + const Complex zx = sqrt(Complex(re + 1, im)); + const 
T wr = real(wx); + const T wi = imag(wx); + const T zr = real(zx); + const T zi = imag(zx); + T alpha, beta; + + if (arcBig < wr) { // Real parts large + alpha = wr; + beta = zr - wi * (zi / alpha); + } else if (arcBig < wi) { // Imaginary parts large + alpha = wi; + beta = wr * (zr / alpha) - zi; + } else { // Shouldn't overflow (?) + alpha = 0; + beta = wr * zr - wi * zi; // Re(w * z) + } + + ux = ::librapid::asinh(beta); + if (alpha != 0) { + if (0 <= ux) + ux += ::librapid::log(alpha); + else + ux -= ::librapid::log(alpha); + } + vx = 2 * ::librapid::atan2(imag(sqrt(Complex(re - 1, im))), zr); + } + return Complex(ux, vx); + } + + /// \brief Compute the complex arc hyperbolic sine of a complex number + /// + /// This function computes the complex arc hyperbolic sine of the input complex number, + /// \f$ \text{asinh}(z) \f$ + /// + /// The algorithm handles NaN and infinity values, and avoids overflow. + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex arc hyperbolic sine of the input complex number + template + LIBRAPID_NODISCARD Complex asinh(const Complex &other) { + const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::NumericInfo::max()); + const T pi = []() { #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) - return ::librapid::constPi(); - else - return static_cast(3.1415926535897932384626433832795029L); + if constexpr (std::is_same_v) + return ::librapid::constPi(); + else + return static_cast(3.1415926535897932384626433832795029L); #else - return static_cast(3.1415926535897932384626433832795029L); + return static_cast(3.1415926535897932384626433832795029L); #endif - }(); - - const T re = real(other); - T im = imag(other); - T ux, vx; - - if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN/Inf - ux = typetraits::TypeInfo::quietNaN(); - vx = ux; - } else if (::librapid::isInf(re)) { // (+/-Inf, not NaN) - if (::librapid::isInf(im)) { // (+/-Inf, 
+/-Inf) - ux = re; - vx = ::librapid::copySign(T(0.25) * pi, im); - } else { // (+/-Inf, finite) - ux = re; - vx = ::librapid::copySign(T(0), im); - } - } else if (::librapid::isInf(im)) { - ux = ::librapid::copySign(typetraits::TypeInfo::infinity(), re); - vx = ::librapid::copySign(T(0.5) * pi, im); - } else { // (finite, finite) - const Complex wx = sqrt(Complex(1 - im, re)); - const Complex zx = sqrt(Complex(1 + im, -re)); - const T wr = real(wx); - const T wi = imag(wx); - const T zr = real(zx); - const T zi = imag(zx); - T alpha, beta; - - if (arcBig < wr) { // Real parts are large - alpha = wr; - beta = wi * (zr / alpha) - zi; - } else if (arcBig < wi) { // Imaginary parts are large - alpha = wi; - beta = zr - wr * (zi / alpha); - } else if (wi < -arcBig) { - alpha = -wi; - beta = -zr - wr * (zi / alpha); - } else { // Shouldn't overflow (?) - alpha = 0; - beta = wi * zr - wr * zi; // Im(w * conj(z)) - } - - ux = ::librapid::asinh(beta); - if (alpha != 0) { - if (0 <= ux) - ux += ::librapid::log(alpha); - else - ux -= ::librapid::log(alpha); - } - vx = ::librapid::atan2(im, real(wx * zx)); - } - return Complex(ux, vx); - } - - /// \brief Compute the complex arc sine of a complex number - /// - /// This function computes the complex arc sine of the input complex number, - /// \f$ \text{asin}(z) \f$ - /// - /// It calculates the complex arc sine by using the complex hyperbolic sine function. 
- /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex arc sine of the input complex number - /// \see asinh - template - LIBRAPID_NODISCARD Complex asin(const Complex &other) { - Complex asinhVal = asinh(Complex(-imag(other), real(other))); - return Complex(imag(asinhVal), -real(asinhVal)); - } - - /// \brief Compute the complex arc hyperbolic tangent of a complex number - /// - /// This function computes the complex arc hyperbolic tangent of the input complex number, - /// \f$ \text{atanh}(z) \f$ - /// - /// This function performs error checking and supports NaNs and Infs. - /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex arc hyperbolic tangent of the input complex number - template - LIBRAPID_NODISCARD Complex atanh(const Complex &other) { - const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::TypeInfo::max()); - const T piBy2 = []() { + }(); + + const T re = real(other); + T im = imag(other); + T ux, vx; + + if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN/Inf + ux = typetraits::NumericInfo::quietNaN(); + vx = ux; + } else if (::librapid::isInf(re)) { // (+/-Inf, not NaN) + if (::librapid::isInf(im)) { // (+/-Inf, +/-Inf) + ux = re; + vx = ::librapid::copySign(T(0.25) * pi, im); + } else { // (+/-Inf, finite) + ux = re; + vx = ::librapid::copySign(T(0), im); + } + } else if (::librapid::isInf(im)) { + ux = ::librapid::copySign(typetraits::NumericInfo::infinity(), re); + vx = ::librapid::copySign(T(0.5) * pi, im); + } else { // (finite, finite) + const Complex wx = sqrt(Complex(1 - im, re)); + const Complex zx = sqrt(Complex(1 + im, -re)); + const T wr = real(wx); + const T wi = imag(wx); + const T zr = real(zx); + const T zi = imag(zx); + T alpha, beta; + + if (arcBig < wr) { // Real parts are large + alpha = wr; + beta = wi * (zr / alpha) - zi; + } else if (arcBig < wi) { // Imaginary parts are large + 
alpha = wi; + beta = zr - wr * (zi / alpha); + } else if (wi < -arcBig) { + alpha = -wi; + beta = -zr - wr * (zi / alpha); + } else { // Shouldn't overflow (?) + alpha = 0; + beta = wi * zr - wr * zi; // Im(w * conj(z)) + } + + ux = ::librapid::asinh(beta); + if (alpha != 0) { + if (0 <= ux) + ux += ::librapid::log(alpha); + else + ux -= ::librapid::log(alpha); + } + vx = ::librapid::atan2(im, real(wx * zx)); + } + return Complex(ux, vx); + } + + /// \brief Compute the complex arc sine of a complex number + /// + /// This function computes the complex arc sine of the input complex number, + /// \f$ \text{asin}(z) \f$ + /// + /// It calculates the complex arc sine by using the complex hyperbolic sine function. + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex arc sine of the input complex number + /// \see asinh + template + LIBRAPID_NODISCARD Complex asin(const Complex &other) { + Complex asinhVal = asinh(Complex(-imag(other), real(other))); + return Complex(imag(asinhVal), -real(asinhVal)); + } + + /// \brief Compute the complex arc hyperbolic tangent of a complex number + /// + /// This function computes the complex arc hyperbolic tangent of the input complex number, + /// \f$ \text{atanh}(z) \f$ + /// + /// This function performs error checking and supports NaNs and Infs. 
+ /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex arc hyperbolic tangent of the input complex number + template + LIBRAPID_NODISCARD Complex atanh(const Complex &other) { + const T arcBig = T(0.25) * ::librapid::sqrt(typetraits::NumericInfo::max()); + const T piBy2 = []() { #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) - return ::librapid::constPi() / 2; - else - return static_cast(1.5707963267948966192313216916397514L); + if constexpr (std::is_same_v) + return ::librapid::constPi() / 2; + else + return static_cast(1.5707963267948966192313216916397514L); #else - return static_cast(1.5707963267948966192313216916397514L); + return static_cast(1.5707963267948966192313216916397514L); #endif - }(); - - T re = real(other); - T im = imag(other); - T ux, vx; - - if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN - ux = typetraits::TypeInfo::quietNaN(); - vx = ux; - } else if (::librapid::isInf(re)) { // (+/-Inf, not NaN) - ux = ::librapid::copySign(T(0), re); - vx = ::librapid::copySign(piBy2, im); - } else { // (finite, not NaN) - const T magIm = ::librapid::abs(im); - const T oldRe = re; - - re = ::librapid::abs(re); - - if (arcBig < re) { // |re| is large - T fx = im / re; - ux = 1 / re / (1 + fx * fx); - vx = ::librapid::copySign(piBy2, im); - } else if (arcBig < magIm) { // |im| is large - T fx = re / im; - ux = fx / im / (1 + fx * fx); - vx = ::librapid::copySign(piBy2, im); - } else if (re != 1) { // |re| is small - T reFrom1 = 1 - re; - T imEps2 = magIm * magIm; - ux = T(0.25) * detail::algorithm::logP1(4 * re / (reFrom1 * reFrom1 + imEps2)); - vx = T(0.5) * ::librapid::atan2(2 * im, reFrom1 * (1 + re) - imEps2); - } else if (im == 0) { // {+/-1, 0) - ux = typetraits::TypeInfo::infinity(); - vx = im; - } else { // (+/-1, nonzero) - ux = ::librapid::log(::librapid::sqrt(::librapid::sqrt(4 + im * im)) / - ::librapid::sqrt(magIm)); - vx = 
::librapid::copySign(T(0.5) * (piBy2 + ::librapid::atan2(magIm, T(2))), im); - } - ux = ::librapid::copySign(ux, oldRe); - } - return Complex(ux, vx); - } - - /// \brief Compute the complex arc tangent of a complex number - /// - /// This function computes the complex arc tangent of the input complex number, - /// \f$ \text{atan}(z) \f$ - /// - /// The algorithm handles NaN and infinity values, and avoids overflow. - /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex arc tangent of the input complex number - template - LIBRAPID_NODISCARD Complex atan(const Complex &other) { - Complex atanhVal = ::librapid::atanh(Complex(-imag(other), real(other))); - return Complex(imag(atanhVal), -real(atanhVal)); - } - - /// \brief Compute the complex hyperbolic cosine of a complex number - /// - /// This function computes the complex hyperbolic cosine of the input complex number, - /// \f$ \text{cosh}(z) \f$ - /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex hyperbolic cosine of the input complex number - template - LIBRAPID_NODISCARD Complex cosh(const Complex &other) { - return Complex(::librapid::cosh(real(other)) * ::librapid::cos(imag(other)), - ::librapid::sinh(real(other)) * ::librapid::sin(imag(other))); - } - - template - LIBRAPID_NODISCARD Complex polarPositiveNanInfZeroRho(const T &rho, const T &theta) { - // Rho is +NaN/+Inf/+0 - if (::librapid::isNaN(theta) || ::librapid::isInf(theta)) { // Theta is NaN/Inf - if (::librapid::isInf(rho)) { - return Complex(rho, ::librapid::sin(theta)); // (Inf, NaN/Inf) - } else { - return Complex(rho, ::librapid::copySign(rho, theta)); // (NaN/0, NaN/Inf) - } - } else if (theta == T(0)) { // Theta is zero - return Complex(rho, theta); // (NaN/Inf/0, 0) - } else { // Theta is finite non-zero - // (NaN/Inf/0, finite non-zero) - return Complex(rho * ::librapid::cos(theta), rho * ::librapid::sin(theta)); - 
} - } - - /// \brief Compute the complex exponential of a complex number - /// - /// This function computes the complex exponential of the input complex number, - /// \f$ e^z \f$ - /// - /// The algorithm handles NaN and infinity values. - /// - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex exponential of the input complex number - template - LIBRAPID_NODISCARD Complex exp(const Complex &other) { - const T logRho = real(other); - const T theta = imag(other); - - if (!::librapid::isNaN(logRho) && !::librapid::isInf(logRho)) { // Real component is finite - T real = logRho; - T imag = logRho; - detail::algorithm::expMul(&real, static_cast(::librapid::cos(theta)), 0); - detail::algorithm::expMul(&imag, static_cast(::librapid::sin(theta)), 0); - return Complex(real, imag); - } - - // Real component is NaN/Inf - // Return polar(exp(re), im) - if (::librapid::isInf(logRho)) { - if (logRho < 0) { - return polarPositiveNanInfZeroRho(T(0), theta); // exp(-Inf) = +0 - } else { - return polarPositiveNanInfZeroRho(logRho, theta); // exp(+Inf) = +Inf - } - } else { - return polarPositiveNanInfZeroRho(static_cast(::librapid::abs(logRho)), - theta); // exp(NaN) = +NaN - } - } - - /// \brief Compute the complex exponential base 2 of a complex number - /// \see exp - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex exponential base 2 of the input complex number - template - LIBRAPID_NODISCARD Complex exp2(const Complex &other) { - return pow(T(2), other); - } - - /// \brief Compute the complex exponential base 10 of a complex number - /// \see exp - /// \tparam T Scalar type of the complex number - /// \param other Input complex number - /// \return Complex exponential base 10 of the input complex number - template - LIBRAPID_NODISCARD Complex exp10(const Complex &other) { - return pow(T(10), other); - } - - template - T _fabs(const Complex &other, int64_t 
*exp) { - *exp = 0; - T av = ::librapid::abs(real(other)); - T bv = ::librapid::abs(imag(other)); - - if (::librapid::isInf(av) || ::librapid::isInf(bv)) { - return typetraits::TypeInfo::infinity(); // At least one component is Inf - } else if (::librapid::isNaN(av)) { - return av; // Real component is NaN - } else if (::librapid::isNaN(bv)) { - return bv; // Imaginary component is NaN - } else { - if (av < bv) std::swap(av, bv); - if (av == 0) return av; // |0| = 0 - - if (1 <= av) { - *exp = 4; - av = av * T(0.0625); - bv = bv * T(0.0625); - } else { - const T fltEps = typetraits::TypeInfo::epsilon(); - const T legTiny = fltEps == 0 ? T(0) : 2 * typetraits::TypeInfo::min() / fltEps; - - if (av < legTiny) { - int64_t exponent; + }(); + + T re = real(other); + T im = imag(other); + T ux, vx; + + if (::librapid::isNaN(re) || ::librapid::isNaN(im)) { // At least one NaN + ux = typetraits::NumericInfo::quietNaN(); + vx = ux; + } else if (::librapid::isInf(re)) { // (+/-Inf, not NaN) + ux = ::librapid::copySign(T(0), re); + vx = ::librapid::copySign(piBy2, im); + } else { // (finite, not NaN) + const T magIm = ::librapid::abs(im); + const T oldRe = re; + + re = ::librapid::abs(re); + + if (arcBig < re) { // |re| is large + T fx = im / re; + ux = 1 / re / (1 + fx * fx); + vx = ::librapid::copySign(piBy2, im); + } else if (arcBig < magIm) { // |im| is large + T fx = re / im; + ux = fx / im / (1 + fx * fx); + vx = ::librapid::copySign(piBy2, im); + } else if (re != 1) { // |re| is small + T reFrom1 = 1 - re; + T imEps2 = magIm * magIm; + ux = T(0.25) * detail::algorithm::logP1(4 * re / (reFrom1 * reFrom1 + imEps2)); + vx = T(0.5) * ::librapid::atan2(2 * im, reFrom1 * (1 + re) - imEps2); + } else if (im == 0) { // {+/-1, 0) + ux = typetraits::NumericInfo::infinity(); + vx = im; + } else { // (+/-1, nonzero) + ux = ::librapid::log(::librapid::sqrt(::librapid::sqrt(4 + im * im)) / + ::librapid::sqrt(magIm)); + vx = ::librapid::copySign(T(0.5) * (piBy2 + 
::librapid::atan2(magIm, T(2))), im); + } + ux = ::librapid::copySign(ux, oldRe); + } + return Complex(ux, vx); + } + + /// \brief Compute the complex arc tangent of a complex number + /// + /// This function computes the complex arc tangent of the input complex number, + /// \f$ \text{atan}(z) \f$ + /// + /// The algorithm handles NaN and infinity values, and avoids overflow. + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex arc tangent of the input complex number + template + LIBRAPID_NODISCARD Complex atan(const Complex &other) { + Complex atanhVal = ::librapid::atanh(Complex(-imag(other), real(other))); + return Complex(imag(atanhVal), -real(atanhVal)); + } + + /// \brief Compute the complex hyperbolic cosine of a complex number + /// + /// This function computes the complex hyperbolic cosine of the input complex number, + /// \f$ \text{cosh}(z) \f$ + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex hyperbolic cosine of the input complex number + template + LIBRAPID_NODISCARD Complex cosh(const Complex &other) { + return Complex(::librapid::cosh(real(other)) * ::librapid::cos(imag(other)), + ::librapid::sinh(real(other)) * ::librapid::sin(imag(other))); + } + + template + LIBRAPID_NODISCARD Complex polarPositiveNanInfZeroRho(const T &rho, const T &theta) { + // Rho is +NaN/+Inf/+0 + if (::librapid::isNaN(theta) || ::librapid::isInf(theta)) { // Theta is NaN/Inf + if (::librapid::isInf(rho)) { + return Complex(rho, ::librapid::sin(theta)); // (Inf, NaN/Inf) + } else { + return Complex(rho, ::librapid::copySign(rho, theta)); // (NaN/0, NaN/Inf) + } + } else if (theta == T(0)) { // Theta is zero + return Complex(rho, theta); // (NaN/Inf/0, 0) + } else { // Theta is finite non-zero + // (NaN/Inf/0, finite non-zero) + return Complex(rho * ::librapid::cos(theta), rho * ::librapid::sin(theta)); + } + } + + /// \brief Compute the 
complex exponential of a complex number + /// + /// This function computes the complex exponential of the input complex number, + /// \f$ e^z \f$ + /// + /// The algorithm handles NaN and infinity values. + /// + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex exponential of the input complex number + template + LIBRAPID_NODISCARD Complex exp(const Complex &other) { + const T logRho = real(other); + const T theta = imag(other); + + if (!::librapid::isNaN(logRho) && !::librapid::isInf(logRho)) { // Real component is finite + T real = logRho; + T imag = logRho; + detail::algorithm::expMul(&real, static_cast(::librapid::cos(theta)), 0); + detail::algorithm::expMul(&imag, static_cast(::librapid::sin(theta)), 0); + return Complex(real, imag); + } + + // Real component is NaN/Inf + // Return polar(exp(re), im) + if (::librapid::isInf(logRho)) { + if (logRho < 0) { + return polarPositiveNanInfZeroRho(T(0), theta); // exp(-Inf) = +0 + } else { + return polarPositiveNanInfZeroRho(logRho, theta); // exp(+Inf) = +Inf + } + } else { + return polarPositiveNanInfZeroRho(static_cast(::librapid::abs(logRho)), + theta); // exp(NaN) = +NaN + } + } + + /// \brief Compute the complex exponential base 2 of a complex number + /// \see exp + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex exponential base 2 of the input complex number + template + LIBRAPID_NODISCARD Complex exp2(const Complex &other) { + return pow(T(2), other); + } + + /// \brief Compute the complex exponential base 10 of a complex number + /// \see exp + /// \tparam T Scalar type of the complex number + /// \param other Input complex number + /// \return Complex exponential base 10 of the input complex number + template + LIBRAPID_NODISCARD Complex exp10(const Complex &other) { + return pow(T(10), other); + } + + template + T _fabs(const Complex &other, int64_t *exp) { + *exp = 0; + T av = 
::librapid::abs(real(other)); + T bv = ::librapid::abs(imag(other)); + + if (::librapid::isInf(av) || ::librapid::isInf(bv)) { + return typetraits::NumericInfo::infinity(); // At least one component is Inf + } else if (::librapid::isNaN(av)) { + return av; // Real component is NaN + } else if (::librapid::isNaN(bv)) { + return bv; // Imaginary component is NaN + } else { + if (av < bv) std::swap(av, bv); + if (av == 0) return av; // |0| = 0 + + if (1 <= av) { + *exp = 4; + av = av * T(0.0625); + bv = bv * T(0.0625); + } else { + const T fltEps = typetraits::NumericInfo::epsilon(); + const T legTiny = fltEps == 0 ? T(0) : 2 * typetraits::NumericInfo::min() / fltEps; + + if (av < legTiny) { + int64_t exponent; #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) { - exponent = -2 * ::mpfr::mpreal::get_default_prec(); - } else { - exponent = -2 * std::numeric_limits::digits; - } + if constexpr (std::is_same_v) { + exponent = -2 * ::mpfr::mpreal::get_default_prec(); + } else { + exponent = -2 * std::numeric_limits::digits; + } #else - exponent = -2 * std::numeric_limits::digits; + exponent = -2 * std::numeric_limits::digits; #endif - *exp = exponent; - av = ::librapid::ldexp(av, -exponent); - bv = ::librapid::ldexp(bv, -exponent); - } else { - *exp = -2; - av = av * 4; - bv = bv * 4; - } - } - - const T tmp = av - bv; - if (tmp == av) { - return av; // bv is unimportant - } else { + *exp = exponent; + av = ::librapid::ldexp(av, -exponent); + bv = ::librapid::ldexp(bv, -exponent); + } else { + *exp = -2; + av = av * 4; + bv = bv * 4; + } + } + + const T tmp = av - bv; + if (tmp == av) { + return av; // bv is unimportant + } else { #if defined(LIBRAPID_USE_MULTIPREC) - if constexpr (std::is_same_v) { // No approximations - const T root2 = ::librapid::sqrt(mpfr(2)); - const T onePlusRoot2 = root2 + 1; - - const T qv = tmp / bv; - const T rv = (qv + 2) * qv; - const T sv = rv / (root2 + ::librapid::sqrt(rv + 2)) + onePlusRoot2 + qv; - return av + bv / sv; - 
} else { + if constexpr (std::is_same_v) { // No approximations + const T root2 = ::librapid::sqrt(mpfr(2)); + const T onePlusRoot2 = root2 + 1; + + const T qv = tmp / bv; + const T rv = (qv + 2) * qv; + const T sv = rv / (root2 + ::librapid::sqrt(rv + 2)) + onePlusRoot2 + qv; + return av + bv / sv; + } else { #endif - if (bv < tmp) { // Use a simple approximation - const T qv = av / bv; - return av + bv / (qv + ::librapid::sqrt(qv * qv + 1)); - } else { // Use 1 1/2 precision to preserve bits - constexpr T root2 = static_cast(1.4142135623730950488016887242096981L); - constexpr T onePlusRoot2High = static_cast(10125945.0 / 4194304.0); - constexpr T onePlusRoot2Low = - static_cast(1.4341252375973918872420969807856967e-7L); - - const T qv = tmp / bv; - const T rv = (qv + 2) * qv; - const T sv = rv / (root2 + ::librapid::sqrt(rv + 2)) + onePlusRoot2Low + - qv + onePlusRoot2High; - return av + bv / sv; - } + if (bv < tmp) { // Use a simple approximation + const T qv = av / bv; + return av + bv / (qv + ::librapid::sqrt(qv * qv + 1)); + } else { // Use 1 1/2 precision to preserve bits + constexpr T root2 = static_cast(1.4142135623730950488016887242096981L); + constexpr T onePlusRoot2High = static_cast(10125945.0 / 4194304.0); + constexpr T onePlusRoot2Low = + static_cast(1.4341252375973918872420969807856967e-7L); + + const T qv = tmp / bv; + const T rv = (qv + 2) * qv; + const T sv = rv / (root2 + ::librapid::sqrt(rv + 2)) + onePlusRoot2Low + + qv + onePlusRoot2High; + return av + bv / sv; + } #if defined(LIBRAPID_USE_MULTIPREC) - } + } #endif - } - } - } + } + } + } - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T _logAbs(const Complex &other) noexcept { - return static_cast(detail::algorithm::logHypot(static_cast(real(other)), - static_cast(imag(other)))); - } + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T _logAbs(const Complex &other) noexcept { + return static_cast(detail::algorithm::logHypot(static_cast(real(other)), + static_cast(imag(other)))); + 
} #if defined(LIBRAPID_USE_MULTIPREC) - template<> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE mpfr _logAbs(const Complex &other) noexcept { - return detail::algorithm::logHypot(real(other), imag(other)); - } + template<> + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE mpfr _logAbs(const Complex &other) noexcept { + return detail::algorithm::logHypot(real(other), imag(other)); + } #endif - template<> - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE float _logAbs(const Complex &other) noexcept { - return detail::algorithm::logHypot(real(other), imag(other)); - } - - /// \brief Calculates the natural logarithm of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return Natural logarithm of the complex number - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log(const Complex &other) { - const T logAbs = _logAbs(other); - const T theta = ::librapid::atan2(imag(other), real(other)); - return Complex(logAbs, theta); - } - - /// \brief Calculates the logarithm of a complex number with a complex base - /// - /// \f$ \log_{\mathrm{base}}(z) = \log(z) / \log(\mathrm{base}) \f$ - /// \tparam T Scalar type - /// \tparam B Base type - /// \param other Complex number - /// \param base Base of the logarithm - /// \return Logarithm of the complex number with the given base - /// \see log - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log(const Complex &other, - const Complex &base) { - return log(other) / log(base); - } - - /// \brief Calculates the logarithm of a complex number with a real base - /// - /// \f$ \log_{\mathrm{base}}(z) = \log(z) / \log(\mathrm{base}) \f$ - /// \tparam T Scalar type of the complex number - /// \tparam B Scalar type of the base - /// \param other Complex number - /// \param base Base of the logarithm (real) - /// \return Logarithm of the complex number with the given base - /// \see log - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log(const Complex &other, - const B &base) { - 
const T logAbs = _logAbs(other); - const T theta = ::librapid::atan2(imag(other), real(other)); - return Complex(logAbs, theta) / ::librapid::log(base); - } - - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex _pow(const T &left, const T &right) { - if (0 <= left) { - return Complex(::librapid::pow(left, right), ::librapid::copySign(T(0), right)); - } else { - return exp(right * log(Complex(left))); - } - } - - /// \brief Calculate \f$ \text{left}^{\text{right}} \f$ for a complex-valued left-hand side - /// \tparam T Value type for the left-hand side - /// \tparam V Value type for the right-hand side - /// \param left Complex base - /// \param right Real exponent - /// \return \f$ \text{left}^{\text{right}} \f$ - template::type == detail::LibRapidType::Scalar, int> = 0> - LIBRAPID_NODISCARD Complex pow(const Complex &left, const V &right) { - if (imag(left) == 0) { - if (::librapid::signBit(imag(left))) { - return conj(_pow(real(left), static_cast(right))); - } else { - return _pow(real(left), static_cast(right)); - } - } else { - return exp(static_cast(right) * log(left)); - } - } - - /// \brief Calculate \f$ \text{left}^{\text{right}} \f$ for a complex-valued right-hand side - /// \tparam T Value type for the left-hand side - /// \tparam V Value type for the right-hand side - /// \param left Real base - /// \param right Complex exponent - /// \return \f$ \text{left}^{\text{right}} \f$ - template::type == detail::LibRapidType::Scalar, int> = 0> - LIBRAPID_NODISCARD Complex pow(const V &left, const Complex &right) { - if (imag(right) == 0) { - return _pow(static_cast(left), real(right)); - } else if (0 < left) { - return exp(right * ::librapid::log(static_cast(left))); - } else { - return exp(right * log(Complex(static_cast(left)))); - } - } - - /// \brief Calculate \f$ \text{left}^{\text{right}} \f$ for complex numbers - /// \tparam T Complex number component type - /// \param left Complex base - /// \param right Complex exponent - /// \return \f$ 
\text{left}^{\text{right}} \f$ - template - LIBRAPID_NODISCARD Complex pow(const Complex &left, const Complex &right) { - if (imag(right) == 0) { - return pow(left, real(right)); - } else if (imag(left) == 0 && 0 < real(left)) { - return exp(right * ::librapid::log(real(left))); - } else { - return exp(right * log(left)); - } - } - - /// \brief Calculate the hyperbolic sine of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \sinh(z) \f$ - template - LIBRAPID_NODISCARD Complex sinh(const Complex &other) { - return Complex(::librapid::sinh(real(other)) * ::librapid::cos(imag(other)), - ::librapid::cosh(real(other)) * ::librapid::sin(imag(other))); - } - - template - LIBRAPID_NODISCARD Complex sqrt(const Complex &other) { - int64_t otherExp; - T rho = _fabs(other, &otherExp); // Get magnitude and scale factor - - if (otherExp == 0) { // Argument is zero, Inf or NaN - if (rho == 0) { - return Complex(T(0), imag(other)); - } else if (::librapid::isInf(rho)) { - const T re = real(other); - const T im = imag(other); - - if (::librapid::isInf(im)) { - return Complex(typetraits::TypeInfo::infinity(), im); // (any, +/-Inf) - } else if (::librapid::isNaN(im)) { - if (re < 0) { - // (-Inf, NaN) - return Complex(::librapid::abs(im), ::librapid::copySign(re, im)); - } else { - return other; // (+Inf, NaN) - } - } else { - if (re < 0) { - return Complex(T(0), ::librapid::copySign(re, im)); // (-Inf, finite) - } else { - return Complex(re, ::librapid::copySign(T(0), im)); // (+Inf, finite) - } - } - } else { - return Complex(rho, rho); - } - } else { // Compute in safest quadrant - T realMag = ::librapid::ldexp(::librapid::abs(real(other)), -otherExp); - rho = ::librapid::ldexp(::librapid::sqrt(2 * (realMag + rho)), otherExp / 2 - 1); - if (0 <= real(other)) { - return Complex(rho, imag(other) / (2 * rho)); - } else { - return Complex(::librapid::abs(imag(other) / (2 * rho)), - ::librapid::copySign(rho, imag(other))); - } - } - } - 
- /// \brief Calculate the hyperbolic tangent of a complex number - /// - /// This function supports propagation of NaNs and Infs. - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \tanh(z) \f$ - template - LIBRAPID_NODISCARD Complex tanh(const Complex &other) { - T tv = ::librapid::tan(imag(other)); - T sv = ::librapid::sinh(real(other)); - T bv = sv * (T(1) + tv * tv); - T dv = T(1) + bv * sv; - - if (::librapid::isInf(dv)) { - T real; - if (sv < T(0)) - real = T(-1); - else - real = T(1); - return Complex(real, T(0)); - } - return Complex((::librapid::sqrt(T(1) + sv * sv)) * bv / dv, tv / dv); - } - - // Return the phase angle of a complex value as a real - - /// \brief Return the phase angle of a complex value as a real - /// - /// This function calls \f$ \text{atan2}(\text{imag}(z), \text{real}(z)) \f$. - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \arg(z) \f$ - /// \see atan2 - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T arg(const Complex &other) { - return ::librapid::atan2(imag(other), real(other)); - } - - /// \brief Project a complex number onto the Riemann sphere - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \text{proj}(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex proj(const Complex &other) { - if (::librapid::isInf(real(other)) || ::librapid::isInf(imag(other))) { - const T im = ::librapid::copySign(T(0), imag(other)); - return Complex(typetraits::TypeInfo::infinity(), im); - } - return other; - } - - /// \brief Calculate the cosine of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \cos(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex cos(const Complex &other) { - return Complex(::librapid::cosh(imag(other)) * ::librapid::cos(real(other)), - -::librapid::sinh(imag(other)) * ::librapid::sin(real(other))); - } - - /// \brief Calculate the cosecant of a 
complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \csc(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex csc(const Complex &other) { - return T(1) / sin(other); - } - - /// \brief Calculate the secant of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \sec(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex sec(const Complex &other) { - return T(1) / cos(other); - } - - /// \brief Calculate the cotangent of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \cot(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex cot(const Complex &other) { - return T(1) / tan(other); - } - - /// \brief Calculate the arc cosecant of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \operatorname{arccsc}(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex acsc(const Complex &other) { - return asin(T(1) / other); - } - - /// \brief Calculate the arc secant of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \operatorname{arcsec}(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex asec(const Complex &other) { - return acos(T(1) / other); - } - - /// \brief Calculate the arc cotangent of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \operatorname{arccot}(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex acot(const Complex &other) { - return atan(T(1) / other); - } - - /// \brief Calculate the logarithm base 2 of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \log_2(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log2(const Complex &other) { - return log(other) / ::librapid::log(T(2)); - } - - /// \brief Calculate the 
logarithm base 10 of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \log_{10}(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log10(const Complex &other) { - return log(other) / ::librapid::log(10); - } - - // Return magnitude squared - - /// \brief Calculate the magnitude squared of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ |z|^2 \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T norm(const Complex &other) { - return real(other) * real(other) + imag(other) * imag(other); - } - - /// \brief Return a complex number from polar coordinates - /// - /// Given a radius, \p rho, and an angle, \p theta, this function returns the complex number - /// \f$ \rho e^{i\theta} \f$. - /// - /// The function returns NaN, infinity or zero based on the input values of rho. - /// \tparam T Scalar type of the complex number - /// \param rho Radius of the polar coordinate system - /// \param theta Angle of the polar coordinate system - /// \return Complex number in polar form. 
- template - LIBRAPID_NODISCARD Complex polar(const T &rho, const T &theta) { - if (!::librapid::isNaN(rho) && !::librapid::isInf(rho) && rho != T(0)) { - // Rho is finite and non-zero - return Complex(rho * ::librapid::cos(theta), rho * ::librapid::sin(theta)); - } - - // Rho is NaN/Inf/0 - if (::librapid::signBit(rho)) - return -polarPositiveNanInfZeroRho(-rho, theta); - else - return polarPositiveNanInfZeroRho(rho, theta); - } - - /// \brief Compute the sine of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \sin(z) \f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex sin(const Complex &other) { - return Complex(::librapid::cosh(imag(other)) * ::librapid::sin(real(other)), - ::librapid::sinh(imag(other)) * ::librapid::cos(real(other))); - } - - /// \brief Compute the tangent of a complex number - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ \tan(z) \f$ - template - LIBRAPID_NODISCARD Complex tan(const Complex &other) { - Complex zv(tanh(Complex(-imag(other), real(other)))); - return Complex(imag(zv), -real(zv)); - } - - /// \brief Round the real and imaginary parts of a complex number towards \f$ -\infty \f$ - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$ (\lfloor\operatorname{real}(z)\rfloor,\lfloor\operatorname{imag}(z)\rfloor )\f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex floor(const Complex &other) { - return Complex(::librapid::floor(real(other)), ::librapid::floor(imag(other))); - } - - /// \brief Round the real and imaginary parts of a complex number towards \f$ +\infty \f$ - /// \tparam T Scalar type - /// \param other Complex number - /// \return \f$(\lceil\operatorname{real}(z)\rceil,\lceil\operatorname{imag}(z)\rceil )\f$ - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex ceil(const Complex &other) { - return Complex(::librapid::ceil(real(other)), ::librapid::ceil(imag(other))); - } - - /// 
\brief Generate a random complex number between two given complex numbers - /// - /// This function generates a random complex number in the range [min, max], where min - /// and max are given as input. The function uses a default seed if none is provided. - /// - /// \tparam T Scalar type of the complex number - /// \param min Minimum complex number - /// \param max Maximum complex number - /// \param seed Seed for the random number generator - /// \return Random complex number between min and max - template - LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto random(const Complex &min, - const Complex &max, uint64_t seed = -1) - -> Complex { - return Complex(::librapid::random(real(min), real(max), seed), - ::librapid::random(imag(min), imag(max), seed)); - } - - namespace typetraits { - template - struct TypeInfo> { - static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; - using Scalar = Complex; - using Packet = std::false_type; - // typename std::conditional_t<(TypeInfo::packetWidth > 1), - // Complex::Packet>, std::false_type>; - static constexpr int64_t packetWidth = - 0; // TypeInfo::Scalar>::packetWidth; - static constexpr char name[] = "Complex"; - static constexpr bool supportsArithmetic = true; - static constexpr bool supportsLogical = true; - static constexpr bool supportsBinary = false; - static constexpr bool allowVectorisation = false; + template<> + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE float _logAbs(const Complex &other) noexcept { + return detail::algorithm::logHypot(real(other), imag(other)); + } + + /// \brief Calculates the natural logarithm of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return Natural logarithm of the complex number + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log(const Complex &other) { + const T logAbs = _logAbs(other); + const T theta = ::librapid::atan2(imag(other), real(other)); + return Complex(logAbs, theta); + } + + /// \brief Calculates 
the logarithm of a complex number with a complex base + /// + /// \f$ \log_{\mathrm{base}}(z) = \log(z) / \log(\mathrm{base}) \f$ + /// \tparam T Scalar type + /// \tparam B Base type + /// \param other Complex number + /// \param base Base of the logarithm + /// \return Logarithm of the complex number with the given base + /// \see log + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log(const Complex &other, + const Complex &base) { + return log(other) / log(base); + } + + /// \brief Calculates the logarithm of a complex number with a real base + /// + /// \f$ \log_{\mathrm{base}}(z) = \log(z) / \log(\mathrm{base}) \f$ + /// \tparam T Scalar type of the complex number + /// \tparam B Scalar type of the base + /// \param other Complex number + /// \param base Base of the logarithm (real) + /// \return Logarithm of the complex number with the given base + /// \see log + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log(const Complex &other, + const B &base) { + const T logAbs = _logAbs(other); + const T theta = ::librapid::atan2(imag(other), real(other)); + return Complex(logAbs, theta) / ::librapid::log(base); + } + + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex _pow(const T &left, const T &right) { + if (0 <= left) { + return Complex(::librapid::pow(left, right), ::librapid::copySign(T(0), right)); + } else { + return exp(right * log(Complex(left))); + } + } + + /// \brief Calculate \f$ \text{left}^{\text{right}} \f$ for a complex-valued left-hand side + /// \tparam T Value type for the left-hand side + /// \tparam V Value type for the right-hand side + /// \param left Complex base + /// \param right Real exponent + /// \return \f$ \text{left}^{\text{right}} \f$ + template::type == detail::LibRapidType::Scalar, int> = 0> + LIBRAPID_NODISCARD Complex pow(const Complex &left, const V &right) { + if (imag(left) == 0) { + if (::librapid::signBit(imag(left))) { + return conj(_pow(real(left), static_cast(right))); + } else { 
+ return _pow(real(left), static_cast(right)); + } + } else { + return exp(static_cast(right) * log(left)); + } + } + + /// \brief Calculate \f$ \text{left}^{\text{right}} \f$ for a complex-valued right-hand side + /// \tparam T Value type for the left-hand side + /// \tparam V Value type for the right-hand side + /// \param left Real base + /// \param right Complex exponent + /// \return \f$ \text{left}^{\text{right}} \f$ + template::type == detail::LibRapidType::Scalar, int> = 0> + LIBRAPID_NODISCARD Complex pow(const V &left, const Complex &right) { + if (imag(right) == 0) { + return _pow(static_cast(left), real(right)); + } else if (0 < left) { + return exp(right * ::librapid::log(static_cast(left))); + } else { + return exp(right * log(Complex(static_cast(left)))); + } + } + + /// \brief Calculate \f$ \text{left}^{\text{right}} \f$ for complex numbers + /// \tparam T Complex number component type + /// \param left Complex base + /// \param right Complex exponent + /// \return \f$ \text{left}^{\text{right}} \f$ + template + LIBRAPID_NODISCARD Complex pow(const Complex &left, const Complex &right) { + if (imag(right) == 0) { + return pow(left, real(right)); + } else if (imag(left) == 0 && 0 < real(left)) { + return exp(right * ::librapid::log(real(left))); + } else { + return exp(right * log(left)); + } + } + + /// \brief Calculate the hyperbolic sine of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \sinh(z) \f$ + template + LIBRAPID_NODISCARD Complex sinh(const Complex &other) { + return Complex(::librapid::sinh(real(other)) * ::librapid::cos(imag(other)), + ::librapid::cosh(real(other)) * ::librapid::sin(imag(other))); + } + + template + LIBRAPID_NODISCARD Complex sqrt(const Complex &other) { + int64_t otherExp; + T rho = _fabs(other, &otherExp); // Get magnitude and scale factor + + if (otherExp == 0) { // Argument is zero, Inf or NaN + if (rho == 0) { + return Complex(T(0), imag(other)); + } else if 
(::librapid::isInf(rho)) { + const T re = real(other); + const T im = imag(other); + + if (::librapid::isInf(im)) { + return Complex(typetraits::NumericInfo::infinity(), im); // (any, +/-Inf) + } else if (::librapid::isNaN(im)) { + if (re < 0) { + // (-Inf, NaN) + return Complex(::librapid::abs(im), ::librapid::copySign(re, im)); + } else { + return other; // (+Inf, NaN) + } + } else { + if (re < 0) { + return Complex(T(0), ::librapid::copySign(re, im)); // (-Inf, finite) + } else { + return Complex(re, ::librapid::copySign(T(0), im)); // (+Inf, finite) + } + } + } else { + return Complex(rho, rho); + } + } else { // Compute in safest quadrant + T realMag = ::librapid::ldexp(::librapid::abs(real(other)), -otherExp); + rho = ::librapid::ldexp(::librapid::sqrt(2 * (realMag + rho)), otherExp / 2 - 1); + if (0 <= real(other)) { + return Complex(rho, imag(other) / (2 * rho)); + } else { + return Complex(::librapid::abs(imag(other) / (2 * rho)), + ::librapid::copySign(rho, imag(other))); + } + } + } + + /// \brief Calculate the hyperbolic tangent of a complex number + /// + /// This function supports propagation of NaNs and Infs. + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \tanh(z) \f$ + template + LIBRAPID_NODISCARD Complex tanh(const Complex &other) { + T tv = ::librapid::tan(imag(other)); + T sv = ::librapid::sinh(real(other)); + T bv = sv * (T(1) + tv * tv); + T dv = T(1) + bv * sv; + + if (::librapid::isInf(dv)) { + T real; + if (sv < T(0)) + real = T(-1); + else + real = T(1); + return Complex(real, T(0)); + } + return Complex((::librapid::sqrt(T(1) + sv * sv)) * bv / dv, tv / dv); + } + + // Return the phase angle of a complex value as a real + + /// \brief Return the phase angle of a complex value as a real + /// + /// This function calls \f$ \text{atan2}(\text{imag}(z), \text{real}(z)) \f$. 
+ /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \arg(z) \f$ + /// \see atan2 + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T arg(const Complex &other) { + return ::librapid::atan2(imag(other), real(other)); + } + + /// \brief Project a complex number onto the Riemann sphere + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \text{proj}(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex proj(const Complex &other) { + if (::librapid::isInf(real(other)) || ::librapid::isInf(imag(other))) { + const T im = ::librapid::copySign(T(0), imag(other)); + return Complex(typetraits::NumericInfo::infinity(), im); + } + return other; + } + + /// \brief Calculate the cosine of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \cos(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex cos(const Complex &other) { + return Complex(::librapid::cosh(imag(other)) * ::librapid::cos(real(other)), + -::librapid::sinh(imag(other)) * ::librapid::sin(real(other))); + } + + /// \brief Calculate the cosecant of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \csc(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex csc(const Complex &other) { + return T(1) / sin(other); + } + + /// \brief Calculate the secant of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \sec(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex sec(const Complex &other) { + return T(1) / cos(other); + } + + /// \brief Calculate the cotangent of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \cot(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex cot(const Complex &other) { + return T(1) / tan(other); + } + + /// \brief Calculate the arc cosecant of a complex number + /// 
\tparam T Scalar type + /// \param other Complex number + /// \return \f$ \operatorname{arccsc}(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex acsc(const Complex &other) { + return asin(T(1) / other); + } + + /// \brief Calculate the arc secant of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \operatorname{arcsec}(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex asec(const Complex &other) { + return acos(T(1) / other); + } + + /// \brief Calculate the arc cotangent of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \operatorname{arccot}(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex acot(const Complex &other) { + return atan(T(1) / other); + } + + /// \brief Calculate the logarithm base 2 of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \log_2(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log2(const Complex &other) { + return log(other) / ::librapid::log(T(2)); + } + + /// \brief Calculate the logarithm base 10 of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \log_{10}(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex log10(const Complex &other) { + return log(other) / ::librapid::log(10); + } + + // Return magnitude squared + + /// \brief Calculate the magnitude squared of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ |z|^2 \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE T norm(const Complex &other) { + return real(other) * real(other) + imag(other) * imag(other); + } + + /// \brief Return a complex number from polar coordinates + /// + /// Given a radius, \p rho, and an angle, \p theta, this function returns the complex number + /// \f$ \rho e^{i\theta} \f$. 
+ /// + /// The function returns NaN, infinity or zero based on the input values of rho. + /// \tparam T Scalar type of the complex number + /// \param rho Radius of the polar coordinate system + /// \param theta Angle of the polar coordinate system + /// \return Complex number in polar form. + template + LIBRAPID_NODISCARD Complex polar(const T &rho, const T &theta) { + if (!::librapid::isNaN(rho) && !::librapid::isInf(rho) && rho != T(0)) { + // Rho is finite and non-zero + return Complex(rho * ::librapid::cos(theta), rho * ::librapid::sin(theta)); + } + + // Rho is NaN/Inf/0 + if (::librapid::signBit(rho)) + return -polarPositiveNanInfZeroRho(-rho, theta); + else + return polarPositiveNanInfZeroRho(rho, theta); + } + + /// \brief Compute the sine of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \sin(z) \f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex sin(const Complex &other) { + return Complex(::librapid::cosh(imag(other)) * ::librapid::sin(real(other)), + ::librapid::sinh(imag(other)) * ::librapid::cos(real(other))); + } + + /// \brief Compute the tangent of a complex number + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ \tan(z) \f$ + template + LIBRAPID_NODISCARD Complex tan(const Complex &other) { + Complex zv(tanh(Complex(-imag(other), real(other)))); + return Complex(imag(zv), -real(zv)); + } + + /// \brief Round the real and imaginary parts of a complex number towards \f$ -\infty \f$ + /// \tparam T Scalar type + /// \param other Complex number + /// \return \f$ (\lfloor\operatorname{real}(z)\rfloor,\lfloor\operatorname{imag}(z)\rfloor )\f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex floor(const Complex &other) { + return Complex(::librapid::floor(real(other)), ::librapid::floor(imag(other))); + } + + /// \brief Round the real and imaginary parts of a complex number towards \f$ +\infty \f$ + /// \tparam T Scalar type + /// \param other 
Complex number + /// \return \f$(\lceil\operatorname{real}(z)\rceil,\lceil\operatorname{imag}(z)\rceil )\f$ + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE Complex ceil(const Complex &other) { + return Complex(::librapid::ceil(real(other)), ::librapid::ceil(imag(other))); + } + + /// \brief Generate a random complex number between two given complex numbers + /// + /// This function generates a random complex number in the range [min, max], where min + /// and max are given as input. The function uses a default seed if none is provided. + /// + /// \tparam T Scalar type of the complex number + /// \param min Minimum complex number + /// \param max Maximum complex number + /// \param seed Seed for the random number generator + /// \return Random complex number between min and max + template + LIBRAPID_NODISCARD LIBRAPID_ALWAYS_INLINE auto random(const Complex &min, + const Complex &max, uint64_t seed = -1) + -> Complex { + return Complex(::librapid::random(real(min), real(max), seed), + ::librapid::random(imag(min), imag(max), seed)); + } + + namespace typetraits { + template + struct TypeInfo> { + static constexpr detail::LibRapidType type = detail::LibRapidType::Scalar; + using Scalar = Complex; + using Backend = typename TypeInfo::Backend; + using ShapeType = std::false_type; + using Packet = std::false_type; + static constexpr int64_t packetWidth = 0; + static constexpr char name[] = "Complex"; + static constexpr bool supportsArithmetic = true; + static constexpr bool supportsLogical = true; + static constexpr bool supportsBinary = false; + static constexpr bool allowVectorisation = false; #if defined(LIBRAPID_HAS_CUDA) - static constexpr cudaDataType_t CudaType = cudaDataType_t::CUDA_C_64F; + static constexpr cudaDataType_t CudaType = cudaDataType_t::CUDA_C_64F; #endif - static constexpr bool canAlign = TypeInfo::canAlign; - static constexpr bool canMemcpy = TypeInfo::canMemcpy; - - LIMIT_IMPL(min) { return TypeInfo::min(); } - LIMIT_IMPL(max) { return 
TypeInfo::max(); } - LIMIT_IMPL(epsilon) { return TypeInfo::epsilon(); } - LIMIT_IMPL(roundError) { return TypeInfo::roundError(); } - LIMIT_IMPL(denormMin) { return TypeInfo::denormMin(); } - LIMIT_IMPL(infinity) { return TypeInfo::infinity(); } - LIMIT_IMPL(quietNaN) { return TypeInfo::quietNaN(); } - LIMIT_IMPL(signalingNaN) { return TypeInfo::signalingNaN(); } - }; - } // namespace typetraits + static constexpr bool canAlign = TypeInfo::canAlign; + static constexpr bool canMemcpy = TypeInfo::canMemcpy; + + LIMIT_IMPL(min) { return TypeInfo::min(); } + LIMIT_IMPL(max) { return TypeInfo::max(); } + LIMIT_IMPL(epsilon) { return TypeInfo::epsilon(); } + LIMIT_IMPL(roundError) { return TypeInfo::roundError(); } + LIMIT_IMPL(denormMin) { return TypeInfo::denormMin(); } + LIMIT_IMPL(infinity) { return TypeInfo::infinity(); } + LIMIT_IMPL(quietNaN) { return TypeInfo::quietNaN(); } + LIMIT_IMPL(signalingNaN) { return TypeInfo::signalingNaN(); } + }; + } // namespace typetraits } // namespace librapid // Support FMT printing @@ -2082,31 +2084,33 @@ namespace librapid { template struct fmt::formatter, Char> { private: - using Type = librapid::Complex; - using Scalar = typename Type::Scalar; - using Base = fmt::formatter; - Base m_base; + using Type = librapid::Complex; + using Scalar = typename Type::Scalar; + using Base = fmt::formatter; + Base m_base; public: - template - FMT_CONSTEXPR auto parse(ParseContext &ctx) -> const char * { - return m_base.parse(ctx); - } - - template - FMT_CONSTEXPR auto format(const Type &val, FormatContext &ctx) const -> decltype(ctx.out()) { - val.str(m_base, ctx); - return ctx.out(); - } + template + FMT_CONSTEXPR auto parse(ParseContext &ctx) -> const char * { + return m_base.parse(ctx); + } + + template + FMT_CONSTEXPR auto format(const Type &val, FormatContext &ctx) const -> decltype(ctx.out()) { + val.str(m_base, ctx); + return ctx.out(); + } }; #endif // FMT_API #ifdef USE_X86_X64_INTRINSICS -# undef USE_X86_X64_INTRINSICS +# undef 
USE_X86_X64_INTRINSICS #endif #ifdef USE_ARM64_INTRINSICS -# undef USE_ARM64_INTRINSICS +# undef USE_ARM64_INTRINSICS #endif -#endif // LIBRAPID_MATH_COMPLEX_HPP \ No newline at end of file +#undef REQUIRE_SCALAR + +#endif // LIBRAPID_MATH_COMPLEX_HPP diff --git a/librapid/include/librapid/opencl/opencl.hpp b/librapid/include/librapid/opencl/opencl.hpp index 2b926aca..f2b90050 100644 --- a/librapid/include/librapid/opencl/opencl.hpp +++ b/librapid/include/librapid/opencl/opencl.hpp @@ -71,16 +71,16 @@ * fixes in the new header as well as additional OpenCL 2.0 features. * As a result the header is not directly backward compatible and for this * reason we release it as opencl.hpp rather than a new version of cl.hpp. - * + * * * \section compatibility Compatibility * Due to the evolution of the underlying OpenCL API the 2.0 C++ bindings * include an updated approach to defining supported feature versions * and the range of valid underlying OpenCL runtime versions supported. * - * The combination of preprocessor macros CL_HPP_TARGET_OPENCL_VERSION and + * The combination of preprocessor macros CL_HPP_TARGET_OPENCL_VERSION and * CL_HPP_MINIMUM_OPENCL_VERSION control this range. These are three digit - * decimal values representing OpenCL runtime versions. The default for + * decimal values representing OpenCL runtime versions. The default for * the target is 300, representing OpenCL 3.0. The minimum is defined as 200. * These settings would use 2.0 and newer API calls only. * If backward compatibility with a 1.2 runtime is required, the minimum @@ -89,21 +89,21 @@ * Note that this is a compile-time setting, and so affects linking against * a particular SDK version rather than the versioning of the loaded runtime. * - * The earlier versions of the header included basic vector and string - * classes based loosely on STL versions. These were difficult to + * The earlier versions of the header included basic vector and string + * classes based loosely on STL versions. 
These were difficult to * maintain and very rarely used. For the 2.0 header we now assume * the presence of the standard library unless requested otherwise. - * We use std::array, std::vector, std::shared_ptr and std::string - * throughout to safely manage memory and reduce the chance of a + * We use std::array, std::vector, std::shared_ptr and std::string + * throughout to safely manage memory and reduce the chance of a * recurrance of earlier memory management bugs. * - * These classes are used through typedefs in the cl namespace: + * These classes are used through typedefs in the cl namespace: * cl::array, cl::vector, cl::pointer and cl::string. * In addition cl::allocate_pointer forwards to std::allocate_shared * by default. - * In all cases these standard library classes can be replaced with - * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, - * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and + * In all cases these standard library classes can be replaced with + * custom interface-compatible versions using the CL_HPP_NO_STD_ARRAY, + * CL_HPP_NO_STD_VECTOR, CL_HPP_NO_STD_UNIQUE_PTR and * CL_HPP_NO_STD_STRING macros. * * The OpenCL 1.x versions of the C++ bindings included a size_t wrapper @@ -114,12 +114,12 @@ * using the CL_HPP_ENABLE_SIZE_T_COMPATIBILITY macro. * * Finally, the program construction interface used a clumsy vector-of-pairs - * design in the earlier versions. We have replaced that with a cleaner - * vector-of-vectors and vector-of-strings design. However, for backward + * design in the earlier versions. We have replaced that with a cleaner + * vector-of-vectors and vector-of-strings design. However, for backward * compatibility old behaviour can be regained with the * CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY macro. - * - * In OpenCL 2.0 OpenCL C is not entirely backward compatibility with + * + * In OpenCL 2.0 OpenCL C is not entirely backward compatibility with * earlier versions. 
As a result a flag must be passed to the OpenCL C * compiled to request OpenCL 2.0 compilation of kernels with 1.2 as * the default in the absence of the flag. @@ -213,178 +213,179 @@ * bindings, including support for the optional exception feature and * also the supplied vector and string classes, see following sections for * decriptions of these features. - * + * * Note: the C++ bindings use std::call_once and therefore may need to be * compiled using special command-line options (such as "-pthread") on some * platforms! * * \code - #define CL_HPP_ENABLE_EXCEPTIONS - #define CL_HPP_TARGET_OPENCL_VERSION 200 - - #include - #include - #include - #include - #include - - const int numElements = 32; - - int main(void) - { - // Filter for a 2.0 or newer platform and set it as the default - std::vector platforms; - cl::Platform::get(&platforms); - cl::Platform plat; - for (auto &p : platforms) { - std::string platver = p.getInfo(); - if (platver.find("OpenCL 2.") != std::string::npos || - platver.find("OpenCL 3.") != std::string::npos) { - // Note: an OpenCL 3.x platform may not support all required features! 
- plat = p; - } - } - if (plat() == 0) { - std::cout << "No OpenCL 2.0 or newer platform found.\n"; - return -1; - } - - cl::Platform newP = cl::Platform::setDefault(plat); - if (newP != plat) { - std::cout << "Error setting default platform.\n"; - return -1; - } - - // C++11 raw string literal for the first kernel - std::string kernel1{R"CLC( - global int globalA; - kernel void updateGlobal() - { - globalA = 75; - } - )CLC"}; - - // Raw string literal for the second kernel - std::string kernel2{R"CLC( - typedef struct { global int *bar; } Foo; - kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int *inputB, - global int *output, int val, write_only pipe int outPipe, queue_t childQueue) - { - output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + *(aNum->bar); - write_pipe(outPipe, &val); - queue_t default_queue = get_default_queue(); - ndrange_t ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); - - // Have a child kernel write into third quarter of output - enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, - ^{ - output[get_global_size(0)*2 + get_global_id(0)] = - inputA[get_global_size(0)*2 + get_global_id(0)] + inputB[get_global_size(0)*2 + get_global_id(0)] + globalA; - }); - - // Have a child kernel write into last quarter of output - enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, - ^{ - output[get_global_size(0)*3 + get_global_id(0)] = - inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + get_global_id(0)] + globalA + 2; - }); - } - )CLC"}; - - std::vector programStrings; - programStrings.push_back(kernel1); - programStrings.push_back(kernel2); - - cl::Program vectorAddProgram(programStrings); - try { - vectorAddProgram.build("-cl-std=CL2.0"); - } - catch (...) 
{ - // Print build info for all devices - cl_int buildErr = CL_SUCCESS; - auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); - for (auto &pair : buildInfo) { - std::cerr << pair.second << std::endl << std::endl; - } - - return 1; - } - - typedef struct { int *bar; } Foo; - - // Get and run kernel that initializes the program-scope global - // A test for kernels that take no arguments - auto program2Kernel = - cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); - program2Kernel( - cl::EnqueueArgs( - cl::NDRange(1))); - - ////////////////// - // SVM allocations - - auto anSVMInt = cl::allocate_svm>(); - *anSVMInt = 5; - cl::SVMAllocator>> svmAllocReadOnly; - auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); - fooPointer->bar = anSVMInt.get(); - cl::SVMAllocator> svmAlloc; - std::vector>> inputA(numElements, 1, svmAlloc); - cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); - - ////////////// - // Traditional cl_mem allocations - - std::vector output(numElements, 0xdeadbeef); - cl::Buffer outputBuffer(output.begin(), output.end(), false); - cl::Pipe aPipe(sizeof(cl_int), numElements / 2); - - // Default command queue, also passed in as a parameter - cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( - cl::Context::getDefault(), cl::Device::getDefault()); - - auto vectorAddKernel = - cl::KernelFunctor< - decltype(fooPointer)&, - int*, - cl::coarse_svm_vector&, - cl::Buffer, - int, - cl::Pipe&, - cl::DeviceCommandQueue - >(vectorAddProgram, "vectorAdd"); - - // Ensure that the additional SVM pointer is available to the kernel - // This one was not passed as a parameter - vectorAddKernel.setSVMPointers(anSVMInt); - - cl_int error; - vectorAddKernel( - cl::EnqueueArgs( - cl::NDRange(numElements/2), - cl::NDRange(numElements/2)), - fooPointer, - inputA.data(), - inputB, - outputBuffer, - 3, - aPipe, - defaultDeviceQueue, - error - ); - - cl::copy(outputBuffer, output.begin(), output.end()); - - cl::Device d = 
cl::Device::getDefault(); - - std::cout << "Output:\n"; - for (int i = 1; i < numElements; ++i) { - std::cout << "\t" << output[i] << "\n"; - } - std::cout << "\n\n"; - - return 0; - } + #define CL_HPP_ENABLE_EXCEPTIONS + #define CL_HPP_TARGET_OPENCL_VERSION 200 + + #include + #include + #include + #include + #include + + const int numElements = 32; + + int main(void) + { + // Filter for a 2.0 or newer platform and set it as the default + std::vector platforms; + cl::Platform::get(&platforms); + cl::Platform plat; + for (auto &p : platforms) { + std::string platver = p.getInfo(); + if (platver.find("OpenCL 2.") != std::string::npos || + platver.find("OpenCL 3.") != std::string::npos) { + // Note: an OpenCL 3.x platform may not support all required features! + plat = p; + } + } + if (plat() == 0) { + std::cout << "No OpenCL 2.0 or newer platform found.\n"; + return -1; + } + + cl::Platform newP = cl::Platform::setDefault(plat); + if (newP != plat) { + std::cout << "Error setting default platform.\n"; + return -1; + } + + // C++11 raw string literal for the first kernel + std::string kernel1{R"CLC( + global int globalA; + kernel void updateGlobal() + { + globalA = 75; + } + )CLC"}; + + // Raw string literal for the second kernel + std::string kernel2{R"CLC( + typedef struct { global int *bar; } Foo; + kernel void vectorAdd(global const Foo* aNum, global const int *inputA, global const int + *inputB, global int *output, int val, write_only pipe int outPipe, queue_t childQueue) + { + output[get_global_id(0)] = inputA[get_global_id(0)] + inputB[get_global_id(0)] + val + + *(aNum->bar); write_pipe(outPipe, &val); queue_t default_queue = get_default_queue(); ndrange_t + ndrange = ndrange_1D(get_global_size(0)/2, get_global_size(0)/2); + + // Have a child kernel write into third quarter of output + enqueue_kernel(default_queue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*2 + get_global_id(0)] = + inputA[get_global_size(0)*2 + get_global_id(0)] 
+ inputB[get_global_size(0)*2 + + get_global_id(0)] + globalA; + }); + + // Have a child kernel write into last quarter of output + enqueue_kernel(childQueue, CLK_ENQUEUE_FLAGS_WAIT_KERNEL, ndrange, + ^{ + output[get_global_size(0)*3 + get_global_id(0)] = + inputA[get_global_size(0)*3 + get_global_id(0)] + inputB[get_global_size(0)*3 + + get_global_id(0)] + globalA + 2; + }); + } + )CLC"}; + + std::vector programStrings; + programStrings.push_back(kernel1); + programStrings.push_back(kernel2); + + cl::Program vectorAddProgram(programStrings); + try { + vectorAddProgram.build("-cl-std=CL2.0"); + } + catch (...) { + // Print build info for all devices + cl_int buildErr = CL_SUCCESS; + auto buildInfo = vectorAddProgram.getBuildInfo(&buildErr); + for (auto &pair : buildInfo) { + std::cerr << pair.second << std::endl << std::endl; + } + + return 1; + } + + typedef struct { int *bar; } Foo; + + // Get and run kernel that initializes the program-scope global + // A test for kernels that take no arguments + auto program2Kernel = + cl::KernelFunctor<>(vectorAddProgram, "updateGlobal"); + program2Kernel( + cl::EnqueueArgs( + cl::NDRange(1))); + + ////////////////// + // SVM allocations + + auto anSVMInt = cl::allocate_svm>(); + *anSVMInt = 5; + cl::SVMAllocator>> svmAllocReadOnly; + auto fooPointer = cl::allocate_pointer(svmAllocReadOnly); + fooPointer->bar = anSVMInt.get(); + cl::SVMAllocator> svmAlloc; + std::vector>> inputA(numElements, 1, + svmAlloc); cl::coarse_svm_vector inputB(numElements, 2, svmAlloc); + + ////////////// + // Traditional cl_mem allocations + + std::vector output(numElements, 0xdeadbeef); + cl::Buffer outputBuffer(output.begin(), output.end(), false); + cl::Pipe aPipe(sizeof(cl_int), numElements / 2); + + // Default command queue, also passed in as a parameter + cl::DeviceCommandQueue defaultDeviceQueue = cl::DeviceCommandQueue::makeDefault( + cl::Context::getDefault(), cl::Device::getDefault()); + + auto vectorAddKernel = + cl::KernelFunctor< + 
decltype(fooPointer)&, + int*, + cl::coarse_svm_vector&, + cl::Buffer, + int, + cl::Pipe&, + cl::DeviceCommandQueue + >(vectorAddProgram, "vectorAdd"); + + // Ensure that the additional SVM pointer is available to the kernel + // This one was not passed as a parameter + vectorAddKernel.setSVMPointers(anSVMInt); + + cl_int error; + vectorAddKernel( + cl::EnqueueArgs( + cl::NDRange(numElements/2), + cl::NDRange(numElements/2)), + fooPointer, + inputA.data(), + inputB, + outputBuffer, + 3, + aPipe, + defaultDeviceQueue, + error + ); + + cl::copy(outputBuffer, output.begin(), output.end()); + + cl::Device d = cl::Device::getDefault(); + + std::cout << "Output:\n"; + for (int i = 1; i < numElements; ++i) { + std::cout << "\t" << output[i] << "\n"; + } + std::cout << "\n\n"; + + return 0; + } * * \endcode * @@ -392,175 +393,185 @@ #ifndef CL_HPP_ #define CL_HPP_ +#if defined(LIBRAPID_GNU) || defined(LIBRAPID_CLANG) +# pragma GCC diagnostic push +# pragma GCC diagnostic ignored "-Wignored-attributes" +#endif + /* Handle deprecated preprocessor definitions. In each case, we only check for * the old name if the new name is not defined, so that user code can define * both and hence work with either version of the bindings. */ #if !defined(CL_HPP_USE_DX_INTEROP) && defined(USE_DX_INTEROP) -# pragma message("opencl.hpp: USE_DX_INTEROP is deprecated. Define CL_HPP_USE_DX_INTEROP instead") -# define CL_HPP_USE_DX_INTEROP +# pragma message( \ + "opencl.hpp: USE_DX_INTEROP is deprecated. Define CL_HPP_USE_DX_INTEROP instead") +# define CL_HPP_USE_DX_INTEROP #endif #if !defined(CL_HPP_ENABLE_EXCEPTIONS) && defined(__CL_ENABLE_EXCEPTIONS) -# pragma message("opencl.hpp: __CL_ENABLE_EXCEPTIONS is deprecated. Define CL_HPP_ENABLE_EXCEPTIONS instead") -# define CL_HPP_ENABLE_EXCEPTIONS +# pragma message( \ + "opencl.hpp: __CL_ENABLE_EXCEPTIONS is deprecated. 
Define CL_HPP_ENABLE_EXCEPTIONS instead") +# define CL_HPP_ENABLE_EXCEPTIONS #endif #if !defined(CL_HPP_NO_STD_VECTOR) && defined(__NO_STD_VECTOR) -# pragma message("opencl.hpp: __NO_STD_VECTOR is deprecated. Define CL_HPP_NO_STD_VECTOR instead") -# define CL_HPP_NO_STD_VECTOR +# pragma message( \ + "opencl.hpp: __NO_STD_VECTOR is deprecated. Define CL_HPP_NO_STD_VECTOR instead") +# define CL_HPP_NO_STD_VECTOR #endif #if !defined(CL_HPP_NO_STD_STRING) && defined(__NO_STD_STRING) -# pragma message("opencl.hpp: __NO_STD_STRING is deprecated. Define CL_HPP_NO_STD_STRING instead") -# define CL_HPP_NO_STD_STRING +# pragma message( \ + "opencl.hpp: __NO_STD_STRING is deprecated. Define CL_HPP_NO_STD_STRING instead") +# define CL_HPP_NO_STD_STRING #endif #if defined(VECTOR_CLASS) -# pragma message("opencl.hpp: VECTOR_CLASS is deprecated. Alias cl::vector instead") +# pragma message("opencl.hpp: VECTOR_CLASS is deprecated. Alias cl::vector instead") #endif #if defined(STRING_CLASS) -# pragma message("opencl.hpp: STRING_CLASS is deprecated. Alias cl::string instead.") +# pragma message("opencl.hpp: STRING_CLASS is deprecated. Alias cl::string instead.") #endif #if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) && defined(__CL_USER_OVERRIDE_ERROR_STRINGS) -# pragma message("opencl.hpp: __CL_USER_OVERRIDE_ERROR_STRINGS is deprecated. Define CL_HPP_USER_OVERRIDE_ERROR_STRINGS instead") -# define CL_HPP_USER_OVERRIDE_ERROR_STRINGS +# pragma message( \ + "opencl.hpp: __CL_USER_OVERRIDE_ERROR_STRINGS is deprecated. Define CL_HPP_USER_OVERRIDE_ERROR_STRINGS instead") +# define CL_HPP_USER_OVERRIDE_ERROR_STRINGS #endif /* Warn about features that are no longer supported */ #if defined(__USE_DEV_VECTOR) -# pragma message("opencl.hpp: __USE_DEV_VECTOR is no longer supported. Expect compilation errors") +# pragma message( \ + "opencl.hpp: __USE_DEV_VECTOR is no longer supported. 
Expect compilation errors") #endif #if defined(__USE_DEV_STRING) -# pragma message("opencl.hpp: __USE_DEV_STRING is no longer supported. Expect compilation errors") +# pragma message( \ + "opencl.hpp: __USE_DEV_STRING is no longer supported. Expect compilation errors") #endif /* Detect which version to target */ #if !defined(CL_HPP_TARGET_OPENCL_VERSION) -# pragma message("opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not defined. It will default to 300 (OpenCL 3.0)") -# define CL_HPP_TARGET_OPENCL_VERSION 300 +# pragma message( \ + "opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not defined. It will default to 300 (OpenCL 3.0)") +# define CL_HPP_TARGET_OPENCL_VERSION 300 #endif -#if CL_HPP_TARGET_OPENCL_VERSION != 100 && \ - CL_HPP_TARGET_OPENCL_VERSION != 110 && \ - CL_HPP_TARGET_OPENCL_VERSION != 120 && \ - CL_HPP_TARGET_OPENCL_VERSION != 200 && \ - CL_HPP_TARGET_OPENCL_VERSION != 210 && \ - CL_HPP_TARGET_OPENCL_VERSION != 220 && \ - CL_HPP_TARGET_OPENCL_VERSION != 300 -# pragma message("opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). It will be set to 300 (OpenCL 3.0).") -# undef CL_HPP_TARGET_OPENCL_VERSION -# define CL_HPP_TARGET_OPENCL_VERSION 300 +#if CL_HPP_TARGET_OPENCL_VERSION != 100 && CL_HPP_TARGET_OPENCL_VERSION != 110 && \ + CL_HPP_TARGET_OPENCL_VERSION != 120 && CL_HPP_TARGET_OPENCL_VERSION != 200 && \ + CL_HPP_TARGET_OPENCL_VERSION != 210 && CL_HPP_TARGET_OPENCL_VERSION != 220 && \ + CL_HPP_TARGET_OPENCL_VERSION != 300 +# pragma message( \ + "opencl.hpp: CL_HPP_TARGET_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). 
It will be set to 300 (OpenCL 3.0).") +# undef CL_HPP_TARGET_OPENCL_VERSION +# define CL_HPP_TARGET_OPENCL_VERSION 300 #endif /* Forward target OpenCL version to C headers if necessary */ #if defined(CL_TARGET_OPENCL_VERSION) /* Warn if prior definition of CL_TARGET_OPENCL_VERSION is lower than * requested C++ bindings version */ -#if CL_TARGET_OPENCL_VERSION < CL_HPP_TARGET_OPENCL_VERSION -# pragma message("CL_TARGET_OPENCL_VERSION is already defined as is lower than CL_HPP_TARGET_OPENCL_VERSION") -#endif +# if CL_TARGET_OPENCL_VERSION < CL_HPP_TARGET_OPENCL_VERSION +# pragma message( \ + "CL_TARGET_OPENCL_VERSION is already defined as is lower than CL_HPP_TARGET_OPENCL_VERSION") +# endif #else -# define CL_TARGET_OPENCL_VERSION CL_HPP_TARGET_OPENCL_VERSION +# define CL_TARGET_OPENCL_VERSION CL_HPP_TARGET_OPENCL_VERSION #endif #if !defined(CL_HPP_MINIMUM_OPENCL_VERSION) -# define CL_HPP_MINIMUM_OPENCL_VERSION 200 +# define CL_HPP_MINIMUM_OPENCL_VERSION 200 #endif -#if CL_HPP_MINIMUM_OPENCL_VERSION != 100 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 110 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 120 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 200 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 210 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 220 && \ - CL_HPP_MINIMUM_OPENCL_VERSION != 300 -# pragma message("opencl.hpp: CL_HPP_MINIMUM_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). It will be set to 100") -# undef CL_HPP_MINIMUM_OPENCL_VERSION -# define CL_HPP_MINIMUM_OPENCL_VERSION 100 +#if CL_HPP_MINIMUM_OPENCL_VERSION != 100 && CL_HPP_MINIMUM_OPENCL_VERSION != 110 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 120 && CL_HPP_MINIMUM_OPENCL_VERSION != 200 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 210 && CL_HPP_MINIMUM_OPENCL_VERSION != 220 && \ + CL_HPP_MINIMUM_OPENCL_VERSION != 300 +# pragma message( \ + "opencl.hpp: CL_HPP_MINIMUM_OPENCL_VERSION is not a valid value (100, 110, 120, 200, 210, 220 or 300). 
It will be set to 100") +# undef CL_HPP_MINIMUM_OPENCL_VERSION +# define CL_HPP_MINIMUM_OPENCL_VERSION 100 #endif #if CL_HPP_MINIMUM_OPENCL_VERSION > CL_HPP_TARGET_OPENCL_VERSION -# error "CL_HPP_MINIMUM_OPENCL_VERSION must not be greater than CL_HPP_TARGET_OPENCL_VERSION" +# error "CL_HPP_MINIMUM_OPENCL_VERSION must not be greater than CL_HPP_TARGET_OPENCL_VERSION" #endif #if CL_HPP_MINIMUM_OPENCL_VERSION <= 100 && !defined(CL_USE_DEPRECATED_OPENCL_1_0_APIS) -# define CL_USE_DEPRECATED_OPENCL_1_0_APIS +# define CL_USE_DEPRECATED_OPENCL_1_0_APIS #endif #if CL_HPP_MINIMUM_OPENCL_VERSION <= 110 && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -# define CL_USE_DEPRECATED_OPENCL_1_1_APIS +# define CL_USE_DEPRECATED_OPENCL_1_1_APIS #endif #if CL_HPP_MINIMUM_OPENCL_VERSION <= 120 && !defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) -# define CL_USE_DEPRECATED_OPENCL_1_2_APIS +# define CL_USE_DEPRECATED_OPENCL_1_2_APIS #endif #if CL_HPP_MINIMUM_OPENCL_VERSION <= 200 && !defined(CL_USE_DEPRECATED_OPENCL_2_0_APIS) -# define CL_USE_DEPRECATED_OPENCL_2_0_APIS +# define CL_USE_DEPRECATED_OPENCL_2_0_APIS #endif #if CL_HPP_MINIMUM_OPENCL_VERSION <= 210 && !defined(CL_USE_DEPRECATED_OPENCL_2_1_APIS) -# define CL_USE_DEPRECATED_OPENCL_2_1_APIS +# define CL_USE_DEPRECATED_OPENCL_2_1_APIS #endif #if CL_HPP_MINIMUM_OPENCL_VERSION <= 220 && !defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) -# define CL_USE_DEPRECATED_OPENCL_2_2_APIS +# define CL_USE_DEPRECATED_OPENCL_2_2_APIS #endif #ifdef _WIN32 -#include +# include -#if defined(CL_HPP_USE_DX_INTEROP) -#include -#include -#endif +# if defined(CL_HPP_USE_DX_INTEROP) +# include +# include +# endif #endif // _WIN32 #if defined(_MSC_VER) -#include -#endif // _MSC_VER - - // Check for a valid C++ version +# include +#endif // _MSC_VER + +// Check for a valid C++ version -// Need to do both tests here because for some reason __cplusplus is not +// Need to do both tests here because for some reason __cplusplus is not // updated in visual studio #if 
(!defined(_MSC_VER) && __cplusplus < 201103L) || (defined(_MSC_VER) && _MSC_VER < 1700) -#error Visual studio 2013 or another C++11-supporting compiler required +# error Visual studio 2013 or another C++11-supporting compiler required #endif #if defined(__APPLE__) || defined(__MACOSX) -#include +# include #else -#include +# include #endif // !__APPLE__ #if __cplusplus >= 201703L -# define CL_HPP_DEFINE_STATIC_MEMBER_ inline +# define CL_HPP_DEFINE_STATIC_MEMBER_ inline #elif defined(_MSC_VER) -# define CL_HPP_DEFINE_STATIC_MEMBER_ __declspec(selectany) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __declspec(selectany) #elif defined(__MINGW32__) -# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((selectany)) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((selectany)) #else -# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((weak)) +# define CL_HPP_DEFINE_STATIC_MEMBER_ __attribute__((weak)) #endif // !_MSC_VER // Define deprecated prefixes and suffixes to ensure compilation // in case they are not pre-defined #if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) -#define CL_API_PREFIX__VERSION_1_1_DEPRECATED +# define CL_API_PREFIX__VERSION_1_1_DEPRECATED #endif // #if !defined(CL_API_PREFIX__VERSION_1_1_DEPRECATED) #if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) -#define CL_API_SUFFIX__VERSION_1_1_DEPRECATED +# define CL_API_SUFFIX__VERSION_1_1_DEPRECATED #endif // #if !defined(CL_API_SUFFIX__VERSION_1_1_DEPRECATED) #if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) -#define CL_API_PREFIX__VERSION_1_2_DEPRECATED +# define CL_API_PREFIX__VERSION_1_2_DEPRECATED #endif // #if !defined(CL_API_PREFIX__VERSION_1_2_DEPRECATED) #if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) -#define CL_API_SUFFIX__VERSION_1_2_DEPRECATED +# define CL_API_SUFFIX__VERSION_1_2_DEPRECATED #endif // #if !defined(CL_API_SUFFIX__VERSION_1_2_DEPRECATED) #if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) -#define CL_API_PREFIX__VERSION_2_2_DEPRECATED +# define 
CL_API_PREFIX__VERSION_2_2_DEPRECATED #endif // #if !defined(CL_API_PREFIX__VERSION_2_2_DEPRECATED) #if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) -#define CL_API_SUFFIX__VERSION_2_2_DEPRECATED +# define CL_API_SUFFIX__VERSION_2_2_DEPRECATED #endif // #if !defined(CL_API_SUFFIX__VERSION_2_2_DEPRECATED) #if !defined(CL_CALLBACK) -#define CL_CALLBACK -#endif //CL_CALLBACK +# define CL_CALLBACK +#endif // CL_CALLBACK #include #include @@ -569,55 +580,53 @@ #include #include - // Define a size_type to represent a correctly resolved size_t #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) namespace cl { - using size_type = ::size_t; + using size_type = ::size_t; } // namespace cl -#else // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) +#else // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) namespace cl { - using size_type = size_t; + using size_type = size_t; } // namespace cl #endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) - #if defined(CL_HPP_ENABLE_EXCEPTIONS) -#include +# include #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) #if !defined(CL_HPP_NO_STD_VECTOR) -#include +# include namespace cl { - template < class T, class Alloc = std::allocator > - using vector = std::vector; + template> + using vector = std::vector; } // namespace cl #endif // #if !defined(CL_HPP_NO_STD_VECTOR) #if !defined(CL_HPP_NO_STD_STRING) -#include +# include namespace cl { - using string = std::string; + using string = std::string; } // namespace cl #endif // #if !defined(CL_HPP_NO_STD_STRING) #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) -#include +# if !defined(CL_HPP_NO_STD_UNIQUE_PTR) +# include namespace cl { - // Replace unique_ptr and allocate_pointer for internal use - // to allow user to replace them - template - using pointer = std::unique_ptr; + // Replace unique_ptr and allocate_pointer for internal use + // to allow user to replace them + template + using pointer = std::unique_ptr; } // namespace cl -#endif +# endif #endif // 
#if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if !defined(CL_HPP_NO_STD_ARRAY) -#include +# include namespace cl { - template < class T, size_type N > - using array = std::array; + template + using array = std::array; } // namespace cl #endif // #if !defined(CL_HPP_NO_STD_ARRAY) @@ -625,73 +634,54 @@ namespace cl { // use of the old size_t interface class #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) namespace cl { - namespace compatibility { - /*! \brief class used to interface between C++ and - * OpenCL C calls that require arrays of size_t values, whose - * size is known statically. - */ - template - class size_t - { - private: - size_type data_[N]; - - public: - //! \brief Initialize size_t to all 0s - size_t() - { - for (int i = 0; i < N; ++i) { - data_[i] = 0; - } - } - - size_t(const array &rhs) - { - for (int i = 0; i < N; ++i) { - data_[i] = rhs[i]; - } - } - - size_type& operator[](int index) - { - return data_[index]; - } - - const size_type& operator[](int index) const - { - return data_[index]; - } - - //! \brief Conversion operator to T*. - operator size_type* () { return data_; } - - //! \brief Conversion operator to const T*. - operator const size_type* () const { return data_; } - - operator array() const - { - array ret; - - for (int i = 0; i < N; ++i) { - ret[i] = data_[i]; - } - return ret; - } - }; - } // namespace compatibility - - template - using size_t = compatibility::size_t; + namespace compatibility { + /*! \brief class used to interface between C++ and + * OpenCL C calls that require arrays of size_t values, whose + * size is known statically. + */ + template + class size_t { + private: + size_type data_[N]; + + public: + //! 
\brief Initialize size_t to all 0s + size_t() { + for (int i = 0; i < N; ++i) { data_[i] = 0; } + } + + size_t(const array &rhs) { + for (int i = 0; i < N; ++i) { data_[i] = rhs[i]; } + } + + size_type &operator[](int index) { return data_[index]; } + + const size_type &operator[](int index) const { return data_[index]; } + + //! \brief Conversion operator to T*. + operator size_type *() { return data_; } + + //! \brief Conversion operator to const T*. + operator const size_type *() const { return data_; } + + operator array() const { + array ret; + + for (int i = 0; i < N; ++i) { ret[i] = data_[i]; } + return ret; + } + }; + } // namespace compatibility + + template + using size_t = compatibility::size_t; } // namespace cl #endif // #if defined(CL_HPP_ENABLE_SIZE_T_COMPATIBILITY) // Helper alias to avoid confusing the macros -namespace cl { - namespace detail { - using size_t_array = array; - } // namespace detail -} // namespace cl - +namespace cl { namespace detail { + using size_t_array = array; +}} // namespace cl::detail /*! 
\namespace cl * @@ -700,10985 +690,9179 @@ namespace cl { */ namespace cl { -#define CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(name) \ - using PFN_##name = name##_fn +#define CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(name) using PFN_##name = name##_fn -#define CL_HPP_INIT_CL_EXT_FCN_PTR_(name) \ - if (!pfn_##name) { \ - pfn_##name = (PFN_##name)clGetExtensionFunctionAddress(#name); \ - } +#define CL_HPP_INIT_CL_EXT_FCN_PTR_(name) \ + if (!pfn_##name) { pfn_##name = (PFN_##name)clGetExtensionFunctionAddress(#name); } -#define CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, name) \ - if (!pfn_##name) { \ - pfn_##name = (PFN_##name) \ - clGetExtensionFunctionAddressForPlatform(platform, #name); \ - } +#define CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, name) \ + if (!pfn_##name) { \ + pfn_##name = (PFN_##name)clGetExtensionFunctionAddressForPlatform(platform, #name); \ + } #ifdef cl_khr_external_memory - enum class ExternalMemoryType : cl_external_memory_handle_type_khr; + enum class ExternalMemoryType : cl_external_memory_handle_type_khr; #endif - class Memory; - class Platform; - class Program; - class Device; - class Context; - class CommandQueue; - class DeviceCommandQueue; - class Memory; - class Buffer; - class Pipe; + class Memory; + class Platform; + class Program; + class Device; + class Context; + class CommandQueue; + class DeviceCommandQueue; + class Memory; + class Buffer; + class Pipe; #ifdef cl_khr_semaphore - class Semaphore; + class Semaphore; #endif #if defined(cl_khr_command_buffer) - class CommandBufferKhr; - class MutableCommandKhr; + class CommandBufferKhr; + class MutableCommandKhr; #endif // cl_khr_command_buffer #if defined(CL_HPP_ENABLE_EXCEPTIONS) - /*! \brief Exception class - * - * This may be thrown by API functions when CL_HPP_ENABLE_EXCEPTIONS is defined. - */ - class Error : public std::exception - { - private: - cl_int err_; - const char * errStr_; - public: - /*! 
\brief Create a new CL error exception for a given error code - * and corresponding message. - * - * \param err error code value. - * - * \param errStr a descriptive string that must remain in scope until - * handling of the exception has concluded. If set, it - * will be returned by what(). - */ - Error(cl_int err, const char * errStr = nullptr) : err_(err), errStr_(errStr) - {} - - /*! \brief Get error string associated with exception - * - * \return A memory pointer to the error message string. - */ - const char * what() const noexcept override - { - if (errStr_ == nullptr) { - return "empty"; - } - else { - return errStr_; - } - } - - /*! \brief Get error code associated with exception - * - * \return The error code. - */ - cl_int err(void) const { return err_; } - }; -#define CL_HPP_ERR_STR_(x) #x + /*! \brief Exception class + * + * This may be thrown by API functions when CL_HPP_ENABLE_EXCEPTIONS is defined. + */ + class Error : public std::exception { + private: + cl_int err_; + const char *errStr_; + + public: + /*! \brief Create a new CL error exception for a given error code + * and corresponding message. + * + * \param err error code value. + * + * \param errStr a descriptive string that must remain in scope until + * handling of the exception has concluded. If set, it + * will be returned by what(). + */ + Error(cl_int err, const char *errStr = nullptr) : err_(err), errStr_(errStr) {} + + /*! \brief Get error string associated with exception + * + * \return A memory pointer to the error message string. + */ + const char *what() const noexcept override { + if (errStr_ == nullptr) { + return "empty"; + } else { + return errStr_; + } + } + + /*! \brief Get error code associated with exception + * + * \return The error code. 
+ */ + cl_int err(void) const { return err_; } + }; +# define CL_HPP_ERR_STR_(x) #x #else -#define CL_HPP_ERR_STR_(x) nullptr +# define CL_HPP_ERR_STR_(x) nullptr #endif // CL_HPP_ENABLE_EXCEPTIONS - -namespace detail -{ + namespace detail { #if defined(CL_HPP_ENABLE_EXCEPTIONS) -static inline cl_int errHandler ( - cl_int err, - const char * errStr = nullptr) -{ - if (err != CL_SUCCESS) { - throw Error(err, errStr); - } - return err; -} + static inline cl_int errHandler(cl_int err, const char *errStr = nullptr) { + if (err != CL_SUCCESS) { throw Error(err, errStr); } + return err; + } #else -static inline cl_int errHandler (cl_int err, const char * errStr = nullptr) -{ - (void) errStr; // suppress unused variable warning - return err; -} + static inline cl_int errHandler(cl_int err, const char *errStr = nullptr) { + (void)errStr; // suppress unused variable warning + return err; + } #endif // CL_HPP_ENABLE_EXCEPTIONS -} - - + } // namespace detail //! \cond DOXYGEN_DETAIL #if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) -#define __GET_DEVICE_INFO_ERR CL_HPP_ERR_STR_(clGetDeviceInfo) -#define __GET_PLATFORM_INFO_ERR CL_HPP_ERR_STR_(clGetPlatformInfo) -#define __GET_DEVICE_IDS_ERR CL_HPP_ERR_STR_(clGetDeviceIDs) -#define __GET_PLATFORM_IDS_ERR CL_HPP_ERR_STR_(clGetPlatformIDs) -#define __GET_CONTEXT_INFO_ERR CL_HPP_ERR_STR_(clGetContextInfo) -#define __GET_EVENT_INFO_ERR CL_HPP_ERR_STR_(clGetEventInfo) -#define __GET_EVENT_PROFILE_INFO_ERR CL_HPP_ERR_STR_(clGetEventProfileInfo) -#define __GET_MEM_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetMemObjectInfo) -#define __GET_IMAGE_INFO_ERR CL_HPP_ERR_STR_(clGetImageInfo) -#define __GET_SAMPLER_INFO_ERR CL_HPP_ERR_STR_(clGetSamplerInfo) -#define __GET_KERNEL_INFO_ERR CL_HPP_ERR_STR_(clGetKernelInfo) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __GET_KERNEL_ARG_INFO_ERR CL_HPP_ERR_STR_(clGetKernelArgInfo) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define 
__GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfo) -#else -#define __GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfoKHR) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __GET_KERNEL_WORK_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelWorkGroupInfo) -#define __GET_PROGRAM_INFO_ERR CL_HPP_ERR_STR_(clGetProgramInfo) -#define __GET_PROGRAM_BUILD_INFO_ERR CL_HPP_ERR_STR_(clGetProgramBuildInfo) -#define __GET_COMMAND_QUEUE_INFO_ERR CL_HPP_ERR_STR_(clGetCommandQueueInfo) - -#define __CREATE_CONTEXT_ERR CL_HPP_ERR_STR_(clCreateContext) -#define __CREATE_CONTEXT_FROM_TYPE_ERR CL_HPP_ERR_STR_(clCreateContextFromType) -#define __GET_SUPPORTED_IMAGE_FORMATS_ERR CL_HPP_ERR_STR_(clGetSupportedImageFormats) -#if CL_HPP_TARGET_OPENCL_VERSION >= 300 -#define __SET_CONTEXT_DESCTRUCTOR_CALLBACK_ERR CL_HPP_ERR_STR_(clSetContextDestructorCallback) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 300 - -#define __CREATE_BUFFER_ERR CL_HPP_ERR_STR_(clCreateBuffer) -#define __COPY_ERR CL_HPP_ERR_STR_(cl::copy) -#define __CREATE_SUBBUFFER_ERR CL_HPP_ERR_STR_(clCreateSubBuffer) -#define __CREATE_GL_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) -#define __CREATE_GL_RENDER_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) -#define __GET_GL_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetGLObjectInfo) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_IMAGE_ERR CL_HPP_ERR_STR_(clCreateImage) -#define __CREATE_GL_TEXTURE_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture) -#define __IMAGE_DIMENSION_ERR CL_HPP_ERR_STR_(Incorrect image dimensions) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR CL_HPP_ERR_STR_(clSetMemObjectDestructorCallback) - -#define __CREATE_USER_EVENT_ERR CL_HPP_ERR_STR_(clCreateUserEvent) -#define __SET_USER_EVENT_STATUS_ERR CL_HPP_ERR_STR_(clSetUserEventStatus) -#define __SET_EVENT_CALLBACK_ERR CL_HPP_ERR_STR_(clSetEventCallback) -#define __WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clWaitForEvents) - 
-#define __CREATE_KERNEL_ERR CL_HPP_ERR_STR_(clCreateKernel) -#define __SET_KERNEL_ARGS_ERR CL_HPP_ERR_STR_(clSetKernelArg) -#define __CREATE_PROGRAM_WITH_SOURCE_ERR CL_HPP_ERR_STR_(clCreateProgramWithSource) -#define __CREATE_PROGRAM_WITH_BINARY_ERR CL_HPP_ERR_STR_(clCreateProgramWithBinary) -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL) -#else -#define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithILKHR) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR CL_HPP_ERR_STR_(clCreateProgramWithBuiltInKernels) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __BUILD_PROGRAM_ERR CL_HPP_ERR_STR_(clBuildProgram) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __COMPILE_PROGRAM_ERR CL_HPP_ERR_STR_(clCompileProgram) -#define __LINK_PROGRAM_ERR CL_HPP_ERR_STR_(clLinkProgram) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_KERNELS_IN_PROGRAM_ERR CL_HPP_ERR_STR_(clCreateKernelsInProgram) - -#if CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateCommandQueueWithProperties) -#define __CREATE_SAMPLER_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSamplerWithProperties) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 -#define __SET_COMMAND_QUEUE_PROPERTY_ERR CL_HPP_ERR_STR_(clSetCommandQueueProperty) -#define __ENQUEUE_READ_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueReadBuffer) -#define __ENQUEUE_READ_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueReadBufferRect) -#define __ENQUEUE_WRITE_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueWriteBuffer) -#define __ENQUEUE_WRITE_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueWriteBufferRect) -#define __ENQEUE_COPY_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyBuffer) -#define __ENQEUE_COPY_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferRect) -#define __ENQUEUE_FILL_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueFillBuffer) 
-#define __ENQUEUE_READ_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueReadImage) -#define __ENQUEUE_WRITE_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueWriteImage) -#define __ENQUEUE_COPY_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyImage) -#define __ENQUEUE_FILL_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueFillImage) -#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyImageToBuffer) -#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferToImage) -#define __ENQUEUE_MAP_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueMapBuffer) -#define __ENQUEUE_MAP_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMap) -#define __ENQUEUE_FILL_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMemFill) -#define __ENQUEUE_COPY_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMemcpy) -#define __ENQUEUE_UNMAP_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMUnmap) -#define __ENQUEUE_MAP_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueMapImage) -#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR CL_HPP_ERR_STR_(clEnqueueUnMapMemObject) -#define __ENQUEUE_NDRANGE_KERNEL_ERR CL_HPP_ERR_STR_(clEnqueueNDRangeKernel) -#define __ENQUEUE_NATIVE_KERNEL CL_HPP_ERR_STR_(clEnqueueNativeKernel) -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR CL_HPP_ERR_STR_(clEnqueueMigrateMemObjects) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __ENQUEUE_MIGRATE_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMigrateMem) -#define __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clSetDefaultDeviceCommandQueue) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 - - -#define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects) -#define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects) - -#define __CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe) -#define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo) - -#define __RETAIN_ERR CL_HPP_ERR_STR_(Retain Object) -#define __RELEASE_ERR CL_HPP_ERR_STR_(Release Object) -#define __FLUSH_ERR CL_HPP_ERR_STR_(clFlush) -#define __FINISH_ERR 
CL_HPP_ERR_STR_(clFinish) -#define __VECTOR_CAPACITY_ERR CL_HPP_ERR_STR_(Vector capacity error) - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __GET_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetHostTimer) -#define __GET_DEVICE_AND_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetDeviceAndHostTimer) -#endif -#if CL_HPP_TARGET_OPENCL_VERSION >= 220 -#define __SET_PROGRAM_RELEASE_CALLBACK_ERR CL_HPP_ERR_STR_(clSetProgramReleaseCallback) -#define __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR CL_HPP_ERR_STR_(clSetProgramSpecializationConstant) -#endif - -#ifdef cl_khr_external_memory -#define __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR CL_HPP_ERR_STR_(clEnqueueAcquireExternalMemObjectsKHR) -#define __ENQUEUE_RELEASE_EXTERNAL_MEMORY_ERR CL_HPP_ERR_STR_(clEnqueueReleaseExternalMemObjectsKHR) -#endif - -#ifdef cl_khr_semaphore -#define __GET_SEMAPHORE_KHR_INFO_ERR CL_HPP_ERR_STR_(clGetSemaphoreInfoKHR) -#define __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSemaphoreWithPropertiesKHR) -#define __ENQUEUE_WAIT_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clEnqueueWaitSemaphoresKHR) -#define __ENQUEUE_SIGNAL_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clEnqueueSignalSemaphoresKHR) -#define __RETAIN_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clRetainSemaphoreKHR) -#define __RELEASE_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clReleaseSemaphoreKHR) -#endif - -#ifdef cl_khr_external_semaphore -#define __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR CL_HPP_ERR_STR_(clGetSemaphoreHandleForTypeKHR) -#endif // cl_khr_external_semaphore - -#if defined(cl_khr_command_buffer) -#define __CREATE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCreateCommandBufferKHR) -#define __GET_COMMAND_BUFFER_INFO_KHR_ERR CL_HPP_ERR_STR_(clGetCommandBufferInfoKHR) -#define __FINALIZE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clFinalizeCommandBufferKHR) -#define __ENQUEUE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clEnqueueCommandBufferKHR) -#define __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR CL_HPP_ERR_STR_(clCommandBarrierWithWaitListKHR) -#define 
__COMMAND_COPY_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyBufferKHR) -#define __COMMAND_COPY_BUFFER_RECT_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyBufferRectKHR) -#define __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyBufferToImageKHR) -#define __COMMAND_COPY_IMAGE_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyImageKHR) -#define __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyImageToBufferKHR) -#define __COMMAND_FILL_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCommandFillBufferKHR) -#define __COMMAND_FILL_IMAGE_KHR_ERR CL_HPP_ERR_STR_(clCommandFillImageKHR) -#define __COMMAND_NDRANGE_KERNEL_KHR_ERR CL_HPP_ERR_STR_(clCommandNDRangeKernelKHR) -#define __UPDATE_MUTABLE_COMMANDS_KHR_ERR CL_HPP_ERR_STR_(clUpdateMutableCommandsKHR) -#define __GET_MUTABLE_COMMAND_INFO_KHR_ERR CL_HPP_ERR_STR_(clGetMutableCommandInfoKHR) -#define __RETAIN_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clRetainCommandBufferKHR) -#define __RELEASE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clReleaseCommandBufferKHR) -#endif // cl_khr_command_buffer - -#if defined(cl_ext_image_requirements_info) -#define __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR CL_HPP_ERR_STR_(clGetImageRequirementsInfoEXT) -#endif //cl_ext_image_requirements_info +# define __GET_DEVICE_INFO_ERR CL_HPP_ERR_STR_(clGetDeviceInfo) +# define __GET_PLATFORM_INFO_ERR CL_HPP_ERR_STR_(clGetPlatformInfo) +# define __GET_DEVICE_IDS_ERR CL_HPP_ERR_STR_(clGetDeviceIDs) +# define __GET_PLATFORM_IDS_ERR CL_HPP_ERR_STR_(clGetPlatformIDs) +# define __GET_CONTEXT_INFO_ERR CL_HPP_ERR_STR_(clGetContextInfo) +# define __GET_EVENT_INFO_ERR CL_HPP_ERR_STR_(clGetEventInfo) +# define __GET_EVENT_PROFILE_INFO_ERR CL_HPP_ERR_STR_(clGetEventProfileInfo) +# define __GET_MEM_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetMemObjectInfo) +# define __GET_IMAGE_INFO_ERR CL_HPP_ERR_STR_(clGetImageInfo) +# define __GET_SAMPLER_INFO_ERR CL_HPP_ERR_STR_(clGetSamplerInfo) +# define __GET_KERNEL_INFO_ERR CL_HPP_ERR_STR_(clGetKernelInfo) +# if CL_HPP_TARGET_OPENCL_VERSION >= 
120 +# define __GET_KERNEL_ARG_INFO_ERR CL_HPP_ERR_STR_(clGetKernelArgInfo) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 +# define __GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfo) +# else +# define __GET_KERNEL_SUB_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelSubGroupInfoKHR) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 +# define __GET_KERNEL_WORK_GROUP_INFO_ERR CL_HPP_ERR_STR_(clGetKernelWorkGroupInfo) +# define __GET_PROGRAM_INFO_ERR CL_HPP_ERR_STR_(clGetProgramInfo) +# define __GET_PROGRAM_BUILD_INFO_ERR CL_HPP_ERR_STR_(clGetProgramBuildInfo) +# define __GET_COMMAND_QUEUE_INFO_ERR CL_HPP_ERR_STR_(clGetCommandQueueInfo) + +# define __CREATE_CONTEXT_ERR CL_HPP_ERR_STR_(clCreateContext) +# define __CREATE_CONTEXT_FROM_TYPE_ERR CL_HPP_ERR_STR_(clCreateContextFromType) +# define __GET_SUPPORTED_IMAGE_FORMATS_ERR CL_HPP_ERR_STR_(clGetSupportedImageFormats) +# if CL_HPP_TARGET_OPENCL_VERSION >= 300 +# define __SET_CONTEXT_DESCTRUCTOR_CALLBACK_ERR \ + CL_HPP_ERR_STR_(clSetContextDestructorCallback) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 300 + +# define __CREATE_BUFFER_ERR CL_HPP_ERR_STR_(clCreateBuffer) +# define __COPY_ERR CL_HPP_ERR_STR_(cl::copy) +# define __CREATE_SUBBUFFER_ERR CL_HPP_ERR_STR_(clCreateSubBuffer) +# define __CREATE_GL_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) +# define __CREATE_GL_RENDER_BUFFER_ERR CL_HPP_ERR_STR_(clCreateFromGLBuffer) +# define __GET_GL_OBJECT_INFO_ERR CL_HPP_ERR_STR_(clGetGLObjectInfo) +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __CREATE_IMAGE_ERR CL_HPP_ERR_STR_(clCreateImage) +# define __CREATE_GL_TEXTURE_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture) +# define __IMAGE_DIMENSION_ERR CL_HPP_ERR_STR_(Incorrect image dimensions) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR \ + CL_HPP_ERR_STR_(clSetMemObjectDestructorCallback) + +# define __CREATE_USER_EVENT_ERR CL_HPP_ERR_STR_(clCreateUserEvent) 
+# define __SET_USER_EVENT_STATUS_ERR CL_HPP_ERR_STR_(clSetUserEventStatus) +# define __SET_EVENT_CALLBACK_ERR CL_HPP_ERR_STR_(clSetEventCallback) +# define __WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clWaitForEvents) + +# define __CREATE_KERNEL_ERR CL_HPP_ERR_STR_(clCreateKernel) +# define __SET_KERNEL_ARGS_ERR CL_HPP_ERR_STR_(clSetKernelArg) +# define __CREATE_PROGRAM_WITH_SOURCE_ERR CL_HPP_ERR_STR_(clCreateProgramWithSource) +# define __CREATE_PROGRAM_WITH_BINARY_ERR CL_HPP_ERR_STR_(clCreateProgramWithBinary) +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 +# define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithIL) +# else +# define __CREATE_PROGRAM_WITH_IL_ERR CL_HPP_ERR_STR_(clCreateProgramWithILKHR) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR \ + CL_HPP_ERR_STR_(clCreateProgramWithBuiltInKernels) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __BUILD_PROGRAM_ERR CL_HPP_ERR_STR_(clBuildProgram) +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __COMPILE_PROGRAM_ERR CL_HPP_ERR_STR_(clCompileProgram) +# define __LINK_PROGRAM_ERR CL_HPP_ERR_STR_(clLinkProgram) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __CREATE_KERNELS_IN_PROGRAM_ERR CL_HPP_ERR_STR_(clCreateKernelsInProgram) + +# if CL_HPP_TARGET_OPENCL_VERSION >= 200 +# define __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR \ + CL_HPP_ERR_STR_(clCreateCommandQueueWithProperties) +# define __CREATE_SAMPLER_WITH_PROPERTIES_ERR CL_HPP_ERR_STR_(clCreateSamplerWithProperties) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 +# define __SET_COMMAND_QUEUE_PROPERTY_ERR CL_HPP_ERR_STR_(clSetCommandQueueProperty) +# define __ENQUEUE_READ_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueReadBuffer) +# define __ENQUEUE_READ_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueReadBufferRect) +# define __ENQUEUE_WRITE_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueWriteBuffer) +# define __ENQUEUE_WRITE_BUFFER_RECT_ERR 
CL_HPP_ERR_STR_(clEnqueueWriteBufferRect) +# define __ENQEUE_COPY_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyBuffer) +# define __ENQEUE_COPY_BUFFER_RECT_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferRect) +# define __ENQUEUE_FILL_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueFillBuffer) +# define __ENQUEUE_READ_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueReadImage) +# define __ENQUEUE_WRITE_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueWriteImage) +# define __ENQUEUE_COPY_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyImage) +# define __ENQUEUE_FILL_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueFillImage) +# define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueCopyImageToBuffer) +# define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueCopyBufferToImage) +# define __ENQUEUE_MAP_BUFFER_ERR CL_HPP_ERR_STR_(clEnqueueMapBuffer) +# define __ENQUEUE_MAP_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMap) +# define __ENQUEUE_FILL_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMemFill) +# define __ENQUEUE_COPY_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMemcpy) +# define __ENQUEUE_UNMAP_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMUnmap) +# define __ENQUEUE_MAP_IMAGE_ERR CL_HPP_ERR_STR_(clEnqueueMapImage) +# define __ENQUEUE_UNMAP_MEM_OBJECT_ERR CL_HPP_ERR_STR_(clEnqueueUnMapMemObject) +# define __ENQUEUE_NDRANGE_KERNEL_ERR CL_HPP_ERR_STR_(clEnqueueNDRangeKernel) +# define __ENQUEUE_NATIVE_KERNEL CL_HPP_ERR_STR_(clEnqueueNativeKernel) +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR CL_HPP_ERR_STR_(clEnqueueMigrateMemObjects) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 +# define __ENQUEUE_MIGRATE_SVM_ERR CL_HPP_ERR_STR_(clEnqueueSVMMigrateMem) +# define __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR \ + CL_HPP_ERR_STR_(clSetDefaultDeviceCommandQueue) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 + +# define __ENQUEUE_ACQUIRE_GL_ERR CL_HPP_ERR_STR_(clEnqueueAcquireGLObjects) +# define __ENQUEUE_RELEASE_GL_ERR CL_HPP_ERR_STR_(clEnqueueReleaseGLObjects) + +# define 
__CREATE_PIPE_ERR CL_HPP_ERR_STR_(clCreatePipe) +# define __GET_PIPE_INFO_ERR CL_HPP_ERR_STR_(clGetPipeInfo) + +# define __RETAIN_ERR CL_HPP_ERR_STR_(Retain Object) +# define __RELEASE_ERR CL_HPP_ERR_STR_(Release Object) +# define __FLUSH_ERR CL_HPP_ERR_STR_(clFlush) +# define __FINISH_ERR CL_HPP_ERR_STR_(clFinish) +# define __VECTOR_CAPACITY_ERR CL_HPP_ERR_STR_(Vector capacity error) + +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 +# define __GET_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetHostTimer) +# define __GET_DEVICE_AND_HOST_TIMER_ERR CL_HPP_ERR_STR_(clGetDeviceAndHostTimer) +# endif +# if CL_HPP_TARGET_OPENCL_VERSION >= 220 +# define __SET_PROGRAM_RELEASE_CALLBACK_ERR CL_HPP_ERR_STR_(clSetProgramReleaseCallback) +# define __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR \ + CL_HPP_ERR_STR_(clSetProgramSpecializationConstant) +# endif + +# ifdef cl_khr_external_memory +# define __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR \ + CL_HPP_ERR_STR_(clEnqueueAcquireExternalMemObjectsKHR) +# define __ENQUEUE_RELEASE_EXTERNAL_MEMORY_ERR \ + CL_HPP_ERR_STR_(clEnqueueReleaseExternalMemObjectsKHR) +# endif + +# ifdef cl_khr_semaphore +# define __GET_SEMAPHORE_KHR_INFO_ERR CL_HPP_ERR_STR_(clGetSemaphoreInfoKHR) +# define __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR \ + CL_HPP_ERR_STR_(clCreateSemaphoreWithPropertiesKHR) +# define __ENQUEUE_WAIT_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clEnqueueWaitSemaphoresKHR) +# define __ENQUEUE_SIGNAL_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clEnqueueSignalSemaphoresKHR) +# define __RETAIN_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clRetainSemaphoreKHR) +# define __RELEASE_SEMAPHORE_KHR_ERR CL_HPP_ERR_STR_(clReleaseSemaphoreKHR) +# endif + +# ifdef cl_khr_external_semaphore +# define __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR \ + CL_HPP_ERR_STR_(clGetSemaphoreHandleForTypeKHR) +# endif // cl_khr_external_semaphore + +# if defined(cl_khr_command_buffer) +# define __CREATE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCreateCommandBufferKHR) +# define __GET_COMMAND_BUFFER_INFO_KHR_ERR 
CL_HPP_ERR_STR_(clGetCommandBufferInfoKHR) +# define __FINALIZE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clFinalizeCommandBufferKHR) +# define __ENQUEUE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clEnqueueCommandBufferKHR) +# define __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR \ + CL_HPP_ERR_STR_(clCommandBarrierWithWaitListKHR) +# define __COMMAND_COPY_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyBufferKHR) +# define __COMMAND_COPY_BUFFER_RECT_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyBufferRectKHR) +# define __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR \ + CL_HPP_ERR_STR_(clCommandCopyBufferToImageKHR) +# define __COMMAND_COPY_IMAGE_KHR_ERR CL_HPP_ERR_STR_(clCommandCopyImageKHR) +# define __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR \ + CL_HPP_ERR_STR_(clCommandCopyImageToBufferKHR) +# define __COMMAND_FILL_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clCommandFillBufferKHR) +# define __COMMAND_FILL_IMAGE_KHR_ERR CL_HPP_ERR_STR_(clCommandFillImageKHR) +# define __COMMAND_NDRANGE_KERNEL_KHR_ERR CL_HPP_ERR_STR_(clCommandNDRangeKernelKHR) +# define __UPDATE_MUTABLE_COMMANDS_KHR_ERR CL_HPP_ERR_STR_(clUpdateMutableCommandsKHR) +# define __GET_MUTABLE_COMMAND_INFO_KHR_ERR CL_HPP_ERR_STR_(clGetMutableCommandInfoKHR) +# define __RETAIN_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clRetainCommandBufferKHR) +# define __RELEASE_COMMAND_BUFFER_KHR_ERR CL_HPP_ERR_STR_(clReleaseCommandBufferKHR) +# endif // cl_khr_command_buffer + +# if defined(cl_ext_image_requirements_info) +# define __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR CL_HPP_ERR_STR_(clGetImageRequirementsInfoEXT) +# endif // cl_ext_image_requirements_info /** * CL 1.2 version that uses device fission. 
*/ -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevices) -#else -#define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevicesEXT) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevices) +# else +# define __CREATE_SUB_DEVICES_ERR CL_HPP_ERR_STR_(clCreateSubDevicesEXT) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 /** * Deprecated APIs for 1.2 */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -#define __ENQUEUE_MARKER_ERR CL_HPP_ERR_STR_(clEnqueueMarker) -#define __ENQUEUE_WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clEnqueueWaitForEvents) -#define __ENQUEUE_BARRIER_ERR CL_HPP_ERR_STR_(clEnqueueBarrier) -#define __UNLOAD_COMPILER_ERR CL_HPP_ERR_STR_(clUnloadCompiler) -#define __CREATE_GL_TEXTURE_2D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture2D) -#define __CREATE_GL_TEXTURE_3D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture3D) -#define __CREATE_IMAGE2D_ERR CL_HPP_ERR_STR_(clCreateImage2D) -#define __CREATE_IMAGE3D_ERR CL_HPP_ERR_STR_(clCreateImage3D) -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +# if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +# define __ENQUEUE_MARKER_ERR CL_HPP_ERR_STR_(clEnqueueMarker) +# define __ENQUEUE_WAIT_FOR_EVENTS_ERR CL_HPP_ERR_STR_(clEnqueueWaitForEvents) +# define __ENQUEUE_BARRIER_ERR CL_HPP_ERR_STR_(clEnqueueBarrier) +# define __UNLOAD_COMPILER_ERR CL_HPP_ERR_STR_(clUnloadCompiler) +# define __CREATE_GL_TEXTURE_2D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture2D) +# define __CREATE_GL_TEXTURE_3D_ERR CL_HPP_ERR_STR_(clCreateFromGLTexture3D) +# define __CREATE_IMAGE2D_ERR CL_HPP_ERR_STR_(clCreateImage2D) +# define __CREATE_IMAGE3D_ERR CL_HPP_ERR_STR_(clCreateImage3D) +# endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) /** * Deprecated APIs for 2.0 */ -#if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) -#define __CREATE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clCreateCommandQueue) 
-#define __ENQUEUE_TASK_ERR CL_HPP_ERR_STR_(clEnqueueTask) -#define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler) -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) +# if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) +# define __CREATE_COMMAND_QUEUE_ERR CL_HPP_ERR_STR_(clCreateCommandQueue) +# define __ENQUEUE_TASK_ERR CL_HPP_ERR_STR_(clEnqueueTask) +# define __CREATE_SAMPLER_ERR CL_HPP_ERR_STR_(clCreateSampler) +# endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) /** * CL 1.2 marker and barrier commands */ -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#define __ENQUEUE_MARKER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueMarkerWithWaitList) -#define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 +# define __ENQUEUE_MARKER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueMarkerWithWaitList) +# define __ENQUEUE_BARRIER_WAIT_LIST_ERR CL_HPP_ERR_STR_(clEnqueueBarrierWithWaitList) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 -#define __CLONE_KERNEL_ERR CL_HPP_ERR_STR_(clCloneKernel) -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 +# define __CLONE_KERNEL_ERR CL_HPP_ERR_STR_(clCloneKernel) +# endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 #endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS -//! \endcond + //!
\endcond #ifdef cl_khr_external_memory -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueAcquireExternalMemObjectsKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueReleaseExternalMemObjectsKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueAcquireExternalMemObjectsKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueReleaseExternalMemObjectsKHR); -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueAcquireExternalMemObjectsKHR pfn_clEnqueueAcquireExternalMemObjectsKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueReleaseExternalMemObjectsKHR pfn_clEnqueueReleaseExternalMemObjectsKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueAcquireExternalMemObjectsKHR + pfn_clEnqueueAcquireExternalMemObjectsKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueReleaseExternalMemObjectsKHR + pfn_clEnqueueReleaseExternalMemObjectsKHR = nullptr; #endif // cl_khr_external_memory #ifdef cl_khr_semaphore -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCreateSemaphoreWithPropertiesKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clReleaseSemaphoreKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clRetainSemaphoreKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueWaitSemaphoresKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueSignalSemaphoresKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetSemaphoreInfoKHR); - -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCreateSemaphoreWithPropertiesKHR pfn_clCreateSemaphoreWithPropertiesKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clReleaseSemaphoreKHR pfn_clReleaseSemaphoreKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clRetainSemaphoreKHR pfn_clRetainSemaphoreKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueWaitSemaphoresKHR pfn_clEnqueueWaitSemaphoresKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueSignalSemaphoresKHR pfn_clEnqueueSignalSemaphoresKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetSemaphoreInfoKHR pfn_clGetSemaphoreInfoKHR = nullptr; + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCreateSemaphoreWithPropertiesKHR); + 
CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clReleaseSemaphoreKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clRetainSemaphoreKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueWaitSemaphoresKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueSignalSemaphoresKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetSemaphoreInfoKHR); + + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCreateSemaphoreWithPropertiesKHR + pfn_clCreateSemaphoreWithPropertiesKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clReleaseSemaphoreKHR pfn_clReleaseSemaphoreKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clRetainSemaphoreKHR pfn_clRetainSemaphoreKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueWaitSemaphoresKHR pfn_clEnqueueWaitSemaphoresKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueSignalSemaphoresKHR pfn_clEnqueueSignalSemaphoresKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetSemaphoreInfoKHR pfn_clGetSemaphoreInfoKHR = nullptr; #endif // cl_khr_semaphore #ifdef cl_khr_external_semaphore -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetSemaphoreHandleForTypeKHR); -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetSemaphoreHandleForTypeKHR pfn_clGetSemaphoreHandleForTypeKHR = nullptr; + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetSemaphoreHandleForTypeKHR); + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetSemaphoreHandleForTypeKHR + pfn_clGetSemaphoreHandleForTypeKHR = nullptr; #endif // cl_khr_external_semaphore #if defined(cl_khr_command_buffer) -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCreateCommandBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clFinalizeCommandBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clRetainCommandBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clReleaseCommandBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetCommandBufferInfoKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueCommandBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandBarrierWithWaitListKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyBufferKHR); 
-CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyBufferRectKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyBufferToImageKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyImageKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyImageToBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandFillBufferKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandFillImageKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandNDRangeKernelKHR); - -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCreateCommandBufferKHR pfn_clCreateCommandBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clFinalizeCommandBufferKHR pfn_clFinalizeCommandBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clRetainCommandBufferKHR pfn_clRetainCommandBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clReleaseCommandBufferKHR pfn_clReleaseCommandBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetCommandBufferInfoKHR pfn_clGetCommandBufferInfoKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueCommandBufferKHR pfn_clEnqueueCommandBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandBarrierWithWaitListKHR pfn_clCommandBarrierWithWaitListKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyBufferKHR pfn_clCommandCopyBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyBufferRectKHR pfn_clCommandCopyBufferRectKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyBufferToImageKHR pfn_clCommandCopyBufferToImageKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyImageKHR pfn_clCommandCopyImageKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyImageToBufferKHR pfn_clCommandCopyImageToBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandFillBufferKHR pfn_clCommandFillBufferKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandFillImageKHR pfn_clCommandFillImageKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandNDRangeKernelKHR pfn_clCommandNDRangeKernelKHR = nullptr; + 
CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCreateCommandBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clFinalizeCommandBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clRetainCommandBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clReleaseCommandBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetCommandBufferInfoKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clEnqueueCommandBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandBarrierWithWaitListKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyBufferRectKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyBufferToImageKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyImageKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandCopyImageToBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandFillBufferKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandFillImageKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clCommandNDRangeKernelKHR); + + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCreateCommandBufferKHR pfn_clCreateCommandBufferKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clFinalizeCommandBufferKHR pfn_clFinalizeCommandBufferKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clRetainCommandBufferKHR pfn_clRetainCommandBufferKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clReleaseCommandBufferKHR pfn_clReleaseCommandBufferKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetCommandBufferInfoKHR pfn_clGetCommandBufferInfoKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clEnqueueCommandBufferKHR pfn_clEnqueueCommandBufferKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandBarrierWithWaitListKHR + pfn_clCommandBarrierWithWaitListKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyBufferKHR pfn_clCommandCopyBufferKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyBufferRectKHR pfn_clCommandCopyBufferRectKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyBufferToImageKHR + 
pfn_clCommandCopyBufferToImageKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyImageKHR pfn_clCommandCopyImageKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandCopyImageToBufferKHR + pfn_clCommandCopyImageToBufferKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandFillBufferKHR pfn_clCommandFillBufferKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandFillImageKHR pfn_clCommandFillImageKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clCommandNDRangeKernelKHR pfn_clCommandNDRangeKernelKHR = + nullptr; #endif /* cl_khr_command_buffer */ #if defined(cl_khr_command_buffer_mutable_dispatch) -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clUpdateMutableCommandsKHR); -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetMutableCommandInfoKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clUpdateMutableCommandsKHR); + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetMutableCommandInfoKHR); -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clUpdateMutableCommandsKHR pfn_clUpdateMutableCommandsKHR = nullptr; -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetMutableCommandInfoKHR pfn_clGetMutableCommandInfoKHR = nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clUpdateMutableCommandsKHR pfn_clUpdateMutableCommandsKHR = + nullptr; + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetMutableCommandInfoKHR pfn_clGetMutableCommandInfoKHR = + nullptr; #endif /* cl_khr_command_buffer_mutable_dispatch */ #if defined(cl_ext_image_requirements_info) -CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetImageRequirementsInfoEXT); -CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetImageRequirementsInfoEXT pfn_clGetImageRequirementsInfoEXT = nullptr; + CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_(clGetImageRequirementsInfoEXT); + CL_HPP_DEFINE_STATIC_MEMBER_ PFN_clGetImageRequirementsInfoEXT + pfn_clGetImageRequirementsInfoEXT = nullptr; #endif -namespace detail { - -// Generic getInfoHelper. 
The final parameter is used to guide overload -// resolution: the actual parameter passed is an int, which makes this -// a worse conversion sequence than a specialization that declares the -// parameter as an int. -template -inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long) -{ - return f(name, sizeof(T), param, nullptr); -} - -// Specialized for getInfo -// Assumes that the output vector was correctly resized on the way in -template -inline cl_int getInfoHelper(Func f, cl_uint name, vector>* param, int) -{ - if (name != CL_PROGRAM_BINARIES) { - return CL_INVALID_VALUE; - } - if (param) { - // Create array of pointers, calculate total size and pass pointer array in - size_type numBinaries = param->size(); - vector binariesPointers(numBinaries); - - for (size_type i = 0; i < numBinaries; ++i) - { - binariesPointers[i] = (*param)[i].data(); - } - - cl_int err = f(name, numBinaries * sizeof(unsigned char*), binariesPointers.data(), nullptr); - - if (err != CL_SUCCESS) { - return err; - } - } - - - return CL_SUCCESS; -} - -// Specialized getInfoHelper for vector params -template -inline cl_int getInfoHelper(Func f, cl_uint name, vector* param, long) -{ - size_type required; - cl_int err = f(name, 0, nullptr, &required); - if (err != CL_SUCCESS) { - return err; - } - const size_type elements = required / sizeof(T); - - // Temporary to avoid changing param on an error - vector localData(elements); - err = f(name, required, localData.data(), nullptr); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - *param = std::move(localData); - } - - return CL_SUCCESS; -} - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. 
- */ -template -inline cl_int getInfoHelper( - Func f, cl_uint name, vector* param, int, typename T::cl_type = 0) -{ - size_type required; - cl_int err = f(name, 0, nullptr, &required); - if (err != CL_SUCCESS) { - return err; - } - - const size_type elements = required / sizeof(typename T::cl_type); - - vector value(elements); - err = f(name, required, value.data(), nullptr); - if (err != CL_SUCCESS) { - return err; - } - - if (param) { - // Assign to convert CL type to T for each element - param->resize(elements); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < elements; i++) { - (*param)[i] = T(value[i], true); - } - } - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for string params -template -inline cl_int getInfoHelper(Func f, cl_uint name, string* param, long) -{ - size_type required; - cl_int err = f(name, 0, nullptr, &required); - if (err != CL_SUCCESS) { - return err; - } - - // std::string has a constant data member - // a char vector does not - if (required > 0) { - vector value(required); - err = f(name, required, value.data(), nullptr); - if (err != CL_SUCCESS) { - return err; - } - if (param) { - param->assign(value.begin(), value.end() - 1); - } - } - else if (param) { - param->assign(""); - } - return CL_SUCCESS; -} - -// Specialized GetInfoHelper for clsize_t params -template -inline cl_int getInfoHelper(Func f, cl_uint name, array* param, long) -{ - size_type required; - cl_int err = f(name, 0, nullptr, &required); - if (err != CL_SUCCESS) { - return err; - } - - size_type elements = required / sizeof(size_type); - vector value(elements, 0); - - err = f(name, required, value.data(), nullptr); - if (err != CL_SUCCESS) { - return err; - } - - // Bound the copy with N to prevent overruns - // if passed N > than the amount copied - if (elements > N) { - elements = N; - } - for (size_type i = 0; i < elements; ++i) { - (*param)[i] = value[i]; - } - - 
return CL_SUCCESS; -} - -template struct ReferenceHandler; - -/* Specialization for reference-counted types. This depends on the - * existence of Wrapper::cl_type, and none of the other types having the - * cl_type member. Note that simplify specifying the parameter as Wrapper - * does not work, because when using a derived type (e.g. Context) the generic - * template will provide a better match. - */ -template -inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0) -{ - typename T::cl_type value; - cl_int err = f(name, sizeof(value), &value, nullptr); - if (err != CL_SUCCESS) { - return err; - } - *param = value; - if (value != nullptr) - { - err = param->retain(); - if (err != CL_SUCCESS) { - return err; - } - } - return CL_SUCCESS; -} - -#define CL_HPP_PARAM_NAME_INFO_1_0_(F) \ - F(cl_platform_info, CL_PLATFORM_PROFILE, string) \ - F(cl_platform_info, CL_PLATFORM_VERSION, string) \ - F(cl_platform_info, CL_PLATFORM_NAME, string) \ - F(cl_platform_info, CL_PLATFORM_VENDOR, string) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS, string) \ - \ - F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ - F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_type) \ - F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, cl::vector) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ - F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ - 
F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, size_type) \ - F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ - F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ - F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ - F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_type) \ - F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ - F(cl_device_info, CL_DEVICE_PLATFORM, cl::Platform) \ - 
F(cl_device_info, CL_DEVICE_NAME, string) \ - F(cl_device_info, CL_DEVICE_VENDOR, string) \ - F(cl_device_info, CL_DRIVER_VERSION, string) \ - F(cl_device_info, CL_DEVICE_PROFILE, string) \ - F(cl_device_info, CL_DEVICE_VERSION, string) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS, string) \ - \ - F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ - F(cl_context_info, CL_CONTEXT_DEVICES, cl::vector) \ - F(cl_context_info, CL_CONTEXT_PROPERTIES, cl::vector) \ - \ - F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ - F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ - F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ - F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ - \ - F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ - \ - F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ - F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ - F(cl_mem_info, CL_MEM_SIZE, size_type) \ - F(cl_mem_info, CL_MEM_HOST_PTR, void*) \ - F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ - F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ - \ - F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ - F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, size_type) \ - F(cl_image_info, CL_IMAGE_ROW_PITCH, size_type) \ - F(cl_image_info, CL_IMAGE_SLICE_PITCH, size_type) \ - F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ - F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ - F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ - \ - F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ - F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ - F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ - F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ - F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ - \ - 
F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ - F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ - F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ - F(cl_program_info, CL_PROGRAM_DEVICES, cl::vector) \ - F(cl_program_info, CL_PROGRAM_SOURCE, string) \ - F(cl_program_info, CL_PROGRAM_BINARY_SIZES, cl::vector) \ - F(cl_program_info, CL_PROGRAM_BINARIES, cl::vector>) \ - \ - F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, string) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, string) \ - \ - F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, string) \ - F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ - F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ - F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, size_type) \ - F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::detail::size_t_array) \ - F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ - \ - F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ - F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) - - -#define CL_HPP_PARAM_NAME_INFO_1_1_(F) \ - F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\ - F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ - F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, 
cl_uint) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ - \ - F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ - F(cl_mem_info, CL_MEM_OFFSET, size_type) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ - F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ - \ - F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) - -#define CL_HPP_PARAM_NAME_INFO_1_2_(F) \ - F(cl_program_info, CL_PROGRAM_NUM_KERNELS, size_type) \ - F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, string) \ - \ - F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ - \ - F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, string) \ - \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ - F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ - \ - F(cl_kernel_work_group_info, CL_KERNEL_GLOBAL_WORK_SIZE, cl::detail::size_t_array) \ - \ - F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ - F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_type) \ - F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_type) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ - F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ - F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ - F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, size_type) \ - \ - F(cl_image_info, CL_IMAGE_ARRAY_SIZE, 
size_type) \ - F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ - F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) - -#define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_PIPE_ARGS, cl_uint) \ - F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ - F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ - F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ - F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint) \ - F(cl_device_info, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, cl_uint ) \ - F(cl_device_info, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_type ) \ - F(cl_device_info, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_type ) \ - F(cl_profiling_info, CL_PROFILING_COMMAND_COMPLETE, cl_ulong) \ - F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, cl_bool) \ - F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_PTRS, void**) \ - F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ - F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) \ - F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ - F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ - F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) - -#define 
CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(F) \ - F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, size_type) - -#define CL_HPP_PARAM_NAME_INFO_IL_KHR_(F) \ - F(cl_device_info, CL_DEVICE_IL_VERSION_KHR, string) \ - F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) - -#define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ - F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, cl_ulong) \ - F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ - F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ - F(cl_device_info, CL_DEVICE_IL_VERSION, string) \ - F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ - F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ - F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, cl::detail::size_t_array) \ - F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ - F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) - -#define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ - F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ - F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, cl_bool) - -#define CL_HPP_PARAM_NAME_DEVICE_FISSION_EXT_(F) \ - F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl::Device) \ - F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, cl::vector) \ - F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, cl::vector) \ - F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \ - F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, cl::vector) - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(F) \ - F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION_KHR, cl_version_khr) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR, cl::vector) 
\ - \ - F(cl_device_info, CL_DEVICE_NUMERIC_VERSION_KHR, cl_version_khr) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ - F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION_KHR, cl::vector) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR, cl::vector) - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(F) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR, cl_version_khr) - -#define CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_(F) \ - F(cl_semaphore_info_khr, CL_SEMAPHORE_CONTEXT_KHR, cl::Context) \ - F(cl_semaphore_info_khr, CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint) \ - F(cl_semaphore_info_khr, CL_SEMAPHORE_PROPERTIES_KHR, cl::vector) \ - F(cl_semaphore_info_khr, CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr) \ - F(cl_semaphore_info_khr, CL_SEMAPHORE_PAYLOAD_KHR, cl_semaphore_payload_khr) \ - F(cl_semaphore_info_khr, CL_DEVICE_HANDLE_LIST_KHR, cl::vector) \ - F(cl_platform_info, CL_PLATFORM_SEMAPHORE_TYPES_KHR, cl::vector) \ - F(cl_device_info, CL_DEVICE_SEMAPHORE_TYPES_KHR, cl::vector) \ - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_MEMORY_(F) \ - F(cl_device_info, CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, cl::vector) \ - F(cl_platform_info, CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, cl::vector) - -#define CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_EXT(F) \ - F(cl_platform_info, CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, cl::vector) \ - F(cl_platform_info, CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl::vector) \ - F(cl_device_info, CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, cl::vector) \ - F(cl_device_info, CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl::vector) \ - F(cl_semaphore_info_khr, CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, cl::vector) \ - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE_EXT(F) \ - F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, void*) \ - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXT(F) \ - 
F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, int) \ - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_SYNC_FD_EXT(F) \ - F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_SYNC_FD_KHR, int) \ - -#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_WIN32_EXT(F) \ - F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR, void*) \ - F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR, void*) \ - -#define CL_HPP_PARAM_NAME_INFO_3_0_(F) \ - F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION, cl_version) \ - F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION, cl::vector) \ - \ - F(cl_device_info, CL_DEVICE_NUMERIC_VERSION, cl_version) \ - F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION, cl::vector) \ - F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION, cl::vector) \ - F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, cl::vector) \ - F(cl_device_info, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, cl_device_atomic_capabilities) \ - F(cl_device_info, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, cl_device_atomic_capabilities) \ - F(cl_device_info, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_ALL_VERSIONS, cl::vector) \ - F(cl_device_info, CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ - F(cl_device_info, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_OPENCL_C_FEATURES, cl::vector) \ - F(cl_device_info, CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, cl_device_device_enqueue_capabilities) \ - F(cl_device_info, CL_DEVICE_PIPE_SUPPORT, cl_bool) \ - F(cl_device_info, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, string) \ - \ - F(cl_command_queue_info, CL_QUEUE_PROPERTIES_ARRAY, cl::vector) \ - F(cl_mem_info, CL_MEM_PROPERTIES, cl::vector) \ - F(cl_pipe_info, CL_PIPE_PROPERTIES, cl::vector) \ - F(cl_sampler_info, 
CL_SAMPLER_PROPERTIES, cl::vector) \ - -#define CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_EXT(F) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, size_type) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT, size_type) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_SIZE_EXT, size_type) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, cl_uint) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, cl_uint) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, cl_uint) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, cl_uint) \ - -#define CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT(F) \ - F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT, size_type) \ - -template -struct param_traits {}; - -#define CL_HPP_DECLARE_PARAM_TRAITS_(token, param_name, T) \ -struct token; \ -template<> \ -struct param_traits \ -{ \ - enum { value = param_name }; \ - typedef T param_type; \ -}; - -CL_HPP_PARAM_NAME_INFO_1_0_(CL_HPP_DECLARE_PARAM_TRAITS_) + namespace detail { + + // Generic getInfoHelper. The final parameter is used to guide overload + // resolution: the actual parameter passed is an int, which makes this + // a worse conversion sequence than a specialization that declares the + // parameter as an int. 
+ template + inline cl_int getInfoHelper(Functor f, cl_uint name, T *param, long) { + return f(name, sizeof(T), param, nullptr); + } + + // Specialized for getInfo + // Assumes that the output vector was correctly resized on the way in + template + inline cl_int getInfoHelper(Func f, cl_uint name, vector> *param, + int) { + if (name != CL_PROGRAM_BINARIES) { return CL_INVALID_VALUE; } + if (param) { + // Create array of pointers, calculate total size and pass pointer array in + size_type numBinaries = param->size(); + vector binariesPointers(numBinaries); + + for (size_type i = 0; i < numBinaries; ++i) { + binariesPointers[i] = (*param)[i].data(); + } + + cl_int err = + f(name, numBinaries * sizeof(unsigned char *), binariesPointers.data(), nullptr); + + if (err != CL_SUCCESS) { return err; } + } + + return CL_SUCCESS; + } + + // Specialized getInfoHelper for vector params + template + inline cl_int getInfoHelper(Func f, cl_uint name, vector *param, long) { + size_type required; + cl_int err = f(name, 0, nullptr, &required); + if (err != CL_SUCCESS) { return err; } + const size_type elements = required / sizeof(T); + + // Temporary to avoid changing param on an error + vector localData(elements); + err = f(name, required, localData.data(), nullptr); + if (err != CL_SUCCESS) { return err; } + if (param) { *param = std::move(localData); } + + return CL_SUCCESS; + } + + /* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. 
+ */ + template + inline cl_int getInfoHelper(Func f, cl_uint name, vector *param, int, + typename T::cl_type = 0) { + size_type required; + cl_int err = f(name, 0, nullptr, &required); + if (err != CL_SUCCESS) { return err; } + + const size_type elements = required / sizeof(typename T::cl_type); + + vector value(elements); + err = f(name, required, value.data(), nullptr); + if (err != CL_SUCCESS) { return err; } + + if (param) { + // Assign to convert CL type to T for each element + param->resize(elements); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < elements; i++) { (*param)[i] = T(value[i], true); } + } + return CL_SUCCESS; + } + + // Specialized GetInfoHelper for string params + template + inline cl_int getInfoHelper(Func f, cl_uint name, string *param, long) { + size_type required; + cl_int err = f(name, 0, nullptr, &required); + if (err != CL_SUCCESS) { return err; } + + // std::string has a constant data member + // a char vector does not + if (required > 0) { + vector value(required); + err = f(name, required, value.data(), nullptr); + if (err != CL_SUCCESS) { return err; } + if (param) { param->assign(value.begin(), value.end() - 1); } + } else if (param) { + param->assign(""); + } + return CL_SUCCESS; + } + + // Specialized GetInfoHelper for clsize_t params + template + inline cl_int getInfoHelper(Func f, cl_uint name, array *param, long) { + size_type required; + cl_int err = f(name, 0, nullptr, &required); + if (err != CL_SUCCESS) { return err; } + + size_type elements = required / sizeof(size_type); + vector value(elements, 0); + + err = f(name, required, value.data(), nullptr); + if (err != CL_SUCCESS) { return err; } + + // Bound the copy with N to prevent overruns + // if passed N > than the amount copied + if (elements > N) { elements = N; } + for (size_type i = 0; i < elements; ++i) { (*param)[i] = value[i]; } + + return CL_SUCCESS; + } + + template + 
struct ReferenceHandler; + + /* Specialization for reference-counted types. This depends on the + * existence of Wrapper::cl_type, and none of the other types having the + * cl_type member. Note that simplify specifying the parameter as Wrapper + * does not work, because when using a derived type (e.g. Context) the generic + * template will provide a better match. + */ + template + inline cl_int getInfoHelper(Func f, cl_uint name, T *param, int, typename T::cl_type = 0) { + typename T::cl_type value; + cl_int err = f(name, sizeof(value), &value, nullptr); + if (err != CL_SUCCESS) { return err; } + *param = value; + if (value != nullptr) { + err = param->retain(); + if (err != CL_SUCCESS) { return err; } + } + return CL_SUCCESS; + } + +#define CL_HPP_PARAM_NAME_INFO_1_0_(F) \ + F(cl_platform_info, CL_PLATFORM_PROFILE, string) \ + F(cl_platform_info, CL_PLATFORM_VERSION, string) \ + F(cl_platform_info, CL_PLATFORM_NAME, string) \ + F(cl_platform_info, CL_PLATFORM_VENDOR, string) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS, string) \ + \ + F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \ + F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, cl::vector) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \ + F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \ + F(cl_device_info, 
CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \ + F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \ + F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \ + F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, size_type) \ + F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \ + F(cl_device_info, CL_DEVICE_PLATFORM, cl::Platform) \ + F(cl_device_info, 
CL_DEVICE_NAME, string) \ + F(cl_device_info, CL_DEVICE_VENDOR, string) \ + F(cl_device_info, CL_DRIVER_VERSION, string) \ + F(cl_device_info, CL_DEVICE_PROFILE, string) \ + F(cl_device_info, CL_DEVICE_VERSION, string) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS, string) \ + \ + F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \ + F(cl_context_info, CL_CONTEXT_DEVICES, cl::vector) \ + F(cl_context_info, CL_CONTEXT_PROPERTIES, cl::vector) \ + \ + F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \ + F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \ + F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \ + F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_int) \ + \ + F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \ + \ + F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \ + F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \ + F(cl_mem_info, CL_MEM_SIZE, size_type) \ + F(cl_mem_info, CL_MEM_HOST_PTR, void *) \ + F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \ + F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \ + \ + F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \ + F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, size_type) \ + F(cl_image_info, CL_IMAGE_ROW_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_SLICE_PITCH, size_type) \ + F(cl_image_info, CL_IMAGE_WIDTH, size_type) \ + F(cl_image_info, CL_IMAGE_HEIGHT, size_type) \ + F(cl_image_info, CL_IMAGE_DEPTH, size_type) \ + \ + F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \ + F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \ + F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_bool) \ + F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_addressing_mode) \ + F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_filter_mode) \ + \ + F(cl_program_info, 
CL_PROGRAM_REFERENCE_COUNT, cl_uint) \ + F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \ + F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \ + F(cl_program_info, CL_PROGRAM_DEVICES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_SOURCE, string) \ + F(cl_program_info, CL_PROGRAM_BINARY_SIZES, cl::vector) \ + F(cl_program_info, CL_PROGRAM_BINARIES, cl::vector>) \ + \ + F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, string) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, string) \ + \ + F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, string) \ + F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \ + F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \ + F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::detail::size_t_array) \ + F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \ + \ + F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \ + F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties) + +#define CL_HPP_PARAM_NAME_INFO_1_1_(F) \ + F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \ + F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \ + 
F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, string) \ + \ + F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \ + F(cl_mem_info, CL_MEM_OFFSET, size_type) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ + F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \ + \ + F(cl_event_info, CL_EVENT_CONTEXT, cl::Context) + +#define CL_HPP_PARAM_NAME_INFO_1_2_(F) \ + F(cl_program_info, CL_PROGRAM_NUM_KERNELS, size_type) \ + F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, string) \ + \ + F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \ + \ + F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, string) \ + \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, string) \ + F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_QUALIFIER, cl_kernel_arg_type_qualifier) \ + \ + F(cl_kernel_work_group_info, CL_KERNEL_GLOBAL_WORK_SIZE, cl::detail::size_t_array) \ + \ + F(cl_device_info, CL_DEVICE_LINKER_AVAILABLE, cl_bool) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl::Device) \ + F(cl_device_info, CL_DEVICE_PARTITION_MAX_SUB_DEVICES, cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, cl::vector) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPE, cl::vector) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, cl_bool) \ + F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, string) \ + F(cl_device_info, CL_DEVICE_PRINTF_BUFFER_SIZE, size_type) \ + \ + F(cl_image_info, CL_IMAGE_ARRAY_SIZE, size_type) \ + 
F(cl_image_info, CL_IMAGE_NUM_MIP_LEVELS, cl_uint) \ + F(cl_image_info, CL_IMAGE_NUM_SAMPLES, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_HOST_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PROPERTIES, cl_command_queue_properties) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_PREFERRED_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_QUEUE_ON_DEVICE_MAX_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_QUEUES, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_ON_DEVICE_EVENTS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_PIPE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_ACTIVE_RESERVATIONS, cl_uint) \ + F(cl_device_info, CL_DEVICE_PIPE_MAX_PACKET_SIZE, cl_uint) \ + F(cl_device_info, CL_DEVICE_SVM_CAPABILITIES, cl_device_svm_capabilities) \ + F(cl_device_info, CL_DEVICE_PREFERRED_PLATFORM_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_GLOBAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PREFERRED_LOCAL_ATOMIC_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_READ_WRITE_IMAGE_ARGS, cl_uint) \ + F(cl_device_info, CL_DEVICE_MAX_GLOBAL_VARIABLE_SIZE, size_type) \ + F(cl_device_info, CL_DEVICE_GLOBAL_VARIABLE_PREFERRED_TOTAL_SIZE, size_type) \ + F(cl_profiling_info, CL_PROFILING_COMMAND_COMPLETE, cl_ulong) \ + F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, cl_bool) \ + F(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_SVM_PTRS, void **) \ + F(cl_command_queue_info, CL_QUEUE_SIZE, cl_uint) \ + F(cl_mem_info, CL_MEM_USES_SVM_POINTER, cl_bool) \ + F(cl_program_build_info, CL_PROGRAM_BUILD_GLOBAL_VARIABLE_TOTAL_SIZE, size_type) \ + F(cl_pipe_info, CL_PIPE_PACKET_SIZE, cl_uint) \ + F(cl_pipe_info, CL_PIPE_MAX_PACKETS, cl_uint) + +#define CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(F) \ + 
F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE_KHR, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE_KHR, size_type) + +#define CL_HPP_PARAM_NAME_INFO_IL_KHR_(F) \ + F(cl_device_info, CL_DEVICE_IL_VERSION_KHR, string) \ + F(cl_program_info, CL_PROGRAM_IL_KHR, cl::vector) + +#define CL_HPP_PARAM_NAME_INFO_2_1_(F) \ + F(cl_platform_info, CL_PLATFORM_HOST_TIMER_RESOLUTION, cl_ulong) \ + F(cl_program_info, CL_PROGRAM_IL, cl::vector) \ + F(cl_device_info, CL_DEVICE_MAX_NUM_SUB_GROUPS, cl_uint) \ + F(cl_device_info, CL_DEVICE_IL_VERSION, string) \ + F(cl_device_info, CL_DEVICE_SUB_GROUP_INDEPENDENT_FORWARD_PROGRESS, cl_bool) \ + F(cl_command_queue_info, CL_QUEUE_DEVICE_DEFAULT, cl::DeviceCommandQueue) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_SUB_GROUP_SIZE_FOR_NDRANGE, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_SUB_GROUP_COUNT_FOR_NDRANGE, size_type) \ + F(cl_kernel_sub_group_info, \ + CL_KERNEL_LOCAL_SIZE_FOR_SUB_GROUP_COUNT, \ + cl::detail::size_t_array) \ + F(cl_kernel_sub_group_info, CL_KERNEL_MAX_NUM_SUB_GROUPS, size_type) \ + F(cl_kernel_sub_group_info, CL_KERNEL_COMPILE_NUM_SUB_GROUPS, size_type) + +#define CL_HPP_PARAM_NAME_INFO_2_2_(F) \ + F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_CTORS_PRESENT, cl_bool) \ + F(cl_program_info, CL_PROGRAM_SCOPE_GLOBAL_DTORS_PRESENT, cl_bool) + +#define CL_HPP_PARAM_NAME_DEVICE_FISSION_EXT_(F) \ + F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl::Device) \ + F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, cl::vector) \ + F(cl_device_info, \ + CL_DEVICE_AFFINITY_DOMAINS_EXT, \ + cl::vector) \ + F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT, cl_uint) \ + F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(F) \ + F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ + \ + F(cl_device_info, 
CL_DEVICE_NUMERIC_VERSION_KHR, cl_version_khr) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION_KHR, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(F) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_NUMERIC_VERSION_KHR, cl_version_khr) + +#define CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_(F) \ + F(cl_semaphore_info_khr, CL_SEMAPHORE_CONTEXT_KHR, cl::Context) \ + F(cl_semaphore_info_khr, CL_SEMAPHORE_REFERENCE_COUNT_KHR, cl_uint) \ + F(cl_semaphore_info_khr, CL_SEMAPHORE_PROPERTIES_KHR, cl::vector) \ + F(cl_semaphore_info_khr, CL_SEMAPHORE_TYPE_KHR, cl_semaphore_type_khr) \ + F(cl_semaphore_info_khr, CL_SEMAPHORE_PAYLOAD_KHR, cl_semaphore_payload_khr) \ + F(cl_semaphore_info_khr, CL_DEVICE_HANDLE_LIST_KHR, cl::vector) \ + F(cl_platform_info, CL_PLATFORM_SEMAPHORE_TYPES_KHR, cl::vector) \ + F(cl_device_info, CL_DEVICE_SEMAPHORE_TYPES_KHR, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_MEMORY_(F) \ + F(cl_device_info, \ + CL_DEVICE_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, \ + cl::vector) \ + F(cl_platform_info, \ + CL_PLATFORM_EXTERNAL_MEMORY_IMPORT_HANDLE_TYPES_KHR, \ + cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_EXT(F) \ + F(cl_platform_info, \ + CL_PLATFORM_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, \ + cl::vector) \ + F(cl_platform_info, \ + CL_PLATFORM_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, \ + cl::vector) \ + F(cl_device_info, \ + CL_DEVICE_SEMAPHORE_IMPORT_HANDLE_TYPES_KHR, \ + cl::vector) \ + F(cl_device_info, \ + CL_DEVICE_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, \ + cl::vector) \ + F(cl_semaphore_info_khr, \ + CL_SEMAPHORE_EXPORT_HANDLE_TYPES_KHR, \ + cl::vector) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE_EXT(F) \ + F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, void *) + +#define 
CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXT(F) \ + F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, int) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_SYNC_FD_EXT(F) \ + F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_SYNC_FD_KHR, int) + +#define CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_WIN32_EXT(F) \ + F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR, void *) \ + F(cl_external_semaphore_handle_type_khr, CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR, void *) + +#define CL_HPP_PARAM_NAME_INFO_3_0_(F) \ + F(cl_platform_info, CL_PLATFORM_NUMERIC_VERSION, cl_version) \ + F(cl_platform_info, CL_PLATFORM_EXTENSIONS_WITH_VERSION, cl::vector) \ + \ + F(cl_device_info, CL_DEVICE_NUMERIC_VERSION, cl_version) \ + F(cl_device_info, CL_DEVICE_EXTENSIONS_WITH_VERSION, cl::vector) \ + F(cl_device_info, CL_DEVICE_ILS_WITH_VERSION, cl::vector) \ + F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS_WITH_VERSION, cl::vector) \ + F(cl_device_info, CL_DEVICE_ATOMIC_MEMORY_CAPABILITIES, cl_device_atomic_capabilities) \ + F(cl_device_info, CL_DEVICE_ATOMIC_FENCE_CAPABILITIES, cl_device_atomic_capabilities) \ + F(cl_device_info, CL_DEVICE_NON_UNIFORM_WORK_GROUP_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_ALL_VERSIONS, cl::vector) \ + F(cl_device_info, CL_DEVICE_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, size_type) \ + F(cl_device_info, CL_DEVICE_WORK_GROUP_COLLECTIVE_FUNCTIONS_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_GENERIC_ADDRESS_SPACE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_OPENCL_C_FEATURES, cl::vector) \ + F(cl_device_info, \ + CL_DEVICE_DEVICE_ENQUEUE_CAPABILITIES, \ + cl_device_device_enqueue_capabilities) \ + F(cl_device_info, CL_DEVICE_PIPE_SUPPORT, cl_bool) \ + F(cl_device_info, CL_DEVICE_LATEST_CONFORMANCE_VERSION_PASSED, string) \ + \ + F(cl_command_queue_info, CL_QUEUE_PROPERTIES_ARRAY, cl::vector) \ + F(cl_mem_info, CL_MEM_PROPERTIES, cl::vector) \ + 
F(cl_pipe_info, CL_PIPE_PROPERTIES, cl::vector) \ + F(cl_sampler_info, CL_SAMPLER_PROPERTIES, cl::vector) + +#define CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_EXT(F) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, size_type) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT, size_type) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_SIZE_EXT, size_type) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, cl_uint) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, cl_uint) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, cl_uint) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, cl_uint) + +#define CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT(F) \ + F(cl_image_requirements_info_ext, CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT, size_type) + + template + struct param_traits {}; + +#define CL_HPP_DECLARE_PARAM_TRAITS_(token, param_name, T) \ + struct token; \ + template<> \ + struct param_traits { \ + enum { value = param_name }; \ + typedef T param_type; \ + }; + + CL_HPP_PARAM_NAME_INFO_1_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #if CL_HPP_TARGET_OPENCL_VERSION >= 110 -CL_HPP_PARAM_NAME_INFO_1_1_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_1_1_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -CL_HPP_PARAM_NAME_INFO_1_2_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_1_2_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -CL_HPP_PARAM_NAME_INFO_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_TARGET_OPENCL_VERSION >= 210 -CL_HPP_PARAM_NAME_INFO_2_1_(CL_HPP_DECLARE_PARAM_TRAITS_) + 
CL_HPP_PARAM_NAME_INFO_2_1_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 210 #if CL_HPP_TARGET_OPENCL_VERSION >= 220 -CL_HPP_PARAM_NAME_INFO_2_2_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_2_2_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 #if CL_HPP_TARGET_OPENCL_VERSION >= 300 -CL_HPP_PARAM_NAME_INFO_3_0_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_3_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_TARGET_OPENCL_VERSION >= 300 #if defined(cl_khr_subgroups) && CL_HPP_TARGET_OPENCL_VERSION < 210 -CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_SUBGROUP_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // #if defined(cl_khr_subgroups) && CL_HPP_TARGET_OPENCL_VERSION < 210 #if defined(cl_khr_il_program) && CL_HPP_TARGET_OPENCL_VERSION < 210 -CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_IL_KHR_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // #if defined(cl_khr_il_program) && CL_HPP_TARGET_OPENCL_VERSION < 210 - // Flags deprecated in OpenCL 2.0 -#define CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(F) \ - F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) +#define CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) -#define CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(F) \ - F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) +#define CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(F) \ + F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) -#define CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(F) \ - F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) +#define CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(F) \ + F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) // Include deprecated query flags based on versions // Only include deprecated 1.0 flags if 2.0 not active as there is an enum clash -#if 
CL_HPP_TARGET_OPENCL_VERSION > 100 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 && CL_HPP_TARGET_OPENCL_VERSION < 200 -CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) +#if CL_HPP_TARGET_OPENCL_VERSION > 100 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 && \ + CL_HPP_TARGET_OPENCL_VERSION < 200 + CL_HPP_PARAM_NAME_INFO_1_0_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 110 #if CL_HPP_TARGET_OPENCL_VERSION > 110 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 -CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_1_1_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 #if CL_HPP_TARGET_OPENCL_VERSION > 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 -CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_INFO_1_2_DEPRECATED_IN_2_0_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 #if defined(cl_ext_device_fission) -CL_HPP_PARAM_NAME_DEVICE_FISSION_EXT_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_DEVICE_FISSION_EXT_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_ext_device_fission #if defined(cl_khr_extended_versioning) -#if CL_HPP_TARGET_OPENCL_VERSION < 300 -CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(CL_HPP_DECLARE_PARAM_TRAITS_) -#endif // CL_HPP_TARGET_OPENCL_VERSION < 300 -CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(CL_HPP_DECLARE_PARAM_TRAITS_) +# if CL_HPP_TARGET_OPENCL_VERSION < 300 + CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_CL3_SHARED_(CL_HPP_DECLARE_PARAM_TRAITS_) +# endif // CL_HPP_TARGET_OPENCL_VERSION < 300 + CL_HPP_PARAM_NAME_CL_KHR_EXTENDED_VERSIONING_KHRONLY_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_extended_versioning #if defined(cl_khr_semaphore) -CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_semaphore #ifdef 
cl_khr_external_memory -CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_MEMORY_(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_MEMORY_(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_external_memory #if defined(cl_khr_external_semaphore) -CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_SEMAPHORE_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_external_semaphore #if defined(cl_khr_external_semaphore_dx_fence) -CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_DX_FENCE_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_external_semaphore_dx_fence #if defined(cl_khr_external_semaphore_opaque_fd) -CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_OPAQUE_FD_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_external_semaphore_opaque_fd #if defined(cl_khr_external_semaphore_sync_fd) -CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_SYNC_FD_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_SYNC_FD_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_external_semaphore_sync_fd #if defined(cl_khr_external_semaphore_win32) -CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_WIN32_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_KHR_EXTERNAL_SEMAPHORE_WIN32_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_khr_external_semaphore_win32 #if defined(cl_khr_device_uuid) -using uuid_array = array; -using luid_array = array; -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DRIVER_UUID_KHR, uuid_array) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_VALID_KHR, cl_bool) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_KHR, luid_array) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NODE_MASK_KHR, cl_uint) + using uuid_array = array; + 
using luid_array = array; + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_UUID_KHR, uuid_array) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DRIVER_UUID_KHR, uuid_array) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_VALID_KHR, cl_bool) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LUID_KHR, luid_array) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_NODE_MASK_KHR, cl_uint) #endif #if defined(cl_khr_pci_bus_info) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PCI_BUS_INFO_KHR, cl_device_pci_bus_info_khr) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PCI_BUS_INFO_KHR, + cl_device_pci_bus_info_khr) #endif // Note: some headers do not define cl_khr_image2d_from_buffer #if CL_HPP_TARGET_OPENCL_VERSION < 200 -#if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR, cl_uint) -#endif -#if defined(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR, cl_uint) -#endif +# if defined(CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IMAGE_PITCH_ALIGNMENT_KHR, cl_uint) +# endif +# if defined(CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT_KHR, + cl_uint) +# endif #endif // CL_HPP_TARGET_OPENCL_VERSION < 200 #if defined(cl_khr_integer_dot_product) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, cl_device_integer_dot_product_capabilities_khr) -#if defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, cl_device_integer_dot_product_acceleration_properties_khr) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, 
CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, cl_device_integer_dot_product_acceleration_properties_khr) -#endif // defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) -#endif // defined(cl_khr_integer_dot_product) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_CAPABILITIES_KHR, + cl_device_integer_dot_product_capabilities_khr) +# if defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, + CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR, + cl_device_integer_dot_product_acceleration_properties_khr) + CL_HPP_DECLARE_PARAM_TRAITS_( + cl_device_info, CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_4x8BIT_PACKED_KHR, + cl_device_integer_dot_product_acceleration_properties_khr) +# endif // defined(CL_DEVICE_INTEGER_DOT_PRODUCT_ACCELERATION_PROPERTIES_8BIT_KHR) +#endif // defined(cl_khr_integer_dot_product) #if defined(cl_ext_image_requirements_info) -CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_ext_image_requirements_info #if defined(cl_ext_image_from_buffer) -CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT(CL_HPP_DECLARE_PARAM_TRAITS_) + CL_HPP_PARAM_NAME_CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT( + CL_HPP_DECLARE_PARAM_TRAITS_) #endif // cl_ext_image_from_buffer #ifdef CL_PLATFORM_ICD_SUFFIX_KHR -CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, string) #endif #ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong) #endif #ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD 
-CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, + vector) #endif #ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint) #endif #ifdef CL_DEVICE_SIMD_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint) #endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint) #endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, + cl_uint) #endif #ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, + cl_uint) #endif #ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint) + 
CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, + cl_uint) #endif #ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint) #endif #ifdef CL_DEVICE_BOARD_NAME_AMD -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_BOARD_NAME_AMD, string) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_BOARD_NAME_AMD, string) #endif #ifdef CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_UNITS_BITFIELD_ARM, cl_ulong) #endif #ifdef CL_DEVICE_JOB_SLOTS_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_JOB_SLOTS_ARM, cl_uint) #endif #ifdef CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, cl_bitfield) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SCHEDULING_CONTROLS_CAPABILITIES_ARM, + cl_bitfield) #endif #ifdef CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM, vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_SUPPORTED_REGISTER_ALLOCATIONS_ARM, + vector) #endif #ifdef CL_DEVICE_MAX_WARP_COUNT_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_MAX_WARP_COUNT_ARM, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_MAX_WARP_COUNT_ARM, cl_uint) #endif #ifdef CL_KERNEL_MAX_WARP_COUNT_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_info, CL_KERNEL_MAX_WARP_COUNT_ARM, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_info, CL_KERNEL_MAX_WARP_COUNT_ARM, cl_uint) #endif #ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM 
-CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, + CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_ARM, cl_uint) #endif #ifdef CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM, cl_int) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, + CL_KERNEL_EXEC_INFO_WORKGROUP_BATCH_SIZE_MODIFIER_ARM, cl_int) #endif #ifdef CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_WARP_COUNT_LIMIT_ARM, + cl_uint) #endif #ifdef CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM -CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_kernel_exec_info, + CL_KERNEL_EXEC_INFO_COMPUTE_UNIT_MAX_QUEUED_BATCHES_ARM, + cl_uint) #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint) #endif #ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint) #endif #ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint) #endif #ifdef CL_DEVICE_WARP_SIZE_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint) #endif 
#ifdef CL_DEVICE_GPU_OVERLAP_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool) #endif #ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool) #endif #ifdef CL_DEVICE_INTEGRATED_MEMORY_NV -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool) #endif #if defined(cl_khr_command_buffer) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR, cl_device_command_buffer_capabilities_khr) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR, cl_command_buffer_properties_khr) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_QUEUES_KHR, cl::vector) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_NUM_QUEUES_KHR, cl_uint) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, cl_uint) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_STATE_KHR, cl_command_buffer_state_khr) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR, cl::vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, CL_DEVICE_COMMAND_BUFFER_CAPABILITIES_KHR, + cl_device_command_buffer_capabilities_khr) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_device_info, + CL_DEVICE_COMMAND_BUFFER_REQUIRED_QUEUE_PROPERTIES_KHR, + cl_command_buffer_properties_khr) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_QUEUES_KHR, + cl::vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_NUM_QUEUES_KHR, + cl_uint) + 
CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, + CL_COMMAND_BUFFER_REFERENCE_COUNT_KHR, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, CL_COMMAND_BUFFER_STATE_KHR, + cl_command_buffer_state_khr) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_command_buffer_info_khr, + CL_COMMAND_BUFFER_PROPERTIES_ARRAY_KHR, + cl::vector) #endif /* cl_khr_command_buffer */ #if defined(cl_khr_command_buffer_mutable_dispatch) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR, CommandQueue) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR, CommandBufferKhr) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR, cl_command_type) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR, cl::vector) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_KERNEL_KHR, cl_kernel) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_DIMENSIONS_KHR, cl_uint) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR, cl::vector) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR, cl::vector) -CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR, cl::vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_COMMAND_COMMAND_QUEUE_KHR, CommandQueue) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_COMMAND_COMMAND_BUFFER_KHR, CommandBufferKhr) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_COMMAND_COMMAND_TYPE_KHR, cl_command_type) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_DISPATCH_PROPERTIES_ARRAY_KHR, + cl::vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, CL_MUTABLE_DISPATCH_KERNEL_KHR, + cl_kernel) 
+ CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_DISPATCH_DIMENSIONS_KHR, cl_uint) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_DISPATCH_GLOBAL_WORK_OFFSET_KHR, + cl::vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_DISPATCH_GLOBAL_WORK_SIZE_KHR, + cl::vector) + CL_HPP_DECLARE_PARAM_TRAITS_(cl_mutable_command_info_khr, + CL_MUTABLE_DISPATCH_LOCAL_WORK_SIZE_KHR, cl::vector) #endif /* cl_khr_command_buffer_mutable_dispatch */ -// Convenience functions - -template -inline cl_int -getInfo(Func f, cl_uint name, T* param) -{ - return getInfoHelper(f, name, param, 0); -} - -template -struct GetInfoFunctor0 -{ - Func f_; const Arg0& arg0_; - cl_int operator ()( - cl_uint param, size_type size, void* value, size_type* size_ret) - { return f_(arg0_, param, size, value, size_ret); } -}; - -template -struct GetInfoFunctor1 -{ - Func f_; const Arg0& arg0_; const Arg1& arg1_; - cl_int operator ()( - cl_uint param, size_type size, void* value, size_type* size_ret) - { return f_(arg0_, arg1_, param, size, value, size_ret); } -}; - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, cl_uint name, T* param) -{ - GetInfoFunctor0 f0 = { f, arg0 }; - return getInfoHelper(f0, name, param, 0); -} - -template -inline cl_int -getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param) -{ - GetInfoFunctor1 f0 = { f, arg0, arg1 }; - return getInfoHelper(f0, name, param, 0); -} - - -template -struct ReferenceHandler -{ }; + // Convenience functions + + template + inline cl_int getInfo(Func f, cl_uint name, T *param) { + return getInfoHelper(f, name, param, 0); + } + + template + struct GetInfoFunctor0 { + Func f_; + const Arg0 &arg0_; + cl_int operator()(cl_uint param, size_type size, void *value, size_type *size_ret) { + return f_(arg0_, param, size, value, size_ret); + } + }; + + template + struct GetInfoFunctor1 { + Func f_; + const Arg0 &arg0_; + const Arg1 &arg1_; + cl_int 
operator()(cl_uint param, size_type size, void *value, size_type *size_ret) { + return f_(arg0_, arg1_, param, size, value, size_ret); + } + }; + + template + inline cl_int getInfo(Func f, const Arg0 &arg0, cl_uint name, T *param) { + GetInfoFunctor0 f0 = {f, arg0}; + return getInfoHelper(f0, name, param, 0); + } + + template + inline cl_int getInfo(Func f, const Arg0 &arg0, const Arg1 &arg1, cl_uint name, T *param) { + GetInfoFunctor1 f0 = {f, arg0, arg1}; + return getInfoHelper(f0, name, param, 0); + } + + template + struct ReferenceHandler {}; #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/** - * OpenCL 1.2 devices do have retain/release. - */ -template <> -struct ReferenceHandler -{ - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int retain(cl_device_id device) - { return ::clRetainDevice(device); } - /** - * Retain the device. - * \param device A valid device created using createSubDevices - * \return - * CL_SUCCESS if the function executed successfully. - * CL_INVALID_DEVICE if device was not a valid subdevice - * CL_OUT_OF_RESOURCES - * CL_OUT_OF_HOST_MEMORY - */ - static cl_int release(cl_device_id device) - { return ::clReleaseDevice(device); } -}; -#else // CL_HPP_TARGET_OPENCL_VERSION >= 120 -/** - * OpenCL 1.1 devices do not have retain/release. - */ -template <> -struct ReferenceHandler -{ - // cl_device_id does not have retain(). - static cl_int retain(cl_device_id) - { return CL_SUCCESS; } - // cl_device_id does not have release(). - static cl_int release(cl_device_id) - { return CL_SUCCESS; } -}; + /** + * OpenCL 1.2 devices do have retain/release. + */ + template<> + struct ReferenceHandler { + /** + * Retain the device. 
+ * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int retain(cl_device_id device) { return ::clRetainDevice(device); } + /** + * Retain the device. + * \param device A valid device created using createSubDevices + * \return + * CL_SUCCESS if the function executed successfully. + * CL_INVALID_DEVICE if device was not a valid subdevice + * CL_OUT_OF_RESOURCES + * CL_OUT_OF_HOST_MEMORY + */ + static cl_int release(cl_device_id device) { return ::clReleaseDevice(device); } + }; +#else // CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * OpenCL 1.1 devices do not have retain/release. + */ + template<> + struct ReferenceHandler { + // cl_device_id does not have retain(). + static cl_int retain(cl_device_id) { return CL_SUCCESS; } + // cl_device_id does not have release(). + static cl_int release(cl_device_id) { return CL_SUCCESS; } + }; #endif // ! (CL_HPP_TARGET_OPENCL_VERSION >= 120) -template <> -struct ReferenceHandler -{ - // cl_platform_id does not have retain(). - static cl_int retain(cl_platform_id) - { return CL_SUCCESS; } - // cl_platform_id does not have release(). 
- static cl_int release(cl_platform_id) - { return CL_SUCCESS; } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_context context) - { return ::clRetainContext(context); } - static cl_int release(cl_context context) - { return ::clReleaseContext(context); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_queue queue) - { return ::clRetainCommandQueue(queue); } - static cl_int release(cl_command_queue queue) - { return ::clReleaseCommandQueue(queue); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_mem memory) - { return ::clRetainMemObject(memory); } - static cl_int release(cl_mem memory) - { return ::clReleaseMemObject(memory); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_sampler sampler) - { return ::clRetainSampler(sampler); } - static cl_int release(cl_sampler sampler) - { return ::clReleaseSampler(sampler); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_program program) - { return ::clRetainProgram(program); } - static cl_int release(cl_program program) - { return ::clReleaseProgram(program); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_kernel kernel) - { return ::clRetainKernel(kernel); } - static cl_int release(cl_kernel kernel) - { return ::clReleaseKernel(kernel); } -}; - -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_event event) - { return ::clRetainEvent(event); } - static cl_int release(cl_event event) - { return ::clReleaseEvent(event); } -}; + template<> + struct ReferenceHandler { + // cl_platform_id does not have retain(). + static cl_int retain(cl_platform_id) { return CL_SUCCESS; } + // cl_platform_id does not have release(). 
+ static cl_int release(cl_platform_id) { return CL_SUCCESS; } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_context context) { return ::clRetainContext(context); } + static cl_int release(cl_context context) { return ::clReleaseContext(context); } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_command_queue queue) { return ::clRetainCommandQueue(queue); } + static cl_int release(cl_command_queue queue) { return ::clReleaseCommandQueue(queue); } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_mem memory) { return ::clRetainMemObject(memory); } + static cl_int release(cl_mem memory) { return ::clReleaseMemObject(memory); } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_sampler sampler) { return ::clRetainSampler(sampler); } + static cl_int release(cl_sampler sampler) { return ::clReleaseSampler(sampler); } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_program program) { return ::clRetainProgram(program); } + static cl_int release(cl_program program) { return ::clReleaseProgram(program); } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_kernel kernel) { return ::clRetainKernel(kernel); } + static cl_int release(cl_kernel kernel) { return ::clReleaseKernel(kernel); } + }; + + template<> + struct ReferenceHandler { + static cl_int retain(cl_event event) { return ::clRetainEvent(event); } + static cl_int release(cl_event event) { return ::clReleaseEvent(event); } + }; #ifdef cl_khr_semaphore -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_semaphore_khr semaphore) - { - if (pfn_clRetainSemaphoreKHR != nullptr) { - return pfn_clRetainSemaphoreKHR(semaphore); - } - - return CL_INVALID_OPERATION; - } - - static cl_int release(cl_semaphore_khr semaphore) - { - if (pfn_clReleaseSemaphoreKHR != nullptr) { - return pfn_clReleaseSemaphoreKHR(semaphore); - } - - return CL_INVALID_OPERATION; - } -}; + 
template<> + struct ReferenceHandler { + static cl_int retain(cl_semaphore_khr semaphore) { + if (pfn_clRetainSemaphoreKHR != nullptr) { + return pfn_clRetainSemaphoreKHR(semaphore); + } + + return CL_INVALID_OPERATION; + } + + static cl_int release(cl_semaphore_khr semaphore) { + if (pfn_clReleaseSemaphoreKHR != nullptr) { + return pfn_clReleaseSemaphoreKHR(semaphore); + } + + return CL_INVALID_OPERATION; + } + }; #endif // cl_khr_semaphore #if defined(cl_khr_command_buffer) -template <> -struct ReferenceHandler -{ - static cl_int retain(cl_command_buffer_khr cmdBufferKhr) - { - if (pfn_clRetainCommandBufferKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, __RETAIN_COMMAND_BUFFER_KHR_ERR); - } - return pfn_clRetainCommandBufferKHR(cmdBufferKhr); - } - - static cl_int release(cl_command_buffer_khr cmdBufferKhr) - { - if (pfn_clReleaseCommandBufferKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, __RELEASE_COMMAND_BUFFER_KHR_ERR); - } - return pfn_clReleaseCommandBufferKHR(cmdBufferKhr); - } -}; - -template <> -struct ReferenceHandler -{ - // cl_mutable_command_khr does not have retain(). - static cl_int retain(cl_mutable_command_khr) - { return CL_SUCCESS; } - // cl_mutable_command_khr does not have release(). 
- static cl_int release(cl_mutable_command_khr) - { return CL_SUCCESS; } -}; + template<> + struct ReferenceHandler { + static cl_int retain(cl_command_buffer_khr cmdBufferKhr) { + if (pfn_clRetainCommandBufferKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __RETAIN_COMMAND_BUFFER_KHR_ERR); + } + return pfn_clRetainCommandBufferKHR(cmdBufferKhr); + } + + static cl_int release(cl_command_buffer_khr cmdBufferKhr) { + if (pfn_clReleaseCommandBufferKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __RELEASE_COMMAND_BUFFER_KHR_ERR); + } + return pfn_clReleaseCommandBufferKHR(cmdBufferKhr); + } + }; + + template<> + struct ReferenceHandler { + // cl_mutable_command_khr does not have retain(). + static cl_int retain(cl_mutable_command_khr) { return CL_SUCCESS; } + // cl_mutable_command_khr does not have release(). + static cl_int release(cl_mutable_command_khr) { return CL_SUCCESS; } + }; #endif // cl_khr_command_buffer - #if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 -// Extracts version number with major in the upper 16 bits, minor in the lower 16 -static cl_uint getVersion(const vector &versionInfo) -{ - int highVersion = 0; - int lowVersion = 0; - int index = 7; - while(versionInfo[index] != '.' 
) { - highVersion *= 10; - highVersion += versionInfo[index]-'0'; - ++index; - } - ++index; - while(versionInfo[index] != ' ' && versionInfo[index] != '\0') { - lowVersion *= 10; - lowVersion += versionInfo[index]-'0'; - ++index; - } - return (highVersion << 16) | lowVersion; -} - -static cl_uint getPlatformVersion(cl_platform_id platform) -{ - size_type size = 0; - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, nullptr, &size); - - vector versionInfo(size); - clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, versionInfo.data(), &size); - return getVersion(versionInfo); -} - -static cl_uint getDevicePlatformVersion(cl_device_id device) -{ - cl_platform_id platform; - clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, nullptr); - return getPlatformVersion(platform); -} - -static cl_uint getContextPlatformVersion(cl_context context) -{ - // The platform cannot be queried directly, so we first have to grab a - // device and obtain its context - size_type size = 0; - clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr, &size); - if (size == 0) - return 0; - vector devices(size/sizeof(cl_device_id)); - clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), nullptr); - return getDevicePlatformVersion(devices[0]); -} + // Extracts version number with major in the upper 16 bits, minor in the lower 16 + static cl_uint getVersion(const vector &versionInfo) { + int highVersion = 0; + int lowVersion = 0; + int index = 7; + while (versionInfo[index] != '.') { + highVersion *= 10; + highVersion += versionInfo[index] - '0'; + ++index; + } + ++index; + while (versionInfo[index] != ' ' && versionInfo[index] != '\0') { + lowVersion *= 10; + lowVersion += versionInfo[index] - '0'; + ++index; + } + return (highVersion << 16) | lowVersion; + } + + static cl_uint getPlatformVersion(cl_platform_id platform) { + size_type size = 0; + clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, nullptr, &size); + + vector versionInfo(size); + 
clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, versionInfo.data(), &size); + return getVersion(versionInfo); + } + + static cl_uint getDevicePlatformVersion(cl_device_id device) { + cl_platform_id platform; + clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, nullptr); + return getPlatformVersion(platform); + } + + static cl_uint getContextPlatformVersion(cl_context context) { + // The platform cannot be queried directly, so we first have to grab a + // device and obtain its context + size_type size = 0; + clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, nullptr, &size); + if (size == 0) return 0; + vector devices(size / sizeof(cl_device_id)); + clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices.data(), nullptr); + return getDevicePlatformVersion(devices[0]); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 -template -class Wrapper -{ -public: - typedef T cl_type; - -protected: - cl_type object_; - -public: - Wrapper() : object_(nullptr) { } - - Wrapper(const cl_type &obj, bool retainObject) : object_(obj) - { - if (retainObject) { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - - ~Wrapper() - { - if (object_ != nullptr) { release(); } - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - detail::errHandler(retain(), __RETAIN_ERR); - } - - Wrapper(Wrapper&& rhs) noexcept - { - object_ = rhs.object_; - rhs.object_ = nullptr; - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - detail::errHandler(retain(), __RETAIN_ERR); - } - return *this; - } - - Wrapper& operator = (Wrapper&& rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - rhs.object_ = nullptr; - } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs; - return *this; - } - - const 
cl_type& operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - - cl_type get() const { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - cl_int retain() const - { - if (object_ != nullptr) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if (object_ != nullptr) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -template <> -class Wrapper -{ -public: - typedef cl_device_id cl_type; - -protected: - cl_type object_; - bool referenceCountable_; - - static bool isReferenceCountable(cl_device_id device) - { - bool retVal = false; + template + class Wrapper { + public: + typedef T cl_type; + + protected: + cl_type object_; + + public: + Wrapper() : object_(nullptr) {} + + Wrapper(const cl_type &obj, bool retainObject) : object_(obj) { + if (retainObject) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + ~Wrapper() { + if (object_ != nullptr) { release(); } + } + + Wrapper(const Wrapper &rhs) { + object_ = rhs.object_; + detail::errHandler(retain(), __RETAIN_ERR); + } + + Wrapper(Wrapper &&rhs) noexcept { + object_ = rhs.object_; + rhs.object_ = nullptr; + } + + Wrapper &operator=(const Wrapper &rhs) { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + detail::errHandler(retain(), __RETAIN_ERR); + } + return *this; + } + + Wrapper &operator=(Wrapper &&rhs) { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + rhs.object_ = nullptr; + } + return *this; + } + + Wrapper &operator=(const cl_type &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs; + return *this; + } + + const cl_type &operator()() const { return object_; } + + cl_type &operator()() { return object_; } + + cl_type get() const { return object_; } + + protected: + 
template + friend inline cl_int getInfoHelper(Func, cl_uint, U *, int, typename U::cl_type); + + cl_int retain() const { + if (object_ != nullptr) { + return ReferenceHandler::retain(object_); + } else { + return CL_SUCCESS; + } + } + + cl_int release() const { + if (object_ != nullptr) { + return ReferenceHandler::release(object_); + } else { + return CL_SUCCESS; + } + } + }; + + template<> + class Wrapper { + public: + typedef cl_device_id cl_type; + + protected: + cl_type object_; + bool referenceCountable_; + + static bool isReferenceCountable(cl_device_id device) { + bool retVal = false; #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - if (device != nullptr) { - int version = getDevicePlatformVersion(device); - if(version > ((1 << 16) + 1)) { - retVal = true; - } - } -#else // CL_HPP_MINIMUM_OPENCL_VERSION < 120 - retVal = true; -#endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - (void)device; - return retVal; - } - -public: - Wrapper() : object_(nullptr), referenceCountable_(false) - { - } - - Wrapper(const cl_type &obj, bool retainObject) : - object_(obj), - referenceCountable_(false) - { - referenceCountable_ = isReferenceCountable(obj); - - if (retainObject) { - detail::errHandler(retain(), __RETAIN_ERR); - } - } - - ~Wrapper() - { - release(); - } - - Wrapper(const Wrapper& rhs) - { - object_ = rhs.object_; - referenceCountable_ = isReferenceCountable(object_); - detail::errHandler(retain(), __RETAIN_ERR); - } - - Wrapper(Wrapper&& rhs) noexcept - { - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = nullptr; - rhs.referenceCountable_ = false; - } - - Wrapper& operator = (const Wrapper& rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - detail::errHandler(retain(), __RETAIN_ERR); - } - return *this; - } - - Wrapper& operator = (Wrapper&& 
rhs) - { - if (this != &rhs) { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs.object_; - referenceCountable_ = rhs.referenceCountable_; - rhs.object_ = nullptr; - rhs.referenceCountable_ = false; - } - return *this; - } - - Wrapper& operator = (const cl_type &rhs) - { - detail::errHandler(release(), __RELEASE_ERR); - object_ = rhs; - referenceCountable_ = isReferenceCountable(object_); - return *this; - } - - const cl_type& operator ()() const { return object_; } - - cl_type& operator ()() { return object_; } - - cl_type get() const { return object_; } - -protected: - template - friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type); - - template - friend inline cl_int getInfoHelper(Func, cl_uint, vector*, int, typename U::cl_type); - - cl_int retain() const - { - if( object_ != nullptr && referenceCountable_ ) { - return ReferenceHandler::retain(object_); - } - else { - return CL_SUCCESS; - } - } - - cl_int release() const - { - if (object_ != nullptr && referenceCountable_) { - return ReferenceHandler::release(object_); - } - else { - return CL_SUCCESS; - } - } -}; - -template -inline bool operator==(const Wrapper &lhs, const Wrapper &rhs) -{ - return lhs() == rhs(); -} - -template -inline bool operator!=(const Wrapper &lhs, const Wrapper &rhs) -{ - return !operator==(lhs, rhs); -} - -} // namespace detail -//! \endcond - - - - - -/*! \stuct ImageFormat - * \brief Adds constructors and member functions for cl_image_format. - * - * \see cl_image_format - */ -struct ImageFormat : public cl_image_format -{ - //! \brief Default constructor - performs no initialization. - ImageFormat(){} - - //! \brief Initializing constructor. - ImageFormat(cl_channel_order order, cl_channel_type type) - { - image_channel_order = order; - image_channel_data_type = type; - } - - //! \brief Copy constructor. - ImageFormat(const ImageFormat &other) { *this = other; } - - //! \brief Assignment operator. 
- ImageFormat& operator = (const ImageFormat& rhs) - { - if (this != &rhs) { - this->image_channel_data_type = rhs.image_channel_data_type; - this->image_channel_order = rhs.image_channel_order; - } - return *this; - } -}; - -/*! \brief Class interface for cl_device_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_device_id - */ -class Device : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static Device default_; - static cl_int default_error_; - - /*! \brief Create the default context. - * - * This sets @c default_ and @c default_error_. It does not throw - * @c cl::Error. - */ - static void makeDefault(); - - /*! \brief Create the default platform from a provided platform. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const Device &p) { - default_ = p; - } - -public: +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + if (device != nullptr) { + int version = getDevicePlatformVersion(device); + if (version > ((1 << 16) + 1)) { retVal = true; } + } +# else // CL_HPP_MINIMUM_OPENCL_VERSION < 120 + retVal = true; +# endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + (void)device; + return retVal; + } + + public: + Wrapper() : object_(nullptr), referenceCountable_(false) {} + + Wrapper(const cl_type &obj, bool retainObject) : + object_(obj), referenceCountable_(false) { + referenceCountable_ = isReferenceCountable(obj); + + if (retainObject) { detail::errHandler(retain(), __RETAIN_ERR); } + } + + ~Wrapper() { release(); } + + Wrapper(const Wrapper &rhs) { + object_ = rhs.object_; + referenceCountable_ = isReferenceCountable(object_); + detail::errHandler(retain(), __RETAIN_ERR); + } + + Wrapper(Wrapper &&rhs) noexcept { + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = nullptr; + 
rhs.referenceCountable_ = false; + } + + Wrapper &operator=(const Wrapper &rhs) { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + detail::errHandler(retain(), __RETAIN_ERR); + } + return *this; + } + + Wrapper &operator=(Wrapper &&rhs) { + if (this != &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs.object_; + referenceCountable_ = rhs.referenceCountable_; + rhs.object_ = nullptr; + rhs.referenceCountable_ = false; + } + return *this; + } + + Wrapper &operator=(const cl_type &rhs) { + detail::errHandler(release(), __RELEASE_ERR); + object_ = rhs; + referenceCountable_ = isReferenceCountable(object_); + return *this; + } + + const cl_type &operator()() const { return object_; } + + cl_type &operator()() { return object_; } + + cl_type get() const { return object_; } + + protected: + template + friend inline cl_int getInfoHelper(Func, cl_uint, U *, int, typename U::cl_type); + + template + friend inline cl_int getInfoHelper(Func, cl_uint, vector *, int, + typename U::cl_type); + + cl_int retain() const { + if (object_ != nullptr && referenceCountable_) { + return ReferenceHandler::retain(object_); + } else { + return CL_SUCCESS; + } + } + + cl_int release() const { + if (object_ != nullptr && referenceCountable_) { + return ReferenceHandler::release(object_); + } else { + return CL_SUCCESS; + } + } + }; + + template + inline bool operator==(const Wrapper &lhs, const Wrapper &rhs) { + return lhs() == rhs(); + } + + template + inline bool operator!=(const Wrapper &lhs, const Wrapper &rhs) { + return !operator==(lhs, rhs); + } + + } // namespace detail + //! \endcond + + /*! \stuct ImageFormat + * \brief Adds constructors and member functions for cl_image_format. + * + * \see cl_image_format + */ + struct ImageFormat : public cl_image_format { + //! \brief Default constructor - performs no initialization. + ImageFormat() {} + + //! 
\brief Initializing constructor. + ImageFormat(cl_channel_order order, cl_channel_type type) { + image_channel_order = order; + image_channel_data_type = type; + } + + //! \brief Copy constructor. + ImageFormat(const ImageFormat &other) { *this = other; } + + //! \brief Assignment operator. + ImageFormat &operator=(const ImageFormat &rhs) { + if (this != &rhs) { + this->image_channel_data_type = rhs.image_channel_data_type; + this->image_channel_order = rhs.image_channel_order; + } + return *this; + } + }; + + /*! \brief Class interface for cl_device_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_device_id + */ + class Device : public detail::Wrapper { + private: + static std::once_flag default_initialized_; + static Device default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault(); + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Device &p) { default_ = p; } + + public: #ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = Device(); - } + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { default_ = Device(); } #endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - //! \brief Default constructor - initializes to nullptr. - Device() : detail::Wrapper() { } - - /*! \brief Constructor from cl_device_id. 
- * - * This simply copies the device ID value, which is an inexpensive operation. - */ - explicit Device(const cl_device_id &device, bool retainObject = false) : - detail::Wrapper(device, retainObject) { } - - /*! \brief Returns the first device on the default context. - * - * \see Context::getDefault() - */ - static Device getDefault( - cl_int *errResult = nullptr) - { - std::call_once(default_initialized_, makeDefault); - detail::errHandler(default_error_); - if (errResult != nullptr) { - *errResult = default_error_; - } - return default_; - } - - /** - * Modify the default device to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default device. - * Should be compared to the passed value to ensure that it was updated. - */ - static Device setDefault(const Device &default_device) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_device)); - detail::errHandler(default_error_); - return default_; - } - - /*! \brief Assignment operator from cl_device_id. - * - * This simply copies the device ID value, which is an inexpensive operation. - */ - Device& operator = (const cl_device_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - - //! \brief Wrapper for clGetDeviceInfo(). - template - cl_int getInfo(cl_device_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetDeviceInfo, object_, name, param), - __GET_DEVICE_INFO_ERR); - } - - //! \brief Wrapper for clGetDeviceInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_device_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } + //! \brief Default constructor - initializes to nullptr. + Device() : detail::Wrapper() {} + + /*! 
\brief Constructor from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + explicit Device(const cl_device_id &device, bool retainObject = false) : + detail::Wrapper(device, retainObject) {} + + /*! \brief Returns the first device on the default context. + * + * \see Context::getDefault() + */ + static Device getDefault(cl_int *errResult = nullptr) { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != nullptr) { *errResult = default_error_; } + return default_; + } + + /** + * Modify the default device to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default device. + * Should be compared to the passed value to ensure that it was updated. + */ + static Device setDefault(const Device &default_device) { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_device)); + detail::errHandler(default_error_); + return default_; + } + + /*! \brief Assignment operator from cl_device_id. + * + * This simply copies the device ID value, which is an inexpensive operation. + */ + Device &operator=(const cl_device_id &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetDeviceInfo(). + template + cl_int getInfo(cl_device_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetDeviceInfo, object_, name, param), + __GET_DEVICE_INFO_ERR); + } + + //! \brief Wrapper for clGetDeviceInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Return the current value of the host clock as seen by the device. 
- * The resolution of the device timer may be queried with the - * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. - * @return The host timer value. - */ - cl_ulong getHostTimer(cl_int *error = nullptr) - { - cl_ulong retVal = 0; - cl_int err = - clGetHostTimer(this->get(), &retVal); - detail::errHandler( - err, - __GET_HOST_TIMER_ERR); - if (error) { - *error = err; - } - return retVal; - } - - /** - * Return a synchronized pair of host and device timestamps as seen by device. - * Use to correlate the clocks and get the host timer only using getHostTimer - * as a lower cost mechanism in between calls. - * The resolution of the host timer may be queried with the - * CL_PLATFORM_HOST_TIMER_RESOLUTION query. - * The resolution of the device timer may be queried with the - * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. - * @return A pair of (device timer, host timer) timer values. - */ - std::pair getDeviceAndHostTimer(cl_int *error = nullptr) - { - std::pair retVal; - cl_int err = - clGetDeviceAndHostTimer(this->get(), &(retVal.first), &(retVal.second)); - detail::errHandler( - err, - __GET_DEVICE_AND_HOST_TIMER_ERR); - if (error) { - *error = err; - } - return retVal; - } + /** + * Return the current value of the host clock as seen by the device. + * The resolution of the device timer may be queried with the + * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. + * @return The host timer value. + */ + cl_ulong getHostTimer(cl_int *error = nullptr) { + cl_ulong retVal = 0; + cl_int err = clGetHostTimer(this->get(), &retVal); + detail::errHandler(err, __GET_HOST_TIMER_ERR); + if (error) { *error = err; } + return retVal; + } + + /** + * Return a synchronized pair of host and device timestamps as seen by device. + * Use to correlate the clocks and get the host timer only using getHostTimer + * as a lower cost mechanism in between calls. + * The resolution of the host timer may be queried with the + * CL_PLATFORM_HOST_TIMER_RESOLUTION query. 
+ * The resolution of the device timer may be queried with the + * CL_DEVICE_PROFILING_TIMER_RESOLUTION query. + * @return A pair of (device timer, host timer) timer values. + */ + std::pair getDeviceAndHostTimer(cl_int *error = nullptr) { + std::pair retVal; + cl_int err = clGetDeviceAndHostTimer(this->get(), &(retVal.first), &(retVal.second)); + detail::errHandler(err, __GET_DEVICE_AND_HOST_TIMER_ERR); + if (error) { *error = err; } + return retVal; + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - //! \brief Wrapper for clCreateSubDevices(). - cl_int createSubDevices(const cl_device_partition_property* properties, - vector* devices); + //! \brief Wrapper for clCreateSubDevices(). + cl_int createSubDevices(const cl_device_partition_property *properties, + vector *devices); #endif // defined (CL_HPP_TARGET_OPENCL_VERSION >= 120) #if defined(cl_ext_device_fission) - //! \brief Wrapper for clCreateSubDevices(). - cl_int createSubDevices(const cl_device_partition_property_ext* properties, - vector* devices); + //! \brief Wrapper for clCreateSubDevices(). 
+ cl_int createSubDevices(const cl_device_partition_property_ext *properties, + vector *devices); #endif // defined(cl_ext_device_fission) -}; + }; -using BuildLogType = vector::param_type>>; + using BuildLogType = vector< + std::pair::param_type>>; #if defined(CL_HPP_ENABLE_EXCEPTIONS) -/** -* Exception class for build errors to carry build info -*/ -class BuildError : public Error -{ -private: - BuildLogType buildLogs; -public: - BuildError(cl_int err, const char * errStr, const BuildLogType &vec) : Error(err, errStr), buildLogs(vec) - { - } - - BuildLogType getBuildLog() const - { - return buildLogs; - } -}; -namespace detail { - static inline cl_int buildErrHandler( - cl_int err, - const char * errStr, - const BuildLogType &buildLogs) - { - if (err != CL_SUCCESS) { - throw BuildError(err, errStr, buildLogs); - } - return err; - } -} // namespace detail + /** + * Exception class for build errors to carry build info + */ + class BuildError : public Error { + private: + BuildLogType buildLogs; + + public: + BuildError(cl_int err, const char *errStr, const BuildLogType &vec) : + Error(err, errStr), buildLogs(vec) {} + + BuildLogType getBuildLog() const { return buildLogs; } + }; + namespace detail { + static inline cl_int buildErrHandler(cl_int err, const char *errStr, + const BuildLogType &buildLogs) { + if (err != CL_SUCCESS) { throw BuildError(err, errStr, buildLogs); } + return err; + } + } // namespace detail #else -namespace detail { - static inline cl_int buildErrHandler( - cl_int err, - const char * errStr, - const BuildLogType &buildLogs) - { - (void)buildLogs; // suppress unused variable warning - (void)errStr; - return err; - } -} // namespace detail + namespace detail { + static inline cl_int buildErrHandler(cl_int err, const char *errStr, + const BuildLogType &buildLogs) { + (void)buildLogs; // suppress unused variable warning + (void)errStr; + return err; + } + } // namespace detail #endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) 
-CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Device::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ Device Device::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Device::default_error_ = CL_SUCCESS; - -/*! \brief Class interface for cl_platform_id. - * - * \note Copies of these objects are inexpensive, since they don't 'own' - * any underlying resources or data structures. - * - * \see cl_platform_id - */ -class Platform : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static Platform default_; - static cl_int default_error_; - - /*! \brief Create the default context. - * - * This sets @c default_ and @c default_error_. It does not throw - * @c cl::Error. - */ - static void makeDefault() { - /* Throwing an exception from a call_once invocation does not do - * what we wish, so we catch it and save the error. - */ + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Device::default_initialized_; + CL_HPP_DEFINE_STATIC_MEMBER_ Device Device::default_; + CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Device::default_error_ = CL_SUCCESS; + + /*! \brief Class interface for cl_platform_id. + * + * \note Copies of these objects are inexpensive, since they don't 'own' + * any underlying resources or data structures. + * + * \see cl_platform_id + */ + class Platform : public detail::Wrapper { + private: + static std::once_flag default_initialized_; + static Platform default_; + static cl_int default_error_; + + /*! \brief Create the default context. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. 
+ */ #if defined(CL_HPP_ENABLE_EXCEPTIONS) - try + try #endif - { - // If default wasn't passed ,generate one - // Otherwise set it - cl_uint n = 0; - - cl_int err = ::clGetPlatformIDs(0, nullptr, &n); - if (err != CL_SUCCESS) { - default_error_ = err; - return; - } - if (n == 0) { - default_error_ = CL_INVALID_PLATFORM; - return; - } - - vector ids(n); - err = ::clGetPlatformIDs(n, ids.data(), nullptr); - if (err != CL_SUCCESS) { - default_error_ = err; - return; - } - - default_ = Platform(ids[0]); - } + { + // If default wasn't passed ,generate one + // Otherwise set it + cl_uint n = 0; + + cl_int err = ::clGetPlatformIDs(0, nullptr, &n); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + if (n == 0) { + default_error_ = CL_INVALID_PLATFORM; + return; + } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), nullptr); + if (err != CL_SUCCESS) { + default_error_ = err; + return; + } + + default_ = Platform(ids[0]); + } #if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } + catch (cl::Error &e) { + default_error_ = e.err(); + } #endif - } - - /*! \brief Create the default platform from a provided platform. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const Platform &p) { - default_ = p; - } - -public: + } + + /*! \brief Create the default platform from a provided platform. + * + * This sets @c default_. It does not throw + * @c cl::Error. + */ + static void makeDefaultProvided(const Platform &p) { default_ = p; } + + public: #ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = Platform(); - } + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. 
+ * This function is not thread safe. + */ + static void unitTestClearDefault() { default_ = Platform(); } #endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - //! \brief Default constructor - initializes to nullptr. - Platform() : detail::Wrapper() { } - - /*! \brief Constructor from cl_platform_id. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This simply copies the platform ID value, which is an inexpensive operation. - */ - explicit Platform(const cl_platform_id &platform, bool retainObject = false) : - detail::Wrapper(platform, retainObject) { } - - /*! \brief Assignment operator from cl_platform_id. - * - * This simply copies the platform ID value, which is an inexpensive operation. - */ - Platform& operator = (const cl_platform_id& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - static Platform getDefault( - cl_int *errResult = nullptr) - { - std::call_once(default_initialized_, makeDefault); - detail::errHandler(default_error_); - if (errResult != nullptr) { - *errResult = default_error_; - } - return default_; - } - - /** - * Modify the default platform to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default platform. - * Should be compared to the passed value to ensure that it was updated. - */ - static Platform setDefault(const Platform &default_platform) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_platform)); - detail::errHandler(default_error_); - return default_; - } - - //! \brief Wrapper for clGetPlatformInfo(). - template - cl_int getInfo(cl_platform_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPlatformInfo, object_, name, param), - __GET_PLATFORM_INFO_ERR); - } - - //! \brief Wrapper for clGetPlatformInfo() that returns by value. 
- template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_platform_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of devices for this platform. - * - * Wraps clGetDeviceIDs(). - */ - cl_int getDevices( - cl_device_type type, - vector* devices) const - { - cl_uint n = 0; - if( devices == nullptr ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - cl_int err = ::clGetDeviceIDs(object_, type, 0, nullptr, &n); - if (err != CL_SUCCESS && err != CL_DEVICE_NOT_FOUND) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - vector ids(n); - if (n>0) { - err = ::clGetDeviceIDs(object_, type, n, ids.data(), nullptr); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - } - - // Cannot trivially assign because we need to capture intermediates - // with safe construction - // We must retain things we obtain from the API to avoid releasing - // API-owned objects. - if (devices) { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) { - (*devices)[i] = Device(ids[i], true); - } - } - return CL_SUCCESS; - } + //! \brief Default constructor - initializes to nullptr. + Platform() : detail::Wrapper() {} + + /*! \brief Constructor from cl_platform_id. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This simply copies the platform ID value, which is an inexpensive operation. + */ + explicit Platform(const cl_platform_id &platform, bool retainObject = false) : + detail::Wrapper(platform, retainObject) {} + + /*! \brief Assignment operator from cl_platform_id. 
+ * + * This simply copies the platform ID value, which is an inexpensive operation. + */ + Platform &operator=(const cl_platform_id &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + static Platform getDefault(cl_int *errResult = nullptr) { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (errResult != nullptr) { *errResult = default_error_; } + return default_; + } + + /** + * Modify the default platform to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default platform. + * Should be compared to the passed value to ensure that it was updated. + */ + static Platform setDefault(const Platform &default_platform) { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_platform)); + detail::errHandler(default_error_); + return default_; + } + + //! \brief Wrapper for clGetPlatformInfo(). + template + cl_int getInfo(cl_platform_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetPlatformInfo, object_, name, param), + __GET_PLATFORM_INFO_ERR); + } + + //! \brief Wrapper for clGetPlatformInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + /*! \brief Gets a list of devices for this platform. + * + * Wraps clGetDeviceIDs(). 
+ */ + cl_int getDevices(cl_device_type type, vector *devices) const { + cl_uint n = 0; + if (devices == nullptr) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + cl_int err = ::clGetDeviceIDs(object_, type, 0, nullptr, &n); + if (err != CL_SUCCESS && err != CL_DEVICE_NOT_FOUND) { + return detail::errHandler(err, __GET_DEVICE_IDS_ERR); + } + + vector ids(n); + if (n > 0) { + err = ::clGetDeviceIDs(object_, type, n, ids.data(), nullptr); + if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } + } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { (*devices)[i] = Device(ids[i], true); } + } + return CL_SUCCESS; + } #if defined(CL_HPP_USE_DX_INTEROP) - /*! \brief Get the list of available D3D10 devices. - * - * \param d3d_device_source. - * - * \param d3d_object. - * - * \param d3d_device_set. - * - * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device - * values returned in devices can be used to identify a specific OpenCL - * device. If \a devices argument is nullptr, this argument is ignored. - * - * \return One of the following values: - * - CL_SUCCESS if the function is executed successfully. - * - * The application can query specific capabilities of the OpenCL device(s) - * returned by cl::getDevices. This can be used by the application to - * determine which device(s) to use. - * - * \note In the case that exceptions are enabled and a return value - * other than CL_SUCCESS is generated, then cl::Error exception is - * generated. 
- */ - cl_int getDevices( - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - vector* devices) const - { - typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)( - cl_platform_id platform, - cl_d3d10_device_source_khr d3d_device_source, - void * d3d_object, - cl_d3d10_device_set_khr d3d_device_set, - cl_uint num_entries, - cl_device_id * devices, - cl_uint* num_devices); - - if( devices == nullptr ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); - } - - static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = nullptr; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(object_, clGetDeviceIDsFromD3D10KHR); -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetDeviceIDsFromD3D10KHR); + /*! \brief Get the list of available D3D10 devices. + * + * \param d3d_device_source. + * + * \param d3d_object. + * + * \param d3d_device_set. + * + * \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device + * values returned in devices can be used to identify a specific OpenCL + * device. If \a devices argument is nullptr, this argument is ignored. + * + * \return One of the following values: + * - CL_SUCCESS if the function is executed successfully. + * + * The application can query specific capabilities of the OpenCL device(s) + * returned by cl::getDevices. This can be used by the application to + * determine which device(s) to use. + * + * \note In the case that exceptions are enabled and a return value + * other than CL_SUCCESS is generated, then cl::Error exception is + * generated. 
+ */ + cl_int getDevices(cl_d3d10_device_source_khr d3d_device_source, void *d3d_object, + cl_d3d10_device_set_khr d3d_device_set, vector *devices) const { + typedef CL_API_ENTRY cl_int(CL_API_CALL * PFN_clGetDeviceIDsFromD3D10KHR)( + cl_platform_id platform, + cl_d3d10_device_source_khr d3d_device_source, + void *d3d_object, + cl_d3d10_device_set_khr d3d_device_set, + cl_uint num_entries, + cl_device_id *devices, + cl_uint *num_devices); + + if (devices == nullptr) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR); + } + + static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = nullptr; +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(object_, clGetDeviceIDsFromD3D10KHR); +# endif +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetDeviceIDsFromD3D10KHR); +# endif + + cl_uint n = 0; + cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, d3d_device_source, d3d_object, d3d_device_set, 0, nullptr, &n); + if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } + + vector ids(n); + err = pfn_clGetDeviceIDsFromD3D10KHR( + object_, d3d_device_source, d3d_object, d3d_device_set, n, ids.data(), nullptr); + if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_DEVICE_IDS_ERR); } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + // We must retain things we obtain from the API to avoid releasing + // API-owned objects. 
+ if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { (*devices)[i] = Device(ids[i], true); } + } + return CL_SUCCESS; + } #endif - cl_uint n = 0; - cl_int err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - 0, - nullptr, - &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - vector ids(n); - err = pfn_clGetDeviceIDsFromD3D10KHR( - object_, - d3d_device_source, - d3d_object, - d3d_device_set, - n, - ids.data(), - nullptr); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_DEVICE_IDS_ERR); - } - - // Cannot trivially assign because we need to capture intermediates - // with safe construction - // We must retain things we obtain from the API to avoid releasing - // API-owned objects. - if (devices) { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) { - (*devices)[i] = Device(ids[i], true); - } - } - return CL_SUCCESS; - } -#endif + /*! \brief Gets a list of available platforms. + * + * Wraps clGetPlatformIDs(). 
+ */ + static cl_int get(vector *platforms) { + cl_uint n = 0; + + if (platforms == nullptr) { + return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); + } + + cl_int err = ::clGetPlatformIDs(0, nullptr, &n); + if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); } + + vector ids(n); + err = ::clGetPlatformIDs(n, ids.data(), nullptr); + if (err != CL_SUCCESS) { return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); } + + if (platforms) { + platforms->resize(ids.size()); + + // Platforms don't reference count + for (size_type i = 0; i < ids.size(); i++) { (*platforms)[i] = Platform(ids[i]); } + } + return CL_SUCCESS; + } + + /*! \brief Gets the first available platform. + * + * Wraps clGetPlatformIDs(), returning the first result. + */ + static cl_int get(Platform *platform) { + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (platform) { *platform = default_platform; } + return err; + } + + /*! \brief Gets the first available platform, returning it by value. + * + * \return Returns a valid platform if one is available. + * If no platform is available will return a null platform. + * Throws an exception if no platforms are available + * or an error condition occurs. + * Wraps clGetPlatformIDs(), returning the first result. + */ + static Platform get(cl_int *errResult = nullptr) { + cl_int err; + Platform default_platform = Platform::getDefault(&err); + if (errResult) { *errResult = err; } + return default_platform; + } - /*! \brief Gets a list of available platforms. - * - * Wraps clGetPlatformIDs(). 
- */ - static cl_int get( - vector* platforms) - { - cl_uint n = 0; - - if( platforms == nullptr ) { - return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR); - } - - cl_int err = ::clGetPlatformIDs(0, nullptr, &n); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - vector ids(n); - err = ::clGetPlatformIDs(n, ids.data(), nullptr); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_PLATFORM_IDS_ERR); - } - - if (platforms) { - platforms->resize(ids.size()); - - // Platforms don't reference count - for (size_type i = 0; i < ids.size(); i++) { - (*platforms)[i] = Platform(ids[i]); - } - } - return CL_SUCCESS; - } - - /*! \brief Gets the first available platform. - * - * Wraps clGetPlatformIDs(), returning the first result. - */ - static cl_int get( - Platform * platform) - { - cl_int err; - Platform default_platform = Platform::getDefault(&err); - if (platform) { - *platform = default_platform; - } - return err; - } - - /*! \brief Gets the first available platform, returning it by value. - * - * \return Returns a valid platform if one is available. - * If no platform is available will return a null platform. - * Throws an exception if no platforms are available - * or an error condition occurs. - * Wraps clGetPlatformIDs(), returning the first result. - */ - static Platform get( - cl_int * errResult = nullptr) - { - cl_int err; - Platform default_platform = Platform::getDefault(&err); - if (errResult) { - *errResult = err; - } - return default_platform; - } - #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - //! \brief Wrapper for clUnloadCompiler(). - cl_int - unloadCompiler() - { - return ::clUnloadPlatformCompiler(object_); - } + //! \brief Wrapper for clUnloadCompiler(). + cl_int unloadCompiler() { return ::clUnloadPlatformCompiler(object_); } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -}; // class Platform + }; // class Platform #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - //! 
\brief Wrapper for clCreateSubDevices(). -inline cl_int Device::createSubDevices(const cl_device_partition_property* properties, - vector* devices) -{ - cl_uint n = 0; - cl_int err = clCreateSubDevices(object_, properties, 0, nullptr, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - - vector ids(n); - err = clCreateSubDevices(object_, properties, n, ids.data(), nullptr); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - - // Cannot trivially assign because we need to capture intermediates - // with safe construction - if (devices) - { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) - { - // We do not need to retain because this device is being created - // by the runtime - (*devices)[i] = Device(ids[i], false); - } - } - - return CL_SUCCESS; -} + //! \brief Wrapper for clCreateSubDevices(). 
+ inline cl_int Device::createSubDevices(const cl_device_partition_property *properties, + vector *devices) { + cl_uint n = 0; + cl_int err = clCreateSubDevices(object_, properties, 0, nullptr, &n); + if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } + + vector ids(n); + err = clCreateSubDevices(object_, properties, n, ids.data(), nullptr); + if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } + + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + + return CL_SUCCESS; + } #endif // defined (CL_HPP_TARGET_OPENCL_VERSION >= 120) #if defined(cl_ext_device_fission) - //! \brief Wrapper for clCreateSubDevices(). 
-inline cl_int Device::createSubDevices(const cl_device_partition_property_ext* properties, - vector* devices) -{ - typedef CL_API_ENTRY cl_int(CL_API_CALL * PFN_clCreateSubDevicesEXT)( - cl_device_id /*in_device*/, - const cl_device_partition_property_ext* /* properties */, - cl_uint /*num_entries*/, cl_device_id* /*out_devices*/, - cl_uint* /*num_devices*/) CL_API_SUFFIX__VERSION_1_1; - - static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = nullptr; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl::Device device(object_); - cl_platform_id platform = device.getInfo()(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateSubDevicesEXT); -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); -#endif - - cl_uint n = 0; - cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, nullptr, &n); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - - vector ids(n); - err = - pfn_clCreateSubDevicesEXT(object_, properties, n, ids.data(), nullptr); - if (err != CL_SUCCESS) - { - return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); - } - // Cannot trivially assign because we need to capture intermediates - // with safe construction - if (devices) - { - devices->resize(ids.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < ids.size(); i++) - { - // We do not need to retain because this device is being created - // by the runtime - (*devices)[i] = Device(ids[i], false); - } - } - - return CL_SUCCESS; -} + //! \brief Wrapper for clCreateSubDevices(). 
+ inline cl_int Device::createSubDevices(const cl_device_partition_property_ext *properties, + vector *devices) { + typedef CL_API_ENTRY cl_int(CL_API_CALL * PFN_clCreateSubDevicesEXT)( + cl_device_id /*in_device*/, + const cl_device_partition_property_ext * /* properties */, + cl_uint /*num_entries*/, + cl_device_id * /*out_devices*/, + cl_uint * /*num_devices*/) CL_API_SUFFIX__VERSION_1_1; + + static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = nullptr; +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl::Device device(object_); + cl_platform_id platform = device.getInfo()(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateSubDevicesEXT); +# endif +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSubDevicesEXT); +# endif + + cl_uint n = 0; + cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, nullptr, &n); + if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } + + vector ids(n); + err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids.data(), nullptr); + if (err != CL_SUCCESS) { return detail::errHandler(err, __CREATE_SUB_DEVICES_ERR); } + // Cannot trivially assign because we need to capture intermediates + // with safe construction + if (devices) { + devices->resize(ids.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < ids.size(); i++) { + // We do not need to retain because this device is being created + // by the runtime + (*devices)[i] = Device(ids[i], false); + } + } + + return CL_SUCCESS; + } #endif // defined(cl_ext_device_fission) -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Platform::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ Platform Platform::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; - + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Platform::default_initialized_; + CL_HPP_DEFINE_STATIC_MEMBER_ Platform 
Platform::default_; + CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Platform::default_error_ = CL_SUCCESS; /** * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -/** - * Unload the OpenCL compiler. - * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. - */ -inline CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int -UnloadCompiler() CL_API_SUFFIX__VERSION_1_1_DEPRECATED; -inline cl_int -UnloadCompiler() -{ - return ::clUnloadCompiler(); -} + /** + * Unload the OpenCL compiler. + * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead. + */ + inline CL_API_PREFIX__VERSION_1_1_DEPRECATED cl_int UnloadCompiler() + CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + inline cl_int UnloadCompiler() { return ::clUnloadCompiler(); } #endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - #if defined(cl_ext_image_requirements_info) -enum ImageRequirementsInfoExt : cl_image_requirements_info_ext -{ - RowPitchAlign = CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, - BaseAddAlign = CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT, - Size = CL_IMAGE_REQUIREMENTS_SIZE_EXT, - MaxWidth = CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, - MaxHeight = CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, - MaxDepth = CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, - MaxArraySize = CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, -#if defined(cl_ext_image_from_buffer) - SlicePitchAlign = CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT, -#endif -}; + enum ImageRequirementsInfoExt : cl_image_requirements_info_ext { + RowPitchAlign = CL_IMAGE_REQUIREMENTS_ROW_PITCH_ALIGNMENT_EXT, + BaseAddAlign = CL_IMAGE_REQUIREMENTS_BASE_ADDRESS_ALIGNMENT_EXT, + Size = CL_IMAGE_REQUIREMENTS_SIZE_EXT, + MaxWidth = CL_IMAGE_REQUIREMENTS_MAX_WIDTH_EXT, + MaxHeight = CL_IMAGE_REQUIREMENTS_MAX_HEIGHT_EXT, + MaxDepth = CL_IMAGE_REQUIREMENTS_MAX_DEPTH_EXT, + MaxArraySize = CL_IMAGE_REQUIREMENTS_MAX_ARRAY_SIZE_EXT, +# if defined(cl_ext_image_from_buffer) + SlicePitchAlign = 
CL_IMAGE_REQUIREMENTS_SLICE_PITCH_ALIGNMENT_EXT, +# endif + }; #endif // cl_ext_image_requirements_info - -/*! \brief Class interface for cl_context. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_context as the original. For details, see - * clRetainContext() and clReleaseContext(). - * - * \see cl_context - */ -class Context - : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static Context default_; - static cl_int default_error_; - - /*! \brief Create the default context from the default device type in the default platform. - * - * This sets @c default_ and @c default_error_. It does not throw - * @c cl::Error. - */ - static void makeDefault() { - /* Throwing an exception from a call_once invocation does not do - * what we wish, so we catch it and save the error. - */ + /*! \brief Class interface for cl_context. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_context as the original. For details, see + * clRetainContext() and clReleaseContext(). + * + * \see cl_context + */ + class Context : public detail::Wrapper { + private: + static std::once_flag default_initialized_; + static Context default_; + static cl_int default_error_; + + /*! \brief Create the default context from the default device type in the default platform. + * + * This sets @c default_ and @c default_error_. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. 
+ */ #if defined(CL_HPP_ENABLE_EXCEPTIONS) - try + try #endif - { + { #if !defined(__APPLE__) && !defined(__MACOS) - const Platform &p = Platform::getDefault(); - cl_platform_id defaultPlatform = p(); - cl_context_properties properties[3] = { - CL_CONTEXT_PLATFORM, (cl_context_properties)defaultPlatform, 0 - }; -#else // #if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties *properties = nullptr; + const Platform &p = Platform::getDefault(); + cl_platform_id defaultPlatform = p(); + cl_context_properties properties[3] = { + CL_CONTEXT_PLATFORM, (cl_context_properties)defaultPlatform, 0}; +#else // #if !defined(__APPLE__) && !defined(__MACOS) + cl_context_properties *properties = nullptr; #endif // #if !defined(__APPLE__) && !defined(__MACOS) - default_ = Context( - CL_DEVICE_TYPE_DEFAULT, - properties, - nullptr, - nullptr, - &default_error_); - } + default_ = + Context(CL_DEVICE_TYPE_DEFAULT, properties, nullptr, nullptr, &default_error_); + } #if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } + catch (cl::Error &e) { + default_error_ = e.err(); + } #endif - } + } - - /*! \brief Create the default context from a provided Context. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const Context &c) { - default_ = c; - } + /*! \brief Create the default context from a provided Context. + * + * This sets @c default_. It does not throw + * @c cl::Error. 
+ */ + static void makeDefaultProvided(const Context &c) { default_ = c; } #if defined(cl_ext_image_requirements_info) - struct ImageRequirementsInfo { - - ImageRequirementsInfo(cl_mem_flags f, const cl_mem_properties* properties, const ImageFormat* format, const cl_image_desc* desc) - { - flags = f; - properties = properties; - image_format = format; - image_desc = desc; - } - - cl_mem_flags flags = 0; - const cl_mem_properties* properties; - const ImageFormat* image_format; - const cl_image_desc* image_desc; - }; - - static cl_int getImageRequirementsInfoExtHelper(const Context &context, - const ImageRequirementsInfo &info, - cl_image_requirements_info_ext param_name, - size_type param_value_size, - void* param_value, - size_type* param_value_size_ret) - { - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - Device device = context.getInfo().at(0); - cl_platform_id platform = device.getInfo()(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetImageRequirementsInfoEXT); -#else - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetImageRequirementsInfoEXT); -#endif - - if (pfn_clGetImageRequirementsInfoEXT == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR); - } - - return detail::errHandler( - pfn_clGetImageRequirementsInfoEXT(context(), info.properties, - info.flags, info.image_format, info.image_desc, param_name, - param_value_size, param_value, param_value_size_ret), - __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR); - } + /*! \brief Arguments that getImageRequirementsInfoExtHelper() forwards to + * clGetImageRequirementsInfoEXT: memory flags, memory properties, image format + * and image descriptor. The pointers are stored as given, not copied. + */ + struct ImageRequirementsInfo { + ImageRequirementsInfo(cl_mem_flags f, const cl_mem_properties *properties, + const ImageFormat *format, const cl_image_desc *desc) { + flags = f; + /* The parameter shadows the member of the same name, so the member must be + * qualified. A bare `properties = properties` assigns the parameter to itself + * and leaves the member uninitialized. */ + this->properties = properties; + image_format = format; + image_desc = desc; + } + + cl_mem_flags flags = 0; + const cl_mem_properties *properties; + const ImageFormat *image_format; + const cl_image_desc *image_desc; + }; + + static cl_int getImageRequirementsInfoExtHelper(const Context &context, + const ImageRequirementsInfo &info, +
cl_image_requirements_info_ext param_name, + size_type param_value_size, + void *param_value, + size_type *param_value_size_ret) { +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + Device device = context.getInfo().at(0); + cl_platform_id platform = device.getInfo()(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetImageRequirementsInfoEXT); +# else + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetImageRequirementsInfoEXT); +# endif + + if (pfn_clGetImageRequirementsInfoEXT == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR); + } + + return detail::errHandler(pfn_clGetImageRequirementsInfoEXT(context(), + info.properties, + info.flags, + info.image_format, + info.image_desc, + param_name, + param_value_size, + param_value, + param_value_size_ret), + __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR); + } #endif // cl_ext_image_requirements_info - -public: + + public: #ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = Context(); - } + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { default_ = Context(); } #endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Constructs a context including a list of specified devices. - * - * Wraps clCreateContext(). 
- */ - Context( - const vector& devices, - const cl_context_properties* properties = nullptr, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - size_type, - void *) = nullptr, - void* data = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - size_type numDevices = devices.size(); - vector deviceIDs(numDevices); - - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateContext( - properties, (cl_uint) numDevices, - deviceIDs.data(), - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != nullptr) { - *err = error; - } - } - - /*! \brief Constructs a context including a specific device. - * - * Wraps clCreateContext(). - */ - Context( - const Device& device, - const cl_context_properties* properties = nullptr, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - size_type, - void *) = nullptr, - void* data = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - cl_device_id deviceID = device(); - - object_ = ::clCreateContext( - properties, 1, - &deviceID, - notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - if (err != nullptr) { - *err = error; - } - } - - /*! \brief Constructs a context including all or a subset of devices of a specified type. - * - * Wraps clCreateContextFromType(). - */ - Context( - cl_device_type type, - const cl_context_properties* properties = nullptr, - void (CL_CALLBACK * notifyFptr)( - const char *, - const void *, - size_type, - void *) = nullptr, - void* data = nullptr, - cl_int* err = nullptr) - { - cl_int error; + /*! \brief Constructs a context including a list of specified devices. + * + * Wraps clCreateContext(). 
+ */ + Context(const vector &devices, const cl_context_properties *properties = nullptr, + void(CL_CALLBACK *notifyFptr)(const char *, const void *, size_type, + void *) = nullptr, + void *data = nullptr, cl_int *err = nullptr) { + cl_int error; + + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + + for (size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateContext( + properties, (cl_uint)numDevices, deviceIDs.data(), notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != nullptr) { *err = error; } + } + + /*! \brief Constructs a context including a specific device. + * + * Wraps clCreateContext(). + */ + Context(const Device &device, const cl_context_properties *properties = nullptr, + void(CL_CALLBACK *notifyFptr)(const char *, const void *, size_type, + void *) = nullptr, + void *data = nullptr, cl_int *err = nullptr) { + cl_int error; + + cl_device_id deviceID = device(); + + object_ = ::clCreateContext(properties, 1, &deviceID, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + if (err != nullptr) { *err = error; } + } + + /*! \brief Constructs a context including all or a subset of devices of a specified type. + * + * Wraps clCreateContextFromType(). 
+ */ + Context(cl_device_type type, const cl_context_properties *properties = nullptr, + void(CL_CALLBACK *notifyFptr)(const char *, const void *, size_type, + void *) = nullptr, + void *data = nullptr, cl_int *err = nullptr) { + cl_int error; #if !defined(__APPLE__) && !defined(__MACOS) - cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 }; - - if (properties == nullptr) { - // Get a valid platform ID as we cannot send in a blank one - vector platforms; - error = Platform::get(&platforms); - if (error != CL_SUCCESS) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != nullptr) { - *err = error; - } - return; - } - - // Check the platforms we found for a device of our specified type - cl_context_properties platform_id = 0; - for (unsigned int i = 0; i < platforms.size(); i++) { - - vector devices; - -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - try { -#endif - - error = platforms[i].getDevices(type, &devices); - -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - } catch (cl::Error& e) { - error = e.err(); - } - // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type - // We do error checking next anyway, and can throw there if needed -#endif - - // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND - if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != nullptr) { - *err = error; - } - } - - if (devices.size() > 0) { - platform_id = (cl_context_properties)platforms[i](); - break; - } - } - - if (platform_id == 0) { - detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != nullptr) { - *err = CL_DEVICE_NOT_FOUND; - } - return; - } - - prop[1] = platform_id; - properties = &prop[0]; - } + cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0}; + + if (properties == nullptr) { + // Get a valid platform ID as we cannot send in a blank one + vector platforms; + error = Platform::get(&platforms); 
+ if (error != CL_SUCCESS) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != nullptr) { *err = error; } + return; + } + + // Check the platforms we found for a device of our specified type + cl_context_properties platform_id = 0; + for (unsigned int i = 0; i < platforms.size(); i++) { + vector devices; + +# if defined(CL_HPP_ENABLE_EXCEPTIONS) + try { +# endif + + error = platforms[i].getDevices(type, &devices); + +# if defined(CL_HPP_ENABLE_EXCEPTIONS) + } catch (cl::Error &e) { error = e.err(); } + // Catch if exceptions are enabled as we don't want to exit if first platform + // has no devices of type We do error checking next anyway, and can throw there + // if needed +# endif + + // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND + if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) { + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != nullptr) { *err = error; } + } + + if (devices.size() > 0) { + platform_id = (cl_context_properties)platforms[i](); + break; + } + } + + if (platform_id == 0) { + detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != nullptr) { *err = CL_DEVICE_NOT_FOUND; } + return; + } + + prop[1] = platform_id; + properties = &prop[0]; + } #endif - object_ = ::clCreateContextFromType( - properties, type, notifyFptr, data, &error); - - detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); - if (err != nullptr) { - *err = error; - } - } - - - /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. - * - * \note All calls to this function return the same cl_context as the first. - */ - static Context getDefault(cl_int * err = nullptr) - { - std::call_once(default_initialized_, makeDefault); - detail::errHandler(default_error_); - if (err != nullptr) { - *err = default_error_; - } - return default_; - } - - /** - * Modify the default context to be used by - * subsequent operations. 
- * Will only set the default if no default was previously created. - * @return updated default context. - * Should be compared to the passed value to ensure that it was updated. - */ - static Context setDefault(const Context &default_context) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_context)); - detail::errHandler(default_error_); - return default_; - } - - //! \brief Default constructor - initializes to nullptr. - Context() : detail::Wrapper() { } - - /*! \brief Constructor from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the cl_context - * into the new Context object. - */ - explicit Context(const cl_context& context, bool retainObject = false) : - detail::Wrapper(context, retainObject) { } - - /*! \brief Assignment operator from cl_context - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseContext() on the value previously held by this instance. - */ - Context& operator = (const cl_context& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetContextInfo(). - template - cl_int getInfo(cl_context_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetContextInfo, object_, name, param), - __GET_CONTEXT_INFO_ERR); - } - - //! \brief Wrapper for clGetContextInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_context_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - /*! \brief Gets a list of supported image formats. - * - * Wraps clGetSupportedImageFormats(). 
- */ - cl_int getSupportedImageFormats( - cl_mem_flags flags, - cl_mem_object_type type, - vector* formats) const - { - cl_uint numEntries; - - if (!formats) { - return CL_SUCCESS; - } - - cl_int err = ::clGetSupportedImageFormats( - object_, - flags, - type, - 0, - nullptr, - &numEntries); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - if (numEntries > 0) { - vector value(numEntries); - err = ::clGetSupportedImageFormats( - object_, - flags, - type, - numEntries, - (cl_image_format*)value.data(), - nullptr); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); - } - - formats->assign(value.begin(), value.end()); - } - else { - // If no values are being returned, ensure an empty vector comes back - formats->clear(); - } - - return CL_SUCCESS; - } + object_ = ::clCreateContextFromType(properties, type, notifyFptr, data, &error); + + detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR); + if (err != nullptr) { *err = error; } + } + + /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT. + * + * \note All calls to this function return the same cl_context as the first. + */ + static Context getDefault(cl_int *err = nullptr) { + std::call_once(default_initialized_, makeDefault); + detail::errHandler(default_error_); + if (err != nullptr) { *err = default_error_; } + return default_; + } + + /** + * Modify the default context to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default context. + * Should be compared to the passed value to ensure that it was updated. + */ + static Context setDefault(const Context &default_context) { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_context)); + detail::errHandler(default_error_); + return default_; + } + + //! \brief Default constructor - initializes to nullptr. 
+ Context() : detail::Wrapper() {} + + /*! \brief Constructor from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the cl_context + * into the new Context object. + */ + explicit Context(const cl_context &context, bool retainObject = false) : + detail::Wrapper(context, retainObject) {} + + /*! \brief Assignment operator from cl_context - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseContext() on the value previously held by this instance. + */ + Context &operator=(const cl_context &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetContextInfo(). + template + cl_int getInfo(cl_context_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetContextInfo, object_, name, param), + __GET_CONTEXT_INFO_ERR); + } + + //! \brief Wrapper for clGetContextInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + /*! \brief Gets a list of supported image formats. + * + * Wraps clGetSupportedImageFormats(). 
+ */ + cl_int getSupportedImageFormats(cl_mem_flags flags, cl_mem_object_type type, + vector *formats) const { + cl_uint numEntries; + + if (!formats) { return CL_SUCCESS; } + + cl_int err = + ::clGetSupportedImageFormats(object_, flags, type, 0, nullptr, &numEntries); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + if (numEntries > 0) { + vector value(numEntries); + err = ::clGetSupportedImageFormats( + object_, flags, type, numEntries, (cl_image_format *)value.data(), nullptr); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR); + } + + formats->assign(value.begin(), value.end()); + } else { + // If no values are being returned, ensure an empty vector comes back + formats->clear(); + } + + return CL_SUCCESS; + } #if defined(cl_ext_image_requirements_info) - template - cl_int getImageRequirementsInfoExt(cl_image_requirements_info_ext name, - T* param, - cl_mem_flags flags = 0, - const cl_mem_properties* properties = nullptr, - const ImageFormat* image_format = nullptr, - const cl_image_desc* image_desc = nullptr) const - { - ImageRequirementsInfo imageInfo = {flags, properties, image_format, image_desc}; - - return detail::errHandler( - detail::getInfo( - Context::getImageRequirementsInfoExtHelper, *this, imageInfo, name, param), - __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR); - } - - template typename - detail::param_traits::param_type - getImageRequirementsInfoExt(cl_mem_flags flags = 0, - const cl_mem_properties* properties = nullptr, - const ImageFormat* image_format = nullptr, - const cl_image_desc* image_desc = nullptr, - cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_image_requirements_info_ext, type>::param_type param; - cl_int result = getImageRequirementsInfoExt(type, ¶m, flags, properties, image_format, image_desc); - if (err != nullptr) { - *err = result; - } - return param; - } + template + cl_int 
getImageRequirementsInfoExt(cl_image_requirements_info_ext name, T *param, + cl_mem_flags flags = 0, + const cl_mem_properties *properties = nullptr, + const ImageFormat *image_format = nullptr, + const cl_image_desc *image_desc = nullptr) const { + ImageRequirementsInfo imageInfo = {flags, properties, image_format, image_desc}; + + return detail::errHandler( + detail::getInfo( + Context::getImageRequirementsInfoExtHelper, *this, imageInfo, name, param), + __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR); + } + + template + typename detail::param_traits::param_type + getImageRequirementsInfoExt(cl_mem_flags flags = 0, + const cl_mem_properties *properties = nullptr, + const ImageFormat *image_format = nullptr, + const cl_image_desc *image_desc = nullptr, + cl_int *err = nullptr) const { + typename detail::param_traits::param_type + param; + cl_int result = getImageRequirementsInfoExt( + type, ¶m, flags, properties, image_format, image_desc); + if (err != nullptr) { *err = result; } + return param; + } #endif // cl_ext_image_requirements_info #if CL_HPP_TARGET_OPENCL_VERSION >= 300 - /*! \brief Registers a destructor callback function with a context. - * - * Wraps clSetContextDestructorCallback(). - * - * Each call to this function registers the specified callback function on - * a destructor callback stack associated with context. The registered - * callback functions are called in the reverse order in which they were registered. - * If a context callback function was specified when context was created, - * it will not be called after any context destructor callback is called. - */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_context, void *), - void * user_data = nullptr) - { - return detail::errHandler( - ::clSetContextDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_CONTEXT_DESCTRUCTOR_CALLBACK_ERR); - } + /*! \brief Registers a destructor callback function with a context. + * + * Wraps clSetContextDestructorCallback(). 
+ * + * Each call to this function registers the specified callback function on + * a destructor callback stack associated with context. The registered + * callback functions are called in the reverse order in which they were registered. + * If a context callback function was specified when context was created, + * it will not be called after any context destructor callback is called. + */ + cl_int setDestructorCallback(void(CL_CALLBACK *pfn_notify)(cl_context, void *), + void *user_data = nullptr) { + return detail::errHandler( + ::clSetContextDestructorCallback(object_, pfn_notify, user_data), + __SET_CONTEXT_DESCTRUCTOR_CALLBACK_ERR); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 300 -}; + }; -inline void Device::makeDefault() -{ - /* Throwing an exception from a call_once invocation does not do - * what we wish, so we catch it and save the error. - */ + inline void Device::makeDefault() { + /* Throwing an exception from a call_once invocation does not do + * what we wish, so we catch it and save the error. 
+ */ #if defined(CL_HPP_ENABLE_EXCEPTIONS) - try + try #endif - { - cl_int error = 0; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - default_error_ = error; - } - else { - default_ = context.getInfo()[0]; - default_error_ = CL_SUCCESS; - } - } + { + cl_int error = 0; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + default_error_ = error; + } else { + default_ = context.getInfo()[0]; + default_error_ = CL_SUCCESS; + } + } #if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } + catch (cl::Error &e) { + default_error_ = e.err(); + } #endif -} - -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Context::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ Context Context::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Context::default_error_ = CL_SUCCESS; - -/*! \brief Class interface for cl_event. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_event as the original. For details, see - * clRetainEvent() and clReleaseEvent(). - * - * \see cl_event - */ -class Event : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to nullptr. - Event() : detail::Wrapper() { } - - /*! \brief Constructor from cl_event - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This effectively transfers ownership of a refcount on the cl_event - * into the new Event object. - */ - explicit Event(const cl_event& event, bool retainObject = false) : - detail::Wrapper(event, retainObject) { } - - /*! \brief Assignment operator from cl_event - takes ownership. 
- * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseEvent() on the value previously held by this instance. - */ - Event& operator = (const cl_event& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetEventInfo(). - template - cl_int getInfo(cl_event_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetEventInfo, object_, name, param), - __GET_EVENT_INFO_ERR); - } - - //! \brief Wrapper for clGetEventInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_event_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - //! \brief Wrapper for clGetEventProfilingInfo(). - template - cl_int getProfilingInfo(cl_profiling_info name, T* param) const - { - return detail::errHandler(detail::getInfo( - &::clGetEventProfilingInfo, object_, name, param), - __GET_EVENT_PROFILE_INFO_ERR); - } - - //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. - template typename - detail::param_traits::param_type - getProfilingInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_profiling_info, name>::param_type param; - cl_int result = getProfilingInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - /*! \brief Blocks the calling thread until this event completes. - * - * Wraps clWaitForEvents(). - */ - cl_int wait() const - { - return detail::errHandler( - ::clWaitForEvents(1, &object_), - __WAIT_FOR_EVENTS_ERR); - } + } + + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Context::default_initialized_; + CL_HPP_DEFINE_STATIC_MEMBER_ Context Context::default_; + CL_HPP_DEFINE_STATIC_MEMBER_ cl_int Context::default_error_ = CL_SUCCESS; + + /*! \brief Class interface for cl_event. 
+ * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_event as the original. For details, see + * clRetainEvent() and clReleaseEvent(). + * + * \see cl_event + */ + class Event : public detail::Wrapper { + public: + //! \brief Default constructor - initializes to nullptr. + Event() : detail::Wrapper() {} + + /*! \brief Constructor from cl_event - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_event + * into the new Event object. + */ + explicit Event(const cl_event &event, bool retainObject = false) : + detail::Wrapper(event, retainObject) {} + + /*! \brief Assignment operator from cl_event - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseEvent() on the value previously held by this instance. + */ + Event &operator=(const cl_event &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetEventInfo(). + template + cl_int getInfo(cl_event_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetEventInfo, object_, name, param), + __GET_EVENT_INFO_ERR); + } + + //! \brief Wrapper for clGetEventInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + //! \brief Wrapper for clGetEventProfilingInfo(). + template + cl_int getProfilingInfo(cl_profiling_info name, T *param) const { + return detail::errHandler( + detail::getInfo(&::clGetEventProfilingInfo, object_, name, param), + __GET_EVENT_PROFILE_INFO_ERR); + } + + //! \brief Wrapper for clGetEventProfilingInfo() that returns by value. 
+ template + typename detail::param_traits::param_type + getProfilingInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getProfilingInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + /*! \brief Blocks the calling thread until this event completes. + * + * Wraps clWaitForEvents(). + */ + cl_int wait() const { + return detail::errHandler(::clWaitForEvents(1, &object_), __WAIT_FOR_EVENTS_ERR); + } #if CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Registers a user callback function for a specific command execution status. - * - * Wraps clSetEventCallback(). - */ - cl_int setCallback( - cl_int type, - void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *), - void * user_data = nullptr) - { - return detail::errHandler( - ::clSetEventCallback( - object_, - type, - pfn_notify, - user_data), - __SET_EVENT_CALLBACK_ERR); - } + /*! \brief Registers a user callback function for a specific command execution status. + * + * Wraps clSetEventCallback(). + */ + cl_int setCallback(cl_int type, void(CL_CALLBACK *pfn_notify)(cl_event, cl_int, void *), + void *user_data = nullptr) { + return detail::errHandler(::clSetEventCallback(object_, type, pfn_notify, user_data), + __SET_EVENT_CALLBACK_ERR); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ - static cl_int - waitForEvents(const vector& events) - { - static_assert(sizeof(cl::Event) == sizeof(cl_event), - "Size of cl::Event must be equal to size of cl_event"); - - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : nullptr), - __WAIT_FOR_EVENTS_ERR); - } -}; + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). 
+ */ + static cl_int waitForEvents(const vector &events) { + static_assert(sizeof(cl::Event) == sizeof(cl_event), + "Size of cl::Event must be equal to size of cl_event"); + + return detail::errHandler( + ::clWaitForEvents((cl_uint)events.size(), + (events.size() > 0) ? (cl_event *)&events.front() : nullptr), + __WAIT_FOR_EVENTS_ERR); + } + }; #if CL_HPP_TARGET_OPENCL_VERSION >= 110 -/*! \brief Class interface for user events (a subset of cl_event's). - * - * See Event for details about copy semantics, etc. - */ -class UserEvent : public Event -{ -public: - /*! \brief Constructs a user event on a given context. - * - * Wraps clCreateUserEvent(). - */ - UserEvent( - const Context& context, - cl_int * err = nullptr) - { - cl_int error; - object_ = ::clCreateUserEvent( - context(), - &error); - - detail::errHandler(error, __CREATE_USER_EVENT_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - UserEvent() : Event() { } - - /*! \brief Sets the execution status of a user event object. - * - * Wraps clSetUserEventStatus(). - */ - cl_int setStatus(cl_int status) - { - return detail::errHandler( - ::clSetUserEventStatus(object_,status), - __SET_USER_EVENT_STATUS_ERR); - } -}; + /*! \brief Class interface for user events (a subset of cl_event's). + * + * See Event for details about copy semantics, etc. + */ + class UserEvent : public Event { + public: + /*! \brief Constructs a user event on a given context. + * + * Wraps clCreateUserEvent(). + */ + UserEvent(const Context &context, cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateUserEvent(context(), &error); + + detail::errHandler(error, __CREATE_USER_EVENT_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + UserEvent() : Event() {} + + /*! \brief Sets the execution status of a user event object. + * + * Wraps clSetUserEventStatus(). 
+ */ + cl_int setStatus(cl_int status) { + return detail::errHandler(::clSetUserEventStatus(object_, status), + __SET_USER_EVENT_STATUS_ERR); + } + }; #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -/*! \brief Blocks the calling thread until every event specified is complete. - * - * Wraps clWaitForEvents(). - */ -inline static cl_int -WaitForEvents(const vector& events) -{ - return detail::errHandler( - ::clWaitForEvents( - (cl_uint) events.size(), (events.size() > 0) ? (cl_event*)&events.front() : nullptr), - __WAIT_FOR_EVENTS_ERR); -} - -/*! \brief Class interface for cl_mem. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_mem as the original. For details, see - * clRetainMemObject() and clReleaseMemObject(). - * - * \see cl_mem - */ -class Memory : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to nullptr. - Memory() : detail::Wrapper() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * Optionally transfer ownership of a refcount on the cl_mem - * into the new Memory object. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * - * See Memory for further details. - */ - explicit Memory(const cl_mem& memory, bool retainObject) : - detail::Wrapper(memory, retainObject) { } - - /*! \brief Assignment operator from cl_mem - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseMemObject() on the value previously held by this instance. - */ - Memory& operator = (const cl_mem& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_mem_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetMemObjectInfo, object_, name, param), - __GET_MEM_OBJECT_INFO_ERR); - } - - //! 
\brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_mem_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } + /*! \brief Blocks the calling thread until every event specified is complete. + * + * Wraps clWaitForEvents(). + */ + inline static cl_int WaitForEvents(const vector &events) { + return detail::errHandler( + ::clWaitForEvents((cl_uint)events.size(), + (events.size() > 0) ? (cl_event *)&events.front() : nullptr), + __WAIT_FOR_EVENTS_ERR); + } + + /*! \brief Class interface for cl_mem. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_mem as the original. For details, see + * clRetainMemObject() and clReleaseMemObject(). + * + * \see cl_mem + */ + class Memory : public detail::Wrapper { + public: + //! \brief Default constructor - initializes to nullptr. + Memory() : detail::Wrapper() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * Optionally transfer ownership of a refcount on the cl_mem + * into the new Memory object. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * + * See Memory for further details. + */ + explicit Memory(const cl_mem &memory, bool retainObject) : + detail::Wrapper(memory, retainObject) {} + + /*! \brief Assignment operator from cl_mem - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseMemObject() on the value previously held by this instance. + */ + Memory &operator=(const cl_mem &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetMemObjectInfo(). 
+ template + cl_int getInfo(cl_mem_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetMemObjectInfo, object_, name, param), + __GET_MEM_OBJECT_INFO_ERR); + } + + //! \brief Wrapper for clGetMemObjectInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Registers a callback function to be called when the memory object - * is no longer needed. - * - * Wraps clSetMemObjectDestructorCallback(). - * - * Repeated calls to this function, for a given cl_mem value, will append - * to the list of functions called (in reverse order) when memory object's - * resources are freed and the memory object is deleted. - * - * \note - * The registered callbacks are associated with the underlying cl_mem - * value - not the Memory class instance. - */ - cl_int setDestructorCallback( - void (CL_CALLBACK * pfn_notify)(cl_mem, void *), - void * user_data = nullptr) - { - return detail::errHandler( - ::clSetMemObjectDestructorCallback( - object_, - pfn_notify, - user_data), - __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); - } + /*! \brief Registers a callback function to be called when the memory object + * is no longer needed. + * + * Wraps clSetMemObjectDestructorCallback(). + * + * Repeated calls to this function, for a given cl_mem value, will append + * to the list of functions called (in reverse order) when memory object's + * resources are freed and the memory object is deleted. + * + * \note + * The registered callbacks are associated with the underlying cl_mem + * value - not the Memory class instance. 
+ */ + cl_int setDestructorCallback(void(CL_CALLBACK *pfn_notify)(cl_mem, void *), + void *user_data = nullptr) { + return detail::errHandler( + ::clSetMemObjectDestructorCallback(object_, pfn_notify, user_data), + __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 - -}; - -// Pre-declare copy functions -class Buffer; -template< typename IteratorType > -cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ); -template< typename IteratorType > -cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ); - + }; + + // Pre-declare copy functions + class Buffer; + template + cl_int copy(IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer); + template + cl_int copy(const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator); + template + cl_int copy(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, + cl::Buffer &buffer); + template + cl_int copy(const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, + IteratorType endIterator); #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -namespace detail -{ - class SVMTraitNull - { - public: - static cl_svm_mem_flags getSVMMemFlags() - { - return 0; - } - }; -} // namespace detail - -template -class SVMTraitReadWrite -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_READ_WRITE | - Trait::getSVMMemFlags(); - } -}; - -template -class SVMTraitReadOnly -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_READ_ONLY | - Trait::getSVMMemFlags(); - } -}; - -template -class SVMTraitWriteOnly -{ 
-public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_WRITE_ONLY | - Trait::getSVMMemFlags(); - } -}; - -template> -class SVMTraitCoarse -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return Trait::getSVMMemFlags(); - } -}; - -template> -class SVMTraitFine -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return CL_MEM_SVM_FINE_GRAIN_BUFFER | - Trait::getSVMMemFlags(); - } -}; - -template> -class SVMTraitAtomic -{ -public: - static cl_svm_mem_flags getSVMMemFlags() - { - return - CL_MEM_SVM_FINE_GRAIN_BUFFER | - CL_MEM_SVM_ATOMICS | - Trait::getSVMMemFlags(); - } -}; - -// Pre-declare SVM map function -template -inline cl_int enqueueMapSVM( - T* ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = nullptr, - Event* event = nullptr); - -/** - * STL-like allocator class for managing SVM objects provided for convenience. - * - * Note that while this behaves like an allocator for the purposes of constructing vectors and similar objects, - * care must be taken when using with smart pointers. - * The allocator should not be used to construct a unique_ptr if we are using coarse-grained SVM mode because - * the coarse-grained management behaviour would behave incorrectly with respect to reference counting. - * - * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used - * with the allocate_shared and allocate_ptr supplied operations. 
- */ -template -class SVMAllocator { -private: - Context context_; - -public: - typedef T value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - typedef value_type& reference; - typedef const value_type& const_reference; - typedef std::size_t size_type; - typedef std::ptrdiff_t difference_type; - - template - struct rebind - { - typedef SVMAllocator other; - }; - - template - friend class SVMAllocator; - - SVMAllocator() : - context_(Context::getDefault()) - { - } - - explicit SVMAllocator(cl::Context context) : - context_(context) - { - } - - - SVMAllocator(const SVMAllocator &other) : - context_(other.context_) - { - } - - template - SVMAllocator(const SVMAllocator &other) : - context_(other.context_) - { - } - - ~SVMAllocator() - { - } - - pointer address(reference r) noexcept - { - return std::addressof(r); - } - - const_pointer address(const_reference r) noexcept - { - return std::addressof(r); - } - - /** - * Allocate an SVM pointer. - * - * If the allocator is coarse-grained, this will take ownership to allow - * containers to correctly construct data in place. 
- */ - pointer allocate( - size_type size, - typename cl::SVMAllocator::const_pointer = 0, - bool map = true) - { - // Allocate memory with default alignment matching the size of the type - void* voidPointer = - clSVMAlloc( - context_(), - SVMTrait::getSVMMemFlags(), - size*sizeof(T), - 0); - pointer retValue = reinterpret_cast( - voidPointer); -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - if (!retValue) { - std::bad_alloc excep; - throw excep; - } -#endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) - - // If allocation was coarse-grained then map it - if (map && !(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { - cl_int err = enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size*sizeof(T)); - if (err != CL_SUCCESS) { - clSVMFree(context_(), retValue); - retValue = nullptr; -#if defined(CL_HPP_ENABLE_EXCEPTIONS) - std::bad_alloc excep; - throw excep; -#endif - } - } - - // If exceptions disabled, return null pointer from allocator - return retValue; - } - - void deallocate(pointer p, size_type) - { - clSVMFree(context_(), p); - } - - /** - * Return the maximum possible allocation size. - * This is the minimum of the maximum sizes of all devices in the context. - */ - size_type max_size() const noexcept - { - size_type maxSize = std::numeric_limits::max() / sizeof(T); - - for (const Device &d : context_.getInfo()) { - maxSize = std::min( - maxSize, - static_cast(d.getInfo())); - } - - return maxSize; - } - - template< class U, class... Args > - void construct(U* p, Args&&... args) - { - new(p)T(args...); - } - - template< class U > - void destroy(U* p) - { - p->~U(); - } - - /** - * Returns true if the contexts match. 
- */ - inline bool operator==(SVMAllocator const& rhs) - { - return (context_==rhs.context_); - } - - inline bool operator!=(SVMAllocator const& a) - { - return !operator==(a); - } -}; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, copies}); - - -template -class SVMAllocator { -public: - typedef void value_type; - typedef value_type* pointer; - typedef const value_type* const_pointer; - - template - struct rebind - { - typedef SVMAllocator other; - }; - - template - friend class SVMAllocator; -}; - -#if !defined(CL_HPP_NO_STD_UNIQUE_PTR) -namespace detail -{ - template - class Deleter { - private: - Alloc alloc_; - size_type copies_; - - public: - typedef typename std::allocator_traits::pointer pointer; - - Deleter(const Alloc &alloc, size_type copies) : alloc_{ alloc }, copies_{ copies } - { - } - - void operator()(pointer ptr) const { - Alloc tmpAlloc{ alloc_ }; - std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); - std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); - } - }; -} // namespace detail - -/** - * Allocation operation compatible with std::allocate_ptr. - * Creates a unique_ptr by default. - * This requirement is to ensure that the control block is not - * allocated in memory inaccessible to the host. - */ -template -cl::pointer> allocate_pointer(const Alloc &alloc_, Args&&... args) -{ - Alloc alloc(alloc_); - static const size_type copies = 1; - - // Ensure that creation of the management block and the - // object are dealt with separately such that we only provide a deleter - - T* tmp = std::allocator_traits::allocate(alloc, copies); - if (!tmp) { - std::bad_alloc excep; - throw excep; - } - try { - std::allocator_traits::construct( - alloc, - std::addressof(*tmp), - std::forward(args)...); - - return cl::pointer>(tmp, detail::Deleter{alloc, copies}); - } - catch (std::bad_alloc&) - { - std::allocator_traits::deallocate(alloc, tmp, copies); - throw; - } -} - -template< class T, class SVMTrait, class... 
Args > -cl::pointer>> allocate_svm(Args... args) -{ - SVMAllocator alloc; - return cl::allocate_pointer(alloc, args...); -} - -template< class T, class SVMTrait, class... Args > -cl::pointer>> allocate_svm(const cl::Context &c, Args... args) -{ - SVMAllocator alloc(c); - return cl::allocate_pointer(alloc, args...); -} -#endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) - -/*! \brief Vector alias to simplify contruction of coarse-grained SVM containers. - * - */ -template < class T > -using coarse_svm_vector = vector>>; - -/*! \brief Vector alias to simplify contruction of fine-grained SVM containers. -* -*/ -template < class T > -using fine_svm_vector = vector>>; - -/*! \brief Vector alias to simplify contruction of fine-grained SVM containers that support platform atomics. -* -*/ -template < class T > -using atomic_svm_vector = vector>>; + namespace detail { + class SVMTraitNull { + public: + static cl_svm_mem_flags getSVMMemFlags() { return 0; } + }; + } // namespace detail + + template + class SVMTraitReadWrite { + public: + static cl_svm_mem_flags getSVMMemFlags() { + return CL_MEM_READ_WRITE | Trait::getSVMMemFlags(); + } + }; + + template + class SVMTraitReadOnly { + public: + static cl_svm_mem_flags getSVMMemFlags() { + return CL_MEM_READ_ONLY | Trait::getSVMMemFlags(); + } + }; + + template + class SVMTraitWriteOnly { + public: + static cl_svm_mem_flags getSVMMemFlags() { + return CL_MEM_WRITE_ONLY | Trait::getSVMMemFlags(); + } + }; + + template> + class SVMTraitCoarse { + public: + static cl_svm_mem_flags getSVMMemFlags() { return Trait::getSVMMemFlags(); } + }; + + template> + class SVMTraitFine { + public: + static cl_svm_mem_flags getSVMMemFlags() { + return CL_MEM_SVM_FINE_GRAIN_BUFFER | Trait::getSVMMemFlags(); + } + }; + + template> + class SVMTraitAtomic { + public: + static cl_svm_mem_flags getSVMMemFlags() { + return CL_MEM_SVM_FINE_GRAIN_BUFFER | CL_MEM_SVM_ATOMICS | Trait::getSVMMemFlags(); + } + }; + + // Pre-declare SVM map function + template 
+ inline cl_int enqueueMapSVM(T *ptr, cl_bool blocking, cl_map_flags flags, size_type size, + const vector *events = nullptr, Event *event = nullptr); + + /** + * STL-like allocator class for managing SVM objects provided for convenience. + * + * Note that while this behaves like an allocator for the purposes of constructing vectors and + * similar objects, care must be taken when using with smart pointers. The allocator should not + * be used to construct a unique_ptr if we are using coarse-grained SVM mode because the + * coarse-grained management behaviour would behave incorrectly with respect to reference + * counting. + * + * Instead the allocator embeds a Deleter which may be used with unique_ptr and is used + * with the allocate_shared and allocate_ptr supplied operations. + */ + template + class SVMAllocator { + private: + Context context_; + + public: + typedef T value_type; + typedef value_type *pointer; + typedef const value_type *const_pointer; + typedef value_type &reference; + typedef const value_type &const_reference; + typedef std::size_t size_type; + typedef std::ptrdiff_t difference_type; + + template + struct rebind { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; + + SVMAllocator() : context_(Context::getDefault()) {} + + explicit SVMAllocator(cl::Context context) : context_(context) {} + + SVMAllocator(const SVMAllocator &other) : context_(other.context_) {} + + template + SVMAllocator(const SVMAllocator &other) : context_(other.context_) {} + + ~SVMAllocator() {} + + pointer address(reference r) noexcept { return std::addressof(r); } + + const_pointer address(const_reference r) noexcept { return std::addressof(r); } + + /** + * Allocate an SVM pointer. + * + * If the allocator is coarse-grained, this will take ownership to allow + * containers to correctly construct data in place. 
+ */ + pointer allocate(size_type size, + typename cl::SVMAllocator::const_pointer = 0, + bool map = true) { + // Allocate memory with default alignment matching the size of the type + void *voidPointer = + clSVMAlloc(context_(), SVMTrait::getSVMMemFlags(), size * sizeof(T), 0); + pointer retValue = reinterpret_cast(voidPointer); +# if defined(CL_HPP_ENABLE_EXCEPTIONS) + if (!retValue) { + std::bad_alloc excep; + throw excep; + } +# endif // #if defined(CL_HPP_ENABLE_EXCEPTIONS) + + // If allocation was coarse-grained then map it + if (map && !(SVMTrait::getSVMMemFlags() & CL_MEM_SVM_FINE_GRAIN_BUFFER)) { + cl_int err = + enqueueMapSVM(retValue, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE, size * sizeof(T)); + if (err != CL_SUCCESS) { + clSVMFree(context_(), retValue); + retValue = nullptr; +# if defined(CL_HPP_ENABLE_EXCEPTIONS) + std::bad_alloc excep; + throw excep; +# endif + } + } + + // If exceptions disabled, return null pointer from allocator + return retValue; + } + + void deallocate(pointer p, size_type) { clSVMFree(context_(), p); } + + /** + * Return the maximum possible allocation size. + * This is the minimum of the maximum sizes of all devices in the context. + */ + size_type max_size() const noexcept { + size_type maxSize = std::numeric_limits::max() / sizeof(T); + + for (const Device &d : context_.getInfo()) { + maxSize = std::min( + maxSize, static_cast(d.getInfo())); + } + + return maxSize; + } + + template + void construct(U *p, Args &&...args) { + new (p) T(args...); + } + + template + void destroy(U *p) { + p->~U(); + } + + /** + * Returns true if the contexts match. 
+ */ + inline bool operator==(SVMAllocator const &rhs) { return (context_ == rhs.context_); } + + inline bool operator!=(SVMAllocator const &a) { return !operator==(a); } + }; // class SVMAllocator return cl::pointer(tmp, detail::Deleter{alloc, + // copies}); + + template + class SVMAllocator { + public: + typedef void value_type; + typedef value_type *pointer; + typedef const value_type *const_pointer; + + template + struct rebind { + typedef SVMAllocator other; + }; + + template + friend class SVMAllocator; + }; + +# if !defined(CL_HPP_NO_STD_UNIQUE_PTR) + namespace detail { + template + class Deleter { + private: + Alloc alloc_; + size_type copies_; + + public: + typedef typename std::allocator_traits::pointer pointer; + + Deleter(const Alloc &alloc, size_type copies) : alloc_ {alloc}, copies_ {copies} {} + + void operator()(pointer ptr) const { + Alloc tmpAlloc {alloc_}; + std::allocator_traits::destroy(tmpAlloc, std::addressof(*ptr)); + std::allocator_traits::deallocate(tmpAlloc, ptr, copies_); + } + }; + } // namespace detail + + /** + * Allocation operation compatible with std::allocate_ptr. + * Creates a unique_ptr by default. + * This requirement is to ensure that the control block is not + * allocated in memory inaccessible to the host. + */ + template + cl::pointer> allocate_pointer(const Alloc &alloc_, Args &&...args) { + Alloc alloc(alloc_); + static const size_type copies = 1; + + // Ensure that creation of the management block and the + // object are dealt with separately such that we only provide a deleter + + T *tmp = std::allocator_traits::allocate(alloc, copies); + if (!tmp) { + std::bad_alloc excep; + throw excep; + } + try { + std::allocator_traits::construct( + alloc, std::addressof(*tmp), std::forward(args)...); + + return cl::pointer>(tmp, + detail::Deleter {alloc, copies}); + } catch (std::bad_alloc &) { + std::allocator_traits::deallocate(alloc, tmp, copies); + throw; + } + } + + template + cl::pointer>> allocate_svm(Args... 
args) { + SVMAllocator alloc; + return cl::allocate_pointer(alloc, args...); + } + + template + cl::pointer>> allocate_svm(const cl::Context &c, + Args... args) { + SVMAllocator alloc(c); + return cl::allocate_pointer(alloc, args...); + } +# endif // #if !defined(CL_HPP_NO_STD_UNIQUE_PTR) + + /*! \brief Vector alias to simplify contruction of coarse-grained SVM containers. + * + */ + template + using coarse_svm_vector = vector>>; + + /*! \brief Vector alias to simplify contruction of fine-grained SVM containers. + * + */ + template + using fine_svm_vector = vector>>; + + /*! \brief Vector alias to simplify contruction of fine-grained SVM containers that support + * platform atomics. + * + */ + template + using atomic_svm_vector = vector>>; #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -/*! \brief Class interface for Buffer Memory Objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Buffer : public Memory -{ -public: - - /*! \brief Constructs a Buffer in a specified context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - cl_mem_flags flags, - size_type size, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } + /*! \brief Class interface for Buffer Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class Buffer : public Memory { + public: + /*! \brief Constructs a Buffer in a specified context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. 
+ */ + Buffer(const Context &context, cl_mem_flags flags, size_type size, void *host_ptr = nullptr, + cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error); + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } #if CL_HPP_TARGET_OPENCL_VERSION >= 300 - /*! \brief Constructs a Buffer in a specified context and with specified properties. - * - * Wraps clCreateBufferWithProperties(). - * - * \param properties Optional list of properties for the buffer object and - * their corresponding values. The non-empty list must - * end with 0. - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - */ - Buffer( - const Context& context, - const vector& properties, - cl_mem_flags flags, - size_type size, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - if (properties.empty()) { - object_ = ::clCreateBufferWithProperties(context(), nullptr, flags, - size, host_ptr, &error); - } - else { - object_ = ::clCreateBufferWithProperties( - context(), properties.data(), flags, size, host_ptr, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } + /*! \brief Constructs a Buffer in a specified context and with specified properties. + * + * Wraps clCreateBufferWithProperties(). + * + * \param properties Optional list of properties for the buffer object and + * their corresponding values. The non-empty list must + * end with 0. + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. 
+ */ + Buffer(const Context &context, const vector &properties, + cl_mem_flags flags, size_type size, void *host_ptr = nullptr, + cl_int *err = nullptr) { + cl_int error; + + if (properties.empty()) { + object_ = + ::clCreateBufferWithProperties(context(), nullptr, flags, size, host_ptr, &error); + } else { + object_ = ::clCreateBufferWithProperties( + context(), properties.data(), flags, size, host_ptr, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } #endif - /*! \brief Constructs a Buffer in the default context. - * - * Wraps clCreateBuffer(). - * - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. - * - * \see Context::getDefault() - */ - Buffer( - cl_mem_flags flags, - size_type size, - void* host_ptr = nullptr, - cl_int* err = nullptr) : Buffer(Context::getDefault(err), flags, size, host_ptr, err) { } + /*! \brief Constructs a Buffer in the default context. + * + * Wraps clCreateBuffer(). + * + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer(cl_mem_flags flags, size_type size, void *host_ptr = nullptr, + cl_int *err = nullptr) : + Buffer(Context::getDefault(err), flags, size, host_ptr, err) {} #if CL_HPP_TARGET_OPENCL_VERSION >= 300 - /*! \brief Constructs a Buffer in the default context and with specified properties. - * - * Wraps clCreateBufferWithProperties(). - * - * \param properties Optional list of properties for the buffer object and - * their corresponding values. The non-empty list must - * end with 0. - * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was - * specified. Note alignment & exclusivity requirements. 
- * - * \see Context::getDefault() - */ - Buffer( - const vector& properties, - cl_mem_flags flags, - size_type size, - void* host_ptr = nullptr, - cl_int* err = nullptr) : Buffer(Context::getDefault(err), properties, flags, size, host_ptr, err) { } + /*! \brief Constructs a Buffer in the default context and with specified properties. + * + * Wraps clCreateBufferWithProperties(). + * + * \param properties Optional list of properties for the buffer object and + * their corresponding values. The non-empty list must + * end with 0. + * \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was + * specified. Note alignment & exclusivity requirements. + * + * \see Context::getDefault() + */ + Buffer(const vector &properties, cl_mem_flags flags, size_type size, + void *host_ptr = nullptr, cl_int *err = nullptr) : + Buffer(Context::getDefault(err), properties, flags, size, host_ptr, err) {} #endif - /*! - * \brief Construct a Buffer from a host container via iterators. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. 
- */ - template< typename IteratorType > - Buffer( - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr = false, - cl_int* err = nullptr) - { - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - size_type size = sizeof(DataType)*(endIterator - startIterator); - - Context context = Context::getDefault(err); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - - if( !useHostPtr ) { - error = cl::copy(startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } - } - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified context. - * IteratorType must be random access. - * If useHostPtr is specified iterators must represent contiguous data. - */ - template< typename IteratorType > - Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = nullptr); - - /*! - * \brief Construct a Buffer from a host container via iterators using a specified queue. - * If useHostPtr is specified iterators must be random access. - */ - template< typename IteratorType > - Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, - bool readOnly, bool useHostPtr = false, cl_int* err = nullptr); - - //! \brief Default constructor - initializes to nullptr. - Buffer() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. 
- * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with earlier versions. - * - * See Memory for further details. - */ - explicit Buffer(const cl_mem& buffer, bool retainObject = false) : - Memory(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Buffer& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - + /*! + * \brief Construct a Buffer from a host container via iterators. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. + */ + template + Buffer(IteratorType startIterator, IteratorType endIterator, bool readOnly, + bool useHostPtr = false, cl_int *err = nullptr) { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { flags |= CL_MEM_USE_HOST_PTR; } + + size_type size = sizeof(DataType) * (endIterator - startIterator); + + Context context = Context::getDefault(err); + + if (useHostPtr) { + object_ = ::clCreateBuffer( + context(), flags, size, const_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + + if (!useHostPtr) { + error = cl::copy(startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } + } + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified context. + * IteratorType must be random access. + * If useHostPtr is specified iterators must represent contiguous data. 
+ */ + template + Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int *err = nullptr); + + /*! + * \brief Construct a Buffer from a host container via iterators using a specified queue. + * If useHostPtr is specified iterators must be random access. + */ + template + Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr = false, cl_int *err = nullptr); + + //! \brief Default constructor - initializes to nullptr. + Buffer() : Memory() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Buffer(const cl_mem &buffer, bool retainObject = false) : + Memory(buffer, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Buffer &operator=(const cl_mem &rhs) { + Memory::operator=(rhs); + return *this; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 110 - /*! \brief Creates a new buffer object from this. - * - * Wraps clCreateSubBuffer(). - */ - Buffer createSubBuffer( - cl_mem_flags flags, - cl_buffer_create_type buffer_create_type, - const void * buffer_create_info, - cl_int * err = nullptr) - { - Buffer result; - cl_int error; - result.object_ = ::clCreateSubBuffer( - object_, - flags, - buffer_create_type, - buffer_create_info, - &error); - - detail::errHandler(error, __CREATE_SUBBUFFER_ERR); - if (err != nullptr) { - *err = error; - } - - return result; - } + /*! \brief Creates a new buffer object from this. + * + * Wraps clCreateSubBuffer(). 
+ */ + Buffer createSubBuffer(cl_mem_flags flags, cl_buffer_create_type buffer_create_type, + const void *buffer_create_info, cl_int *err = nullptr) { + Buffer result; + cl_int error; + result.object_ = + ::clCreateSubBuffer(object_, flags, buffer_create_type, buffer_create_info, &error); + + detail::errHandler(error, __CREATE_SUBBUFFER_ERR); + if (err != nullptr) { *err = error; } + + return result; + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -}; + }; -#if defined (CL_HPP_USE_DX_INTEROP) -/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. - * - * This is provided to facilitate interoperability with Direct3D. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferD3D10 : public Buffer -{ -public: - - - /*! \brief Constructs a BufferD3D10, in a specified context, from a - * given ID3D10Buffer. - * - * Wraps clCreateFromD3D10BufferKHR(). - */ - BufferD3D10( - const Context& context, - cl_mem_flags flags, - ID3D10Buffer* bufobj, - cl_int * err = nullptr) : pfn_clCreateFromD3D10BufferKHR(nullptr) - { - typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)( - cl_context context, cl_mem_flags flags, ID3D10Buffer* buffer, - cl_int* errcode_ret); - PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - vector props = context.getInfo(); - cl_platform platform = nullptr; - for( int i = 0; i < props.size(); ++i ) { - if( props[i] == CL_CONTEXT_PLATFORM ) { - platform = props[i+1]; - } - } - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateFromD3D10BufferKHR); -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateFromD3D10BufferKHR); -#endif - - cl_int error; - object_ = pfn_clCreateFromD3D10BufferKHR( - context(), - flags, - bufobj, - &error); - - // TODO: This should really have a D3D10 rerror code! 
- detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - BufferD3D10() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit BufferD3D10(const cl_mem& buffer, bool retainObject = false) : - Buffer(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferD3D10& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } -}; +#if defined(CL_HPP_USE_DX_INTEROP) + /*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's. + * + * This is provided to facilitate interoperability with Direct3D. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class BufferD3D10 : public Buffer { + public: + /*! \brief Constructs a BufferD3D10, in a specified context, from a + * given ID3D10Buffer. + * + * Wraps clCreateFromD3D10BufferKHR(). 
+ */ + BufferD3D10(const Context &context, cl_mem_flags flags, ID3D10Buffer *bufobj, + cl_int *err = nullptr) : + pfn_clCreateFromD3D10BufferKHR(nullptr) { + typedef CL_API_ENTRY cl_mem(CL_API_CALL * PFN_clCreateFromD3D10BufferKHR)( + cl_context context, cl_mem_flags flags, ID3D10Buffer * buffer, cl_int * errcode_ret); + PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR; +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + vector props = context.getInfo(); + cl_platform platform = nullptr; + for (int i = 0; i < props.size(); ++i) { + if (props[i] == CL_CONTEXT_PLATFORM) { platform = props[i + 1]; } + } + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateFromD3D10BufferKHR); +# endif +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateFromD3D10BufferKHR); +# endif + + cl_int error; + object_ = pfn_clCreateFromD3D10BufferKHR(context(), flags, bufobj, &error); + + // TODO: This should really have a D3D10 rerror code! + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + BufferD3D10() : Buffer() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferD3D10(const cl_mem &buffer, bool retainObject = false) : + Buffer(buffer, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferD3D10 &operator=(const cl_mem &rhs) { + Buffer::operator=(rhs); + return *this; + } + }; #endif -/*! \brief Class interface for GL Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferGL : public Buffer -{ -public: - /*! 
\brief Constructs a BufferGL in a specified context, from a given - * GL buffer. - * - * Wraps clCreateFromGLBuffer(). - */ - BufferGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = nullptr) - { - cl_int error; - object_ = ::clCreateFromGLBuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - BufferGL() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit BufferGL(const cl_mem& buffer, bool retainObject = false) : - Buffer(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief Class interface for GL Render Buffer Memory Objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class BufferRenderGL : public Buffer -{ -public: - /*! \brief Constructs a BufferRenderGL in a specified context, from a given - * GL Renderbuffer. - * - * Wraps clCreateFromGLRenderbuffer(). 
- */ - BufferRenderGL( - const Context& context, - cl_mem_flags flags, - cl_GLuint bufobj, - cl_int * err = nullptr) - { - cl_int error; - object_ = ::clCreateFromGLRenderbuffer( - context(), - flags, - bufobj, - &error); - - detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - BufferRenderGL() : Buffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit BufferRenderGL(const cl_mem& buffer, bool retainObject = false) : - Buffer(buffer, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - BufferRenderGL& operator = (const cl_mem& rhs) - { - Buffer::operator=(rhs); - return *this; - } - - - //! \brief Wrapper for clGetGLObjectInfo(). - cl_int getObjectInfo( - cl_gl_object_type *type, - cl_GLuint * gl_object_name) - { - return detail::errHandler( - ::clGetGLObjectInfo(object_,type,gl_object_name), - __GET_GL_OBJECT_INFO_ERR); - } -}; - -/*! \brief C++ base class for Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image : public Memory -{ -protected: - //! \brief Default constructor - initializes to nullptr. - Image() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image(const cl_mem& image, bool retainObject = false) : - Memory(image, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. 
- */ - Image& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - -public: - //! \brief Wrapper for clGetImageInfo(). - template - cl_int getImageInfo(cl_image_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetImageInfo, object_, name, param), - __GET_IMAGE_INFO_ERR); - } - - //! \brief Wrapper for clGetImageInfo() that returns by value. - template typename - detail::param_traits::param_type - getImageInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_image_info, name>::param_type param; - cl_int result = getImageInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } -}; + /*! \brief Class interface for GL Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class BufferGL : public Buffer { + public: + /*! \brief Constructs a BufferGL in a specified context, from a given + * GL buffer. + * + * Wraps clCreateFromGLBuffer(). + */ + BufferGL(const Context &context, cl_mem_flags flags, cl_GLuint bufobj, + cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateFromGLBuffer(context(), flags, bufobj, &error); + + detail::errHandler(error, __CREATE_GL_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + BufferGL() : Buffer() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferGL(const cl_mem &buffer, bool retainObject = false) : + Buffer(buffer, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. 
+ */ + BufferGL &operator=(const cl_mem &rhs) { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo(cl_gl_object_type *type, cl_GLuint *gl_object_name) { + return detail::errHandler(::clGetGLObjectInfo(object_, type, gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } + }; + + /*! \brief Class interface for GL Render Buffer Memory Objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class BufferRenderGL : public Buffer { + public: + /*! \brief Constructs a BufferRenderGL in a specified context, from a given + * GL Renderbuffer. + * + * Wraps clCreateFromGLRenderbuffer(). + */ + BufferRenderGL(const Context &context, cl_mem_flags flags, cl_GLuint bufobj, + cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateFromGLRenderbuffer(context(), flags, bufobj, &error); + + detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + BufferRenderGL() : Buffer() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit BufferRenderGL(const cl_mem &buffer, bool retainObject = false) : + Buffer(buffer, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + BufferRenderGL &operator=(const cl_mem &rhs) { + Buffer::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetGLObjectInfo(). + cl_int getObjectInfo(cl_gl_object_type *type, cl_GLuint *gl_object_name) { + return detail::errHandler(::clGetGLObjectInfo(object_, type, gl_object_name), + __GET_GL_OBJECT_INFO_ERR); + } + }; + + /*! 
\brief C++ base class for Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class Image : public Memory { + protected: + //! \brief Default constructor - initializes to nullptr. + Image() : Memory() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image(const cl_mem &image, bool retainObject = false) : + Memory(image, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image &operator=(const cl_mem &rhs) { + Memory::operator=(rhs); + return *this; + } + + public: + //! \brief Wrapper for clGetImageInfo(). + template + cl_int getImageInfo(cl_image_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetImageInfo, object_, name, param), + __GET_IMAGE_INFO_ERR); + } + + //! \brief Wrapper for clGetImageInfo() that returns by value. + template + typename detail::param_traits::param_type + getImageInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getImageInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + }; #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \brief Class interface for 1D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image1D : public Image -{ -public: - /*! \brief Constructs a 1D Image in a specified context. - * - * Wraps clCreateImage(). 
- */ - Image1D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE1D; - desc.image_width = width; - - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - Image1D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image1D(const cl_mem& image1D, bool retainObject = false) : - Image(image1D, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image1D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - -}; - -/*! \class Image1DBuffer - * \brief Image interface for 1D buffer images. - */ -class Image1DBuffer : public Image -{ -public: - Image1DBuffer( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - const Buffer &buffer, - cl_int* err = nullptr) - { - cl_int error; - - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; - desc.image_width = width; - desc.buffer = buffer(); - - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - nullptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } - - Image1DBuffer() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. 
- * See Memory for further details. - */ - explicit Image1DBuffer(const cl_mem& image1D, bool retainObject = false) : - Image(image1D, retainObject) { } - - Image1DBuffer& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - - -}; - -/*! \class Image1DArray - * \brief Image interface for arrays of 1D images. - */ -class Image1DArray : public Image -{ -public: - Image1DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type arraySize, - size_type width, - size_type rowPitch, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; - desc.image_width = width; - desc.image_array_size = arraySize; - desc.image_row_pitch = rowPitch; - - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } - - Image1DArray() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image1DArray(const cl_mem& imageArray, bool retainObject = false) : - Image(imageArray, retainObject) { } - - - Image1DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - -}; + /*! \brief Class interface for 1D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class Image1D : public Image { + public: + /*! \brief Constructs a 1D Image in a specified context. + * + * Wraps clCreateImage(). 
+ */ + Image1D(const Context &context, cl_mem_flags flags, ImageFormat format, size_type width, + void *host_ptr = nullptr, cl_int *err = nullptr) { + cl_int error; + + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D; + desc.image_width = width; + + object_ = ::clCreateImage(context(), flags, &format, &desc, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + Image1D() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1D(const cl_mem &image1D, bool retainObject = false) : + Image(image1D, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image1D &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; + + /*! \class Image1DBuffer + * \brief Image interface for 1D buffer images. + */ + class Image1DBuffer : public Image { + public: + Image1DBuffer(const Context &context, cl_mem_flags flags, ImageFormat format, + size_type width, const Buffer &buffer, cl_int *err = nullptr) { + cl_int error; + + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_BUFFER; + desc.image_width = width; + desc.buffer = buffer(); + + object_ = ::clCreateImage(context(), flags, &format, &desc, nullptr, &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } + + Image1DBuffer() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. 
+ */ + explicit Image1DBuffer(const cl_mem &image1D, bool retainObject = false) : + Image(image1D, retainObject) {} + + Image1DBuffer &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; + + /*! \class Image1DArray + * \brief Image interface for arrays of 1D images. + */ + class Image1DArray : public Image { + public: + Image1DArray(const Context &context, cl_mem_flags flags, ImageFormat format, + size_type arraySize, size_type width, size_type rowPitch, + void *host_ptr = nullptr, cl_int *err = nullptr) { + cl_int error; + + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE1D_ARRAY; + desc.image_width = width; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + + object_ = ::clCreateImage(context(), flags, &format, &desc, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } + + Image1DArray() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image1DArray(const cl_mem &imageArray, bool retainObject = false) : + Image(imageArray, retainObject) {} + + Image1DArray &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - -/*! \brief Class interface for 2D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image2D : public Image -{ -public: - /*! \brief Constructs a 2D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - size_type height, - size_type row_pitch = 0, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - bool useCreateImage; + /*! 
\brief Class interface for 2D Image Memory objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class Image2D : public Image { + public: + /*! \brief Constructs a 2D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image2D(const Context &context, cl_mem_flags flags, ImageFormat format, size_type width, + size_type height, size_type row_pitch = 0, void *host_ptr = nullptr, + cl_int *err = nullptr) { + cl_int error; + bool useCreateImage; #if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 120 - useCreateImage = true; + useCreateImage = true; #else - useCreateImage = false; + useCreateImage = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - if (useCreateImage) - { - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = width; - desc.image_height = height; - desc.image_row_pitch = row_pitch; - - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } + if (useCreateImage) { + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + + object_ = ::clCreateImage(context(), flags, &format, &desc, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - if 
(!useCreateImage) - { - object_ = ::clCreateImage2D( - context(), flags,&format, width, height, row_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE2D_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useCreateImage) { + object_ = ::clCreateImage2D( + context(), flags, &format, width, height, row_pitch, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE2D_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 - } + } #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /*! \brief Constructs a 2D Image from a buffer. - * \note This will share storage with the underlying buffer. - * - * Requires OpenCL 2.0 or newer or OpenCL 1.2 and the - * cl_khr_image2d_from_buffer extension. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - ImageFormat format, - const Buffer &sourceBuffer, - size_type width, - size_type height, - size_type row_pitch = 0, - cl_int* err = nullptr) - { - cl_int error; - - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = width; - desc.image_height = height; - desc.image_row_pitch = row_pitch; - desc.buffer = sourceBuffer(); - - object_ = ::clCreateImage( - context(), - 0, // flags inherited from buffer - &format, - &desc, - nullptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } + /*! \brief Constructs a 2D Image from a buffer. + * \note This will share storage with the underlying buffer. + * + * Requires OpenCL 2.0 or newer or OpenCL 1.2 and the + * cl_khr_image2d_from_buffer extension. + * + * Wraps clCreateImage(). 
+ */ + Image2D(const Context &context, ImageFormat format, const Buffer &sourceBuffer, + size_type width, size_type height, size_type row_pitch = 0, cl_int *err = nullptr) { + cl_int error; + + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = width; + desc.image_height = height; + desc.image_row_pitch = row_pitch; + desc.buffer = sourceBuffer(); + + object_ = ::clCreateImage(context(), + 0, // flags inherited from buffer + &format, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! \brief Constructs a 2D Image from an image. - * \note This will share storage with the underlying image but may - * reinterpret the channel order and type. - * - * The image will be created matching with a descriptor matching the source. - * - * \param order is the channel order to reinterpret the image data as. - * The channel order may differ as described in the OpenCL - * 2.0 API specification. - * - * Wraps clCreateImage(). - */ - Image2D( - const Context& context, - cl_channel_order order, - const Image &sourceImage, - cl_int* err = nullptr) - { - cl_int error; - - // Descriptor fields have to match source image - size_type sourceWidth = - sourceImage.getImageInfo(); - size_type sourceHeight = - sourceImage.getImageInfo(); - size_type sourceRowPitch = - sourceImage.getImageInfo(); - cl_uint sourceNumMIPLevels = - sourceImage.getImageInfo(); - cl_uint sourceNumSamples = - sourceImage.getImageInfo(); - cl_image_format sourceFormat = - sourceImage.getImageInfo(); - - // Update only the channel order. - // Channel format inherited from source. 
- sourceFormat.image_channel_order = order; - - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE2D; - desc.image_width = sourceWidth; - desc.image_height = sourceHeight; - desc.image_row_pitch = sourceRowPitch; - desc.num_mip_levels = sourceNumMIPLevels; - desc.num_samples = sourceNumSamples; - desc.buffer = sourceImage(); - - object_ = ::clCreateImage( - context(), - 0, // flags should be inherited from mem_object - &sourceFormat, - &desc, - nullptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } -#endif //#if CL_HPP_TARGET_OPENCL_VERSION >= 200 - - //! \brief Default constructor - initializes to nullptr. - Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image2D(const cl_mem& image2D, bool retainObject = false) : - Image(image2D, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image2D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - - - - -}; + /*! \brief Constructs a 2D Image from an image. + * \note This will share storage with the underlying image but may + * reinterpret the channel order and type. + * + * The image will be created matching with a descriptor matching the source. + * + * \param order is the channel order to reinterpret the image data as. + * The channel order may differ as described in the OpenCL + * 2.0 API specification. + * + * Wraps clCreateImage(). 
+ */ + Image2D(const Context &context, cl_channel_order order, const Image &sourceImage, + cl_int *err = nullptr) { + cl_int error; + + // Descriptor fields have to match source image + size_type sourceWidth = sourceImage.getImageInfo(); + size_type sourceHeight = sourceImage.getImageInfo(); + size_type sourceRowPitch = sourceImage.getImageInfo(); + cl_uint sourceNumMIPLevels = sourceImage.getImageInfo(); + cl_uint sourceNumSamples = sourceImage.getImageInfo(); + cl_image_format sourceFormat = sourceImage.getImageInfo(); + + // Update only the channel order. + // Channel format inherited from source. + sourceFormat.image_channel_order = order; + + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D; + desc.image_width = sourceWidth; + desc.image_height = sourceHeight; + desc.image_row_pitch = sourceRowPitch; + desc.num_mip_levels = sourceNumMIPLevels; + desc.num_samples = sourceNumSamples; + desc.buffer = sourceImage(); + + object_ = ::clCreateImage(context(), + 0, // flags should be inherited from mem_object + &sourceFormat, + &desc, + nullptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } +#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 + //! \brief Default constructor - initializes to nullptr. + Image2D() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2D(const cl_mem &image2D, bool retainObject = false) : + Image(image2D, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2D &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -/*! \brief Class interface for GL 2D Image Memory objects. 
- * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. - */ -class CL_API_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D -{ -public: - /*! \brief Constructs an Image2DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture2D(). - */ - Image2DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = nullptr) - { - cl_int error; - object_ = ::clCreateFromGLTexture2D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); - if (err != nullptr) { - *err = error; - } - - } - - //! \brief Default constructor - initializes to nullptr. - Image2DGL() : Image2D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image2DGL(const cl_mem& image, bool retainObject = false) : - Image2D(image, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - *c - * See Memory for further details. - */ - Image2DGL& operator = (const cl_mem& rhs) - { - Image2D::operator=(rhs); - return *this; - } - - - -} CL_API_SUFFIX__VERSION_1_1_DEPRECATED; + /*! \brief Class interface for GL 2D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + * \note Deprecated for OpenCL 1.2. Please use ImageGL instead. + */ + class CL_API_PREFIX__VERSION_1_1_DEPRECATED Image2DGL : public Image2D { + public: + /*! \brief Constructs an Image2DGL in a specified context, from a given + * GL Texture. 
+ * + * Wraps clCreateFromGLTexture2D(). + */ + Image2DGL(const Context &context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texobj, cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateFromGLTexture2D(context(), flags, target, miplevel, texobj, &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + Image2DGL() : Image2D() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DGL(const cl_mem &image, bool retainObject = false) : + Image2D(image, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image2DGL &operator=(const cl_mem &rhs) { + Image2D::operator=(rhs); + return *this; + } + + } CL_API_SUFFIX__VERSION_1_1_DEPRECATED; #endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \class Image2DArray - * \brief Image interface for arrays of 2D images. 
- */ -class Image2DArray : public Image -{ -public: - Image2DArray( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type arraySize, - size_type width, - size_type height, - size_type rowPitch, - size_type slicePitch, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; - desc.image_width = width; - desc.image_height = height; - desc.image_array_size = arraySize; - desc.image_row_pitch = rowPitch; - desc.image_slice_pitch = slicePitch; - - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } - - Image2DArray() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image2DArray(const cl_mem& imageArray, bool retainObject = false) : Image(imageArray, retainObject) { } - - Image2DArray& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - -}; + /*! \class Image2DArray + * \brief Image interface for arrays of 2D images. 
+ */ + class Image2DArray : public Image { + public: + Image2DArray(const Context &context, cl_mem_flags flags, ImageFormat format, + size_type arraySize, size_type width, size_type height, size_type rowPitch, + size_type slicePitch, void *host_ptr = nullptr, cl_int *err = nullptr) { + cl_int error; + + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE2D_ARRAY; + desc.image_width = width; + desc.image_height = height; + desc.image_array_size = arraySize; + desc.image_row_pitch = rowPitch; + desc.image_slice_pitch = slicePitch; + + object_ = ::clCreateImage(context(), flags, &format, &desc, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != nullptr) { *err = error; } + } + + Image2DArray() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image2DArray(const cl_mem &imageArray, bool retainObject = false) : + Image(imageArray, retainObject) {} + + Image2DArray &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \brief Class interface for 3D Image Memory objects. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3D : public Image -{ -public: - /*! \brief Constructs a 3D Image in a specified context. - * - * Wraps clCreateImage(). - */ - Image3D( - const Context& context, - cl_mem_flags flags, - ImageFormat format, - size_type width, - size_type height, - size_type depth, - size_type row_pitch = 0, - size_type slice_pitch = 0, - void* host_ptr = nullptr, - cl_int* err = nullptr) - { - cl_int error; - bool useCreateImage; + /*! \brief Class interface for 3D Image Memory objects. + * + * See Memory for details about copy semantics, etc. 
+ * + * \see Memory + */ + class Image3D : public Image { + public: + /*! \brief Constructs a 3D Image in a specified context. + * + * Wraps clCreateImage(). + */ + Image3D(const Context &context, cl_mem_flags flags, ImageFormat format, size_type width, + size_type height, size_type depth, size_type row_pitch = 0, + size_type slice_pitch = 0, void *host_ptr = nullptr, cl_int *err = nullptr) { + cl_int error; + bool useCreateImage; #if CL_HPP_TARGET_OPENCL_VERSION >= 120 && CL_HPP_MINIMUM_OPENCL_VERSION < 120 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 120 - useCreateImage = true; + useCreateImage = true; #else - useCreateImage = false; + useCreateImage = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - if (useCreateImage) - { - cl_image_desc desc = {}; - desc.image_type = CL_MEM_OBJECT_IMAGE3D; - desc.image_width = width; - desc.image_height = height; - desc.image_depth = depth; - desc.image_row_pitch = row_pitch; - desc.image_slice_pitch = slice_pitch; - - object_ = ::clCreateImage( - context(), - flags, - &format, - &desc, - host_ptr, - &error); - - detail::errHandler(error, __CREATE_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + if (useCreateImage) { + cl_image_desc desc = {}; + desc.image_type = CL_MEM_OBJECT_IMAGE3D; + desc.image_width = width; + desc.image_height = height; + desc.image_depth = depth; + desc.image_row_pitch = row_pitch; + desc.image_slice_pitch = slice_pitch; + + object_ = ::clCreateImage(context(), flags, &format, &desc, host_ptr, &error); + + detail::errHandler(error, __CREATE_IMAGE_ERR); + if (err != 
nullptr) { *err = error; } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 #if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - if (!useCreateImage) - { - object_ = ::clCreateImage3D( - context(), flags, &format, width, height, depth, row_pitch, - slice_pitch, host_ptr, &error); - - detail::errHandler(error, __CREATE_IMAGE3D_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useCreateImage) { + object_ = ::clCreateImage3D(context(), + flags, + &format, + width, + height, + depth, + row_pitch, + slice_pitch, + host_ptr, + &error); + + detail::errHandler(error, __CREATE_IMAGE3D_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 120 - } - - //! \brief Default constructor - initializes to nullptr. - Image3D() : Image() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image3D(const cl_mem& image3D, bool retainObject = false) : - Image(image3D, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3D& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - -}; + } + + //! \brief Default constructor - initializes to nullptr. + Image3D() : Image() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image3D(const cl_mem &image3D, bool retainObject = false) : + Image(image3D, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. 
+ */ + Image3D &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) -/*! \brief Class interface for GL 3D Image Memory objects. - * - * This is provided to facilitate interoperability with OpenGL. - * - * See Memory for details about copy semantics, etc. - * - * \see Memory - */ -class Image3DGL : public Image3D -{ -public: - /*! \brief Constructs an Image3DGL in a specified context, from a given - * GL Texture. - * - * Wraps clCreateFromGLTexture3D(). - */ - Image3DGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = nullptr) - { - cl_int error; - object_ = ::clCreateFromGLTexture3D( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - Image3DGL() : Image3D() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * See Memory for further details. - */ - explicit Image3DGL(const cl_mem& image, bool retainObject = false) : - Image3D(image, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Image3DGL& operator = (const cl_mem& rhs) - { - Image3D::operator=(rhs); - return *this; - } - -}; + /*! \brief Class interface for GL 3D Image Memory objects. + * + * This is provided to facilitate interoperability with OpenGL. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class Image3DGL : public Image3D { + public: + /*! \brief Constructs an Image3DGL in a specified context, from a given + * GL Texture. + * + * Wraps clCreateFromGLTexture3D(). 
+ */ + Image3DGL(const Context &context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texobj, cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateFromGLTexture3D(context(), flags, target, miplevel, texobj, &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + Image3DGL() : Image3D() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit Image3DGL(const cl_mem &image, bool retainObject = false) : + Image3D(image, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Image3DGL &operator=(const cl_mem &rhs) { + Image3D::operator=(rhs); + return *this; + } + }; #endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -/*! \class ImageGL - * \brief general image interface for GL interop. - * We abstract the 2D and 3D GL images into a single instance here - * that wraps all GL sourced images on the grounds that setup information - * was performed by OpenCL anyway. - */ -class ImageGL : public Image -{ -public: - ImageGL( - const Context& context, - cl_mem_flags flags, - cl_GLenum target, - cl_GLint miplevel, - cl_GLuint texobj, - cl_int * err = nullptr) - { - cl_int error; - object_ = ::clCreateFromGLTexture( - context(), - flags, - target, - miplevel, - texobj, - &error); - - detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); - if (err != nullptr) { - *err = error; - } - } - - ImageGL() : Image() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. 
- * See Memory for further details. - */ - explicit ImageGL(const cl_mem& image, bool retainObject = false) : - Image(image, retainObject) { } - - ImageGL& operator = (const cl_mem& rhs) - { - Image::operator=(rhs); - return *this; - } - -}; + /*! \class ImageGL + * \brief general image interface for GL interop. + * We abstract the 2D and 3D GL images into a single instance here + * that wraps all GL sourced images on the grounds that setup information + * was performed by OpenCL anyway. + */ + class ImageGL : public Image { + public: + ImageGL(const Context &context, cl_mem_flags flags, cl_GLenum target, cl_GLint miplevel, + cl_GLuint texobj, cl_int *err = nullptr) { + cl_int error; + object_ = ::clCreateFromGLTexture(context(), flags, target, miplevel, texobj, &error); + + detail::errHandler(error, __CREATE_GL_TEXTURE_ERR); + if (err != nullptr) { *err = error; } + } + + ImageGL() : Image() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * See Memory for further details. + */ + explicit ImageGL(const cl_mem &image, bool retainObject = false) : + Image(image, retainObject) {} + + ImageGL &operator=(const cl_mem &rhs) { + Image::operator=(rhs); + return *this; + } + }; #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - - #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/*! \brief Class interface for Pipe Memory Objects. -* -* See Memory for details about copy semantics, etc. -* -* \see Memory -*/ -class Pipe : public Memory -{ -public: - - /*! \brief Constructs a Pipe in a specified context. - * - * Wraps clCreatePipe(). - * @param context Context in which to create the pipe. - * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. - * @param packet_size Size in bytes of a single packet of the pipe. - * @param max_packets Number of packets that may be stored in the pipe. 
- * - */ - Pipe( - const Context& context, - cl_uint packet_size, - cl_uint max_packets, - cl_int* err = nullptr) - { - cl_int error; - - cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; - object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); - - detail::errHandler(error, __CREATE_PIPE_ERR); - if (err != nullptr) { - *err = error; - } - } - - /*! \brief Constructs a Pipe in a the default context. - * - * Wraps clCreatePipe(). - * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. - * @param packet_size Size in bytes of a single packet of the pipe. - * @param max_packets Number of packets that may be stored in the pipe. - * - */ - Pipe( - cl_uint packet_size, - cl_uint max_packets, - cl_int* err = nullptr) - { - cl_int error; - - Context context = Context::getDefault(err); - - cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; - object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); - - detail::errHandler(error, __CREATE_PIPE_ERR); - if (err != nullptr) { - *err = error; - } - } - - //! \brief Default constructor - initializes to nullptr. - Pipe() : Memory() { } - - /*! \brief Constructor from cl_mem - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with earlier versions. - * - * See Memory for further details. - */ - explicit Pipe(const cl_mem& pipe, bool retainObject = false) : - Memory(pipe, retainObject) { } - - /*! \brief Assignment from cl_mem - performs shallow copy. - * - * See Memory for further details. - */ - Pipe& operator = (const cl_mem& rhs) - { - Memory::operator=(rhs); - return *this; - } - - - - //! \brief Wrapper for clGetMemObjectInfo(). - template - cl_int getInfo(cl_pipe_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetPipeInfo, object_, name, param), - __GET_PIPE_INFO_ERR); - } - - //! 
\brief Wrapper for clGetMemObjectInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_pipe_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } -}; // class Pipe + /*! \brief Class interface for Pipe Memory Objects. + * + * See Memory for details about copy semantics, etc. + * + * \see Memory + */ + class Pipe : public Memory { + public: + /*! \brief Constructs a Pipe in a specified context. + * + * Wraps clCreatePipe(). + * @param context Context in which to create the pipe. + * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. + * + */ + Pipe(const Context &context, cl_uint packet_size, cl_uint max_packets, + cl_int *err = nullptr) { + cl_int error; + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != nullptr) { *err = error; } + } + + /*! \brief Constructs a Pipe in the default context. + * + * Wraps clCreatePipe(). + * @param flags Bitfield. Only CL_MEM_READ_WRITE and CL_MEM_HOST_NO_ACCESS are valid. + * @param packet_size Size in bytes of a single packet of the pipe. + * @param max_packets Number of packets that may be stored in the pipe. 
+ * + */ + Pipe(cl_uint packet_size, cl_uint max_packets, cl_int *err = nullptr) { + cl_int error; + + Context context = Context::getDefault(err); + + cl_mem_flags flags = CL_MEM_READ_WRITE | CL_MEM_HOST_NO_ACCESS; + object_ = ::clCreatePipe(context(), flags, packet_size, max_packets, nullptr, &error); + + detail::errHandler(error, __CREATE_PIPE_ERR); + if (err != nullptr) { *err = error; } + } + + //! \brief Default constructor - initializes to nullptr. + Pipe() : Memory() {} + + /*! \brief Constructor from cl_mem - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with earlier versions. + * + * See Memory for further details. + */ + explicit Pipe(const cl_mem &pipe, bool retainObject = false) : Memory(pipe, retainObject) {} + + /*! \brief Assignment from cl_mem - performs shallow copy. + * + * See Memory for further details. + */ + Pipe &operator=(const cl_mem &rhs) { + Memory::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetPipeInfo(). + template + cl_int getInfo(cl_pipe_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetPipeInfo, object_, name, param), + __GET_PIPE_INFO_ERR); + } + + //! \brief Wrapper for clGetPipeInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + }; // class Pipe #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 - -/*! \brief Class interface for cl_sampler. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_sampler as the original. For details, see - * clRetainSampler() and clReleaseSampler(). - * - * \see cl_sampler - */ -class Sampler : public detail::Wrapper -{ -public: - //! 
\brief Default constructor - initializes to nullptr. - Sampler() { } - - /*! \brief Constructs a Sampler in a specified context. - * - * Wraps clCreateSampler(). - */ - Sampler( - const Context& context, - cl_bool normalized_coords, - cl_addressing_mode addressing_mode, - cl_filter_mode filter_mode, - cl_int* err = nullptr) - { - cl_int error; + /*! \brief Class interface for cl_sampler. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_sampler as the original. For details, see + * clRetainSampler() and clReleaseSampler(). + * + * \see cl_sampler + */ + class Sampler : public detail::Wrapper { + public: + //! \brief Default constructor - initializes to nullptr. + Sampler() {} + + /*! \brief Constructs a Sampler in a specified context. + * + * Wraps clCreateSampler(). + */ + Sampler(const Context &context, cl_bool normalized_coords, + cl_addressing_mode addressing_mode, cl_filter_mode filter_mode, + cl_int *err = nullptr) { + cl_int error; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - cl_sampler_properties sampler_properties[] = { - CL_SAMPLER_NORMALIZED_COORDS, normalized_coords, - CL_SAMPLER_ADDRESSING_MODE, addressing_mode, - CL_SAMPLER_FILTER_MODE, filter_mode, - 0 }; - object_ = ::clCreateSamplerWithProperties( - context(), - sampler_properties, - &error); - - detail::errHandler(error, __CREATE_SAMPLER_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } + cl_sampler_properties sampler_properties[] = {CL_SAMPLER_NORMALIZED_COORDS, + normalized_coords, + CL_SAMPLER_ADDRESSING_MODE, + addressing_mode, + CL_SAMPLER_FILTER_MODE, + filter_mode, + 0}; + object_ = ::clCreateSamplerWithProperties(context(), sampler_properties, &error); + + detail::errHandler(error, __CREATE_SAMPLER_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } #else - object_ = ::clCreateSampler( - context(), - normalized_coords, - addressing_mode, - filter_mode, - &error); - - detail::errHandler(error, 
__CREATE_SAMPLER_ERR); - if (err != nullptr) { - *err = error; - } -#endif - } - - /*! \brief Constructor from cl_sampler - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This effectively transfers ownership of a refcount on the cl_sampler - * into the new Sampler object. - */ - explicit Sampler(const cl_sampler& sampler, bool retainObject = false) : - detail::Wrapper(sampler, retainObject) { } - - /*! \brief Assignment operator from cl_sampler - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseSampler() on the value previously held by this instance. - */ - Sampler& operator = (const cl_sampler& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - - - //! \brief Wrapper for clGetSamplerInfo(). - template - cl_int getInfo(cl_sampler_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetSamplerInfo, object_, name, param), - __GET_SAMPLER_INFO_ERR); - } - - //! \brief Wrapper for clGetSamplerInfo() that returns by value. - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_sampler_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } -}; - -class Program; -class CommandQueue; -class DeviceCommandQueue; -class Kernel; - -//! \brief Class interface for specifying NDRange values. -class NDRange -{ -private: - size_type sizes_[3]; - cl_uint dimensions_; - -public: - //! \brief Default constructor - resulting range has zero dimensions. - NDRange() - : dimensions_(0) - { - sizes_[0] = 0; - sizes_[1] = 0; - sizes_[2] = 0; - } - - //! \brief Constructs one-dimensional range. 
- NDRange(size_type size0) - : dimensions_(1) - { - sizes_[0] = size0; - sizes_[1] = 1; - sizes_[2] = 1; - } - - //! \brief Constructs two-dimensional range. - NDRange(size_type size0, size_type size1) - : dimensions_(2) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = 1; - } - - //! \brief Constructs three-dimensional range. - NDRange(size_type size0, size_type size1, size_type size2) - : dimensions_(3) - { - sizes_[0] = size0; - sizes_[1] = size1; - sizes_[2] = size2; - } - - //! \brief Constructs one-dimensional range. - NDRange(array a) : NDRange(a[0]){} - - //! \brief Constructs two-dimensional range. - NDRange(array a) : NDRange(a[0], a[1]){} - - //! \brief Constructs three-dimensional range. - NDRange(array a) : NDRange(a[0], a[1], a[2]){} - - /*! \brief Conversion operator to const size_type *. - * - * \returns a pointer to the size of the first dimension. - */ - operator const size_type*() const { - return sizes_; - } - - //! \brief Queries the number of dimensions in the range. - size_type dimensions() const - { - return dimensions_; - } - - //! \brief Returns the size of the object in bytes based on the - // runtime number of dimensions - size_type size() const - { - return dimensions_*sizeof(size_type); - } - - size_type* get() - { - return sizes_; - } - - const size_type* get() const - { - return sizes_; - } -}; - -//! \brief A zero-dimensional range. -static const NDRange NullRange; - -//! 
\brief Local address wrapper for use with Kernel::setArg -struct LocalSpaceArg -{ - size_type size_; -}; - -namespace detail { - -template -struct KernelArgumentHandler; - -// Enable for objects that are not subclasses of memory -// Pointers, constants etc -template -struct KernelArgumentHandler::value>::type> -{ - static size_type size(const T&) { return sizeof(T); } - static const T* ptr(const T& value) { return &value; } -}; - -// Enable for subclasses of memory where we want to get a reference to the cl_mem out -// and pass that in for safety -template -struct KernelArgumentHandler::value>::type> -{ - static size_type size(const T&) { return sizeof(cl_mem); } - static const cl_mem* ptr(const T& value) { return &(value()); } -}; - -// Specialization for DeviceCommandQueue defined later - -template <> -struct KernelArgumentHandler -{ - static size_type size(const LocalSpaceArg& value) { return value.size_; } - static const void* ptr(const LocalSpaceArg&) { return nullptr; } -}; - -} -//! \endcond - -/*! Local - * \brief Helper function for generating LocalSpaceArg objects. - */ -inline LocalSpaceArg -Local(size_type size) -{ - LocalSpaceArg ret = { size }; - return ret; -} - -/*! \brief Class interface for cl_kernel. - * - * \note Copies of these objects are shallow, meaning that the copy will refer - * to the same underlying cl_kernel as the original. For details, see - * clRetainKernel() and clReleaseKernel(). - * - * \see cl_kernel - */ -class Kernel : public detail::Wrapper -{ -public: - inline Kernel(const Program& program, const char* name, cl_int* err = nullptr); - - //! \brief Default constructor - initializes to nullptr. - Kernel() { } - - /*! \brief Constructor from cl_kernel - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. - * This effectively transfers ownership of a refcount on the cl_kernel - * into the new Kernel object. 
- */ - explicit Kernel(const cl_kernel& kernel, bool retainObject = false) : - detail::Wrapper(kernel, retainObject) { } - - /*! \brief Assignment operator from cl_kernel - takes ownership. - * - * This effectively transfers ownership of a refcount on the rhs and calls - * clReleaseKernel() on the value previously held by this instance. - */ - Kernel& operator = (const cl_kernel& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - - - - template - cl_int getInfo(cl_kernel_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelInfo, object_, name, param), - __GET_KERNEL_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_kernel_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } + object_ = + ::clCreateSampler(context(), normalized_coords, addressing_mode, filter_mode, &error); + + detail::errHandler(error, __CREATE_SAMPLER_ERR); + if (err != nullptr) { *err = error; } +#endif + } + + /*! \brief Constructor from cl_sampler - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_sampler + * into the new Sampler object. + */ + explicit Sampler(const cl_sampler &sampler, bool retainObject = false) : + detail::Wrapper(sampler, retainObject) {} + + /*! \brief Assignment operator from cl_sampler - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseSampler() on the value previously held by this instance. + */ + Sampler &operator=(const cl_sampler &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + //! \brief Wrapper for clGetSamplerInfo(). 
+ template + cl_int getInfo(cl_sampler_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetSamplerInfo, object_, name, param), + __GET_SAMPLER_INFO_ERR); + } + + //! \brief Wrapper for clGetSamplerInfo() that returns by value. + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + }; + + class Program; + class CommandQueue; + class DeviceCommandQueue; + class Kernel; + + //! \brief Class interface for specifying NDRange values. + class NDRange { + private: + size_type sizes_[3]; + cl_uint dimensions_; + + public: + //! \brief Default constructor - resulting range has zero dimensions. + NDRange() : dimensions_(0) { + sizes_[0] = 0; + sizes_[1] = 0; + sizes_[2] = 0; + } + + //! \brief Constructs one-dimensional range. + NDRange(size_type size0) : dimensions_(1) { + sizes_[0] = size0; + sizes_[1] = 1; + sizes_[2] = 1; + } + + //! \brief Constructs two-dimensional range. + NDRange(size_type size0, size_type size1) : dimensions_(2) { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = 1; + } + + //! \brief Constructs three-dimensional range. + NDRange(size_type size0, size_type size1, size_type size2) : dimensions_(3) { + sizes_[0] = size0; + sizes_[1] = size1; + sizes_[2] = size2; + } + + //! \brief Constructs one-dimensional range. + NDRange(array a) : NDRange(a[0]) {} + + //! \brief Constructs two-dimensional range. + NDRange(array a) : NDRange(a[0], a[1]) {} + + //! \brief Constructs three-dimensional range. + NDRange(array a) : NDRange(a[0], a[1], a[2]) {} + + /*! \brief Conversion operator to const size_type *. + * + * \returns a pointer to the size of the first dimension. + */ + operator const size_type *() const { return sizes_; } + + //! \brief Queries the number of dimensions in the range. 
+ size_type dimensions() const { return dimensions_; } + + //! \brief Returns the size of the object in bytes based on the + // runtime number of dimensions + size_type size() const { return dimensions_ * sizeof(size_type); } + + size_type *get() { return sizes_; } + + const size_type *get() const { return sizes_; } + }; + + //! \brief A zero-dimensional range. + static const NDRange NullRange; + + //! \brief Local address wrapper for use with Kernel::setArg + struct LocalSpaceArg { + size_type size_; + }; + + namespace detail { + + template + struct KernelArgumentHandler; + + // Enable for objects that are not subclasses of memory + // Pointers, constants etc + template + struct KernelArgumentHandler< + T, typename std::enable_if::value>::type> { + static size_type size(const T &) { return sizeof(T); } + static const T *ptr(const T &value) { return &value; } + }; + + // Enable for subclasses of memory where we want to get a reference to the cl_mem out + // and pass that in for safety + template + struct KernelArgumentHandler< + T, typename std::enable_if::value>::type> { + static size_type size(const T &) { return sizeof(cl_mem); } + static const cl_mem *ptr(const T &value) { return &(value()); } + }; + + // Specialization for DeviceCommandQueue defined later + + template<> + struct KernelArgumentHandler { + static size_type size(const LocalSpaceArg &value) { return value.size_; } + static const void *ptr(const LocalSpaceArg &) { return nullptr; } + }; + + } // namespace detail + //! \endcond + + /*! Local + * \brief Helper function for generating LocalSpaceArg objects. + */ + inline LocalSpaceArg Local(size_type size) { + LocalSpaceArg ret = {size}; + return ret; + } + + /*! \brief Class interface for cl_kernel. + * + * \note Copies of these objects are shallow, meaning that the copy will refer + * to the same underlying cl_kernel as the original. For details, see + * clRetainKernel() and clReleaseKernel(). 
+ * + * \see cl_kernel + */ + class Kernel : public detail::Wrapper { + public: + inline Kernel(const Program &program, const char *name, cl_int *err = nullptr); + + //! \brief Default constructor - initializes to nullptr. + Kernel() {} + + /*! \brief Constructor from cl_kernel - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + * This effectively transfers ownership of a refcount on the cl_kernel + * into the new Kernel object. + */ + explicit Kernel(const cl_kernel &kernel, bool retainObject = false) : + detail::Wrapper(kernel, retainObject) {} + + /*! \brief Assignment operator from cl_kernel - takes ownership. + * + * This effectively transfers ownership of a refcount on the rhs and calls + * clReleaseKernel() on the value previously held by this instance. + */ + Kernel &operator=(const cl_kernel &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_kernel_info name, T *param) const { + return detail::errHandler(detail::getInfo(&::clGetKernelInfo, object_, name, param), + __GET_KERNEL_INFO_ERR); + } + + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - template - cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), - __GET_KERNEL_ARG_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getArgInfo(cl_uint argIndex, cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_kernel_arg_info, name>::param_type param; - cl_int result = getArgInfo(argIndex, name, ¶m); - if (err != nullptr) { - *err = 
result; - } - return param; - } + template + cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T *param) const { + return detail::errHandler( + detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param), + __GET_KERNEL_ARG_INFO_ERR); + } + + template + typename detail::param_traits::param_type + getArgInfo(cl_uint argIndex, cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getArgInfo(argIndex, name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - template - cl_int getWorkGroupInfo( - const Device& device, cl_kernel_work_group_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetKernelWorkGroupInfo, object_, device(), name, param), - __GET_KERNEL_WORK_GROUP_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getWorkGroupInfo(const Device& device, cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_kernel_work_group_info, name>::param_type param; - cl_int result = getWorkGroupInfo(device, name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - -#if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 - cl_int getSubGroupInfo(const cl::Device &dev, cl_kernel_sub_group_info name, const cl::NDRange &range, size_type* param) const - { -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - return detail::errHandler( - clGetKernelSubGroupInfo(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), - __GET_KERNEL_SUB_GROUP_INFO_ERR); - -#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + template + cl_int getWorkGroupInfo(const Device &device, cl_kernel_work_group_info name, + T *param) const { + return detail::errHandler( + detail::getInfo(&::clGetKernelWorkGroupInfo, object_, device(), name, param), + __GET_KERNEL_WORK_GROUP_INFO_ERR); + } + + template + typename 
detail::param_traits::param_type + getWorkGroupInfo(const Device &device, cl_int *err = nullptr) const { + typename detail::param_traits::param_type + param; + cl_int result = getWorkGroupInfo(device, name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } - typedef clGetKernelSubGroupInfoKHR_fn PFN_clGetKernelSubGroupInfoKHR; - static PFN_clGetKernelSubGroupInfoKHR pfn_clGetKernelSubGroupInfoKHR = nullptr; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetKernelSubGroupInfoKHR); - - return detail::errHandler( - pfn_clGetKernelSubGroupInfoKHR(object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), - __GET_KERNEL_SUB_GROUP_INFO_ERR); - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - } - - template - size_type getSubGroupInfo(const cl::Device &dev, const cl::NDRange &range, cl_int* err = nullptr) const - { - size_type param; - cl_int result = getSubGroupInfo(dev, name, range, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } +#if defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 + cl_int getSubGroupInfo(const cl::Device &dev, cl_kernel_sub_group_info name, + const cl::NDRange &range, size_type *param) const { +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + return detail::errHandler( + clGetKernelSubGroupInfo( + object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_SUB_GROUP_INFO_ERR); + +# else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clGetKernelSubGroupInfoKHR_fn PFN_clGetKernelSubGroupInfoKHR; + static PFN_clGetKernelSubGroupInfoKHR pfn_clGetKernelSubGroupInfoKHR = nullptr; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetKernelSubGroupInfoKHR); + + return detail::errHandler( + pfn_clGetKernelSubGroupInfoKHR( + object_, dev(), name, range.size(), range.get(), sizeof(size_type), param, nullptr), + __GET_KERNEL_SUB_GROUP_INFO_ERR); + +# endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + } + + template + size_type getSubGroupInfo(const 
cl::Device &dev, const cl::NDRange &range, + cl_int *err = nullptr) const { + size_type param; + cl_int result = getSubGroupInfo(dev, name, range, ¶m); + if (err != nullptr) { *err = result; } + return param; + } #endif // defined(CL_HPP_USE_CL_SUB_GROUPS_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! \brief setArg overload taking a shared_ptr type - */ - template - cl_int setArg(cl_uint index, const cl::pointer &argPtr) - { - return detail::errHandler( - ::clSetKernelArgSVMPointer(object_, index, argPtr.get()), - __SET_KERNEL_ARGS_ERR); - } - - /*! \brief setArg overload taking a vector type. - */ - template - cl_int setArg(cl_uint index, const cl::vector &argPtr) - { - return detail::errHandler( - ::clSetKernelArgSVMPointer(object_, index, argPtr.data()), - __SET_KERNEL_ARGS_ERR); - } - - /*! \brief setArg overload taking a pointer type - */ - template - typename std::enable_if::value, cl_int>::type - setArg(cl_uint index, const T argPtr) - { - return detail::errHandler( - ::clSetKernelArgSVMPointer(object_, index, argPtr), - __SET_KERNEL_ARGS_ERR); - } + /*! \brief setArg overload taking a shared_ptr type + */ + template + cl_int setArg(cl_uint index, const cl::pointer &argPtr) { + return detail::errHandler(::clSetKernelArgSVMPointer(object_, index, argPtr.get()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a vector type. + */ + template + cl_int setArg(cl_uint index, const cl::vector &argPtr) { + return detail::errHandler(::clSetKernelArgSVMPointer(object_, index, argPtr.data()), + __SET_KERNEL_ARGS_ERR); + } + + /*! \brief setArg overload taking a pointer type + */ + template + typename std::enable_if::value, cl_int>::type setArg(cl_uint index, + const T argPtr) { + return detail::errHandler(::clSetKernelArgSVMPointer(object_, index, argPtr), + __SET_KERNEL_ARGS_ERR); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! 
\brief setArg overload taking a POD type - */ - template - typename std::enable_if::value, cl_int>::type - setArg(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetKernelArg( - object_, - index, - detail::KernelArgumentHandler::size(value), - detail::KernelArgumentHandler::ptr(value)), - __SET_KERNEL_ARGS_ERR); - } - - cl_int setArg(cl_uint index, size_type size, const void* argPtr) - { - return detail::errHandler( - ::clSetKernelArg(object_, index, size, argPtr), - __SET_KERNEL_ARGS_ERR); - } + /*! \brief setArg overload taking a POD type + */ + template + typename std::enable_if::value, cl_int>::type setArg(cl_uint index, + const T &value) { + return detail::errHandler( + ::clSetKernelArg(object_, + index, + detail::KernelArgumentHandler::size(value), + detail::KernelArgumentHandler::ptr(value)), + __SET_KERNEL_ARGS_ERR); + } + + cl_int setArg(cl_uint index, size_type size, const void *argPtr) { + return detail::errHandler(::clSetKernelArg(object_, index, size, argPtr), + __SET_KERNEL_ARGS_ERR); + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /*! - * Specify a vector of SVM pointers that the kernel may access in - * addition to its arguments. - */ - cl_int setSVMPointers(const vector &pointerList) - { - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - sizeof(void*)*pointerList.size(), - pointerList.data())); - } - - /*! - * Specify a std::array of SVM pointers that the kernel may access in - * addition to its arguments. - */ - template - cl_int setSVMPointers(const std::array &pointerList) - { - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - sizeof(void*)*pointerList.size(), - pointerList.data())); - } - - /*! \brief Enable fine-grained system SVM. - * - * \note It is only possible to enable fine-grained system SVM if all devices - * in the context associated with kernel support it. 
- * - * \param svmEnabled True if fine-grained system SVM is requested. False otherwise. - * \return CL_SUCCESS if the function was executed succesfully. CL_INVALID_OPERATION - * if no devices in the context support fine-grained system SVM. - * - * \see clSetKernelExecInfo - */ - cl_int enableFineGrainedSystemSVM(bool svmEnabled) - { - cl_bool svmEnabled_ = svmEnabled ? CL_TRUE : CL_FALSE; - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, - sizeof(cl_bool), - &svmEnabled_ - ) - ); - } - - template - void setSVMPointersHelper(std::array &pointerList, const pointer &t0, const pointer &t1, Ts & ... ts) - { - pointerList[index] = static_cast(t0.get()); - setSVMPointersHelper(pointerList, t1, ts...); - } - - template - typename std::enable_if::value, void>::type - setSVMPointersHelper(std::array &pointerList, T0 t0, T1 t1, Ts... ts) - { - pointerList[index] = static_cast(t0); - setSVMPointersHelper(pointerList, t1, ts...); - } - - template - void setSVMPointersHelper(std::array &pointerList, const pointer &t0) - { - pointerList[index] = static_cast(t0.get()); - } - - - template - typename std::enable_if::value, void>::type - setSVMPointersHelper(std::array &pointerList, T0 t0) - { - pointerList[index] = static_cast(t0); - } - - template - cl_int setSVMPointers(const T0 &t0, Ts & ... ts) - { - std::array pointerList; - - setSVMPointersHelper<0, 1 + sizeof...(Ts)>(pointerList, t0, ts...); - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - CL_KERNEL_EXEC_INFO_SVM_PTRS, - sizeof(void*)*(1 + sizeof...(Ts)), - pointerList.data())); - } - - template - cl_int setExecInfo(cl_kernel_exec_info param_name, const T& val) - { - return detail::errHandler( - ::clSetKernelExecInfo( - object_, - param_name, - sizeof(T), - &val)); - } - - template - cl_int setExecInfo(typename detail::param_traits::param_type& val) - { - return setExecInfo(name, val); - } + /*! 
+ * Specify a vector of SVM pointers that the kernel may access in + * addition to its arguments. + */ + cl_int setSVMPointers(const vector &pointerList) { + return detail::errHandler(::clSetKernelExecInfo(object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void *) * pointerList.size(), + pointerList.data())); + } + + /*! + * Specify a std::array of SVM pointers that the kernel may access in + * addition to its arguments. + */ + template + cl_int setSVMPointers(const std::array &pointerList) { + return detail::errHandler(::clSetKernelExecInfo(object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void *) * pointerList.size(), + pointerList.data())); + } + + /*! \brief Enable fine-grained system SVM. + * + * \note It is only possible to enable fine-grained system SVM if all devices + * in the context associated with kernel support it. + * + * \param svmEnabled True if fine-grained system SVM is requested. False otherwise. + * \return CL_SUCCESS if the function was executed succesfully. CL_INVALID_OPERATION + * if no devices in the context support fine-grained system SVM. + * + * \see clSetKernelExecInfo + */ + cl_int enableFineGrainedSystemSVM(bool svmEnabled) { + cl_bool svmEnabled_ = svmEnabled ? CL_TRUE : CL_FALSE; + return detail::errHandler(::clSetKernelExecInfo( + object_, CL_KERNEL_EXEC_INFO_SVM_FINE_GRAIN_SYSTEM, sizeof(cl_bool), &svmEnabled_)); + } + + template + void setSVMPointersHelper(std::array &pointerList, + const pointer &t0, const pointer &t1, Ts &...ts) { + pointerList[index] = static_cast(t0.get()); + setSVMPointersHelper(pointerList, t1, ts...); + } + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0, T1 t1, Ts... 
ts) { + pointerList[index] = static_cast(t0); + setSVMPointersHelper(pointerList, t1, ts...); + } + + template + void setSVMPointersHelper(std::array &pointerList, + const pointer &t0) { + pointerList[index] = static_cast(t0.get()); + } + + template + typename std::enable_if::value, void>::type + setSVMPointersHelper(std::array &pointerList, T0 t0) { + pointerList[index] = static_cast(t0); + } + + template + cl_int setSVMPointers(const T0 &t0, Ts &...ts) { + std::array pointerList; + + setSVMPointersHelper<0, 1 + sizeof...(Ts)>(pointerList, t0, ts...); + return detail::errHandler(::clSetKernelExecInfo(object_, + CL_KERNEL_EXEC_INFO_SVM_PTRS, + sizeof(void *) * (1 + sizeof...(Ts)), + pointerList.data())); + } + + template + cl_int setExecInfo(cl_kernel_exec_info param_name, const T &val) { + return detail::errHandler(::clSetKernelExecInfo(object_, param_name, sizeof(T), &val)); + } + + template + cl_int setExecInfo( + typename detail::param_traits::param_type &val) { + return setExecInfo(name, val); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Make a deep copy of the kernel object including its arguments. - * @return A new kernel object with internal state entirely separate from that - * of the original but with any arguments set on the original intact. - */ - Kernel clone() - { - cl_int error; - Kernel retValue(clCloneKernel(this->get(), &error)); - - detail::errHandler(error, __CLONE_KERNEL_ERR); - return retValue; - } + /** + * Make a deep copy of the kernel object including its arguments. + * @return A new kernel object with internal state entirely separate from that + * of the original but with any arguments set on the original intact. + */ + Kernel clone() { + cl_int error; + Kernel retValue(clCloneKernel(this->get(), &error)); + + detail::errHandler(error, __CLONE_KERNEL_ERR); + return retValue; + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 -}; + }; -/*! 
\class Program - * \brief Program interface that implements cl_program. - */ -class Program : public detail::Wrapper -{ -public: + /*! \class Program + * \brief Program interface that implements cl_program. + */ + class Program : public detail::Wrapper { + public: #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - typedef vector> Binaries; - typedef vector Sources; -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - typedef vector > Binaries; - typedef vector > Sources; + typedef vector> Binaries; + typedef vector Sources; +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + typedef vector> Binaries; + typedef vector> Sources; #endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - - Program( - const string& source, - bool build = false, - cl_int* err = nullptr) - { - cl_int error; - const char * strings = source.c_str(); - const size_type length = source.size(); + Program(const string &source, bool build = false, cl_int *err = nullptr) { + cl_int error; - Context context = Context::getDefault(err); + const char *strings = source.c_str(); + const size_type length = source.size(); - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); + Context context = Context::getDefault(err); - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + object_ = ::clCreateProgramWithSource(context(), (cl_uint)1, &strings, &length, &error); - if (error == CL_SUCCESS && build) { + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - error = ::clBuildProgram( - object_, - 0, - nullptr, + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram(object_, + 0, + nullptr, #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", + "-cl-std=CL2.0", #else - "", + "", #endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - nullptr, - nullptr); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, 
getBuildInfo()); - } - - if (err != nullptr) { - *err = error; - } - } - - Program( - const Context& context, - const string& source, - bool build = false, - cl_int* err = nullptr) - { - cl_int error; - - const char * strings = source.c_str(); - const size_type length = source.size(); - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)1, &strings, &length, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - - if (error == CL_SUCCESS && build) { - error = ::clBuildProgram( - object_, - 0, - nullptr, + nullptr, + nullptr); + + detail::buildErrHandler( + error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != nullptr) { *err = error; } + } + + Program(const Context &context, const string &source, bool build = false, + cl_int *err = nullptr) { + cl_int error; + + const char *strings = source.c_str(); + const size_type length = source.size(); + + object_ = ::clCreateProgramWithSource(context(), (cl_uint)1, &strings, &length, &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram(object_, + 0, + nullptr, #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", + "-cl-std=CL2.0", #else - "", + "", #endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - nullptr, - nullptr); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - if (err != nullptr) { - *err = error; - } - } - - /** - * Create a program from a vector of source strings and the default context. - * Does not compile or link the program. 
- */ - Program( - const Sources& sources, - cl_int* err = nullptr) - { - cl_int error; - Context context = Context::getDefault(err); - - const size_type n = (size_type)sources.size(); - - vector lengths(n); - vector strings(n); - - for (size_type i = 0; i < n; ++i) { -#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].data(); - lengths[i] = sources[(int)i].length(); -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; -#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - } - - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings.data(), lengths.data(), &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != nullptr) { - *err = error; - } - } - - /** - * Create a program from a vector of source strings and a provided context. - * Does not compile or link the program. - */ - Program( - const Context& context, - const Sources& sources, - cl_int* err = nullptr) - { - cl_int error; - - const size_type n = (size_type)sources.size(); - - vector lengths(n); - vector strings(n); - - for (size_type i = 0; i < n; ++i) { + nullptr, + nullptr); + + detail::buildErrHandler( + error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != nullptr) { *err = error; } + } + + /** + * Create a program from a vector of source strings and the default context. + * Does not compile or link the program. 
+ */ + Program(const Sources &sources, cl_int *err = nullptr) { + cl_int error; + Context context = Context::getDefault(err); + + const size_type n = (size_type)sources.size(); + + vector lengths(n); + vector strings(n); + + for (size_type i = 0; i < n; ++i) { #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].data(); - lengths[i] = sources[(int)i].length(); -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - strings[i] = sources[(int)i].first; - lengths[i] = sources[(int)i].second; + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; #endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - } + } - object_ = ::clCreateProgramWithSource( - context(), (cl_uint)n, strings.data(), lengths.data(), &error); + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); - detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); - if (err != nullptr) { - *err = error; - } - } + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != nullptr) { *err = error; } + } + /** + * Create a program from a vector of source strings and a provided context. + * Does not compile or link the program. + */ + Program(const Context &context, const Sources &sources, cl_int *err = nullptr) { + cl_int error; -#if defined(CL_HPP_USE_IL_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Program constructor to allow construction of program from SPIR-V or another IL. - * - * Requires OpenCL 2.1 or newer or the cl_khr_il_program extension. 
- */ - Program( - const vector& IL, - bool build = false, - cl_int* err = nullptr) - { - cl_int error; - - Context context = Context::getDefault(err); + const size_type n = (size_type)sources.size(); -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + vector lengths(n); + vector strings(n); - object_ = ::clCreateProgramWithIL( - context(), static_cast(IL.data()), IL.size(), &error); + for (size_type i = 0; i < n; ++i) { +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].data(); + lengths[i] = sources[(int)i].length(); +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + strings[i] = sources[(int)i].first; + lengths[i] = sources[(int)i].second; +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + } -#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + object_ = ::clCreateProgramWithSource( + context(), (cl_uint)n, strings.data(), lengths.data(), &error); - typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; - static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = nullptr; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR); + if (err != nullptr) { *err = error; } + } - object_ = pfn_clCreateProgramWithILKHR( - context(), static_cast(IL.data()), IL.size(), &error); +#if defined(CL_HPP_USE_IL_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Program constructor to allow construction of program from SPIR-V or another IL. + * + * Requires OpenCL 2.1 or newer or the cl_khr_il_program extension. 
+ */ + Program(const vector &IL, bool build = false, cl_int *err = nullptr) { + cl_int error; + + Context context = Context::getDefault(err); + +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + object_ = ::clCreateProgramWithIL( + context(), static_cast(IL.data()), IL.size(), &error); + +# else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; + static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = nullptr; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + + object_ = pfn_clCreateProgramWithILKHR( + context(), static_cast(IL.data()), IL.size(), &error); + +# endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram(object_, + 0, + nullptr, +# if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +# else + "", +# endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + nullptr, + nullptr); + + detail::buildErrHandler( + error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != nullptr) { *err = error; } + } + + /** + * Program constructor to allow construction of program from SPIR-V or another IL + * for a specific context. + * + * Requires OpenCL 2.1 or newer or the cl_khr_il_program extension. 
+ */ + Program(const Context &context, const vector &IL, bool build = false, + cl_int *err = nullptr) { + cl_int error; + +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + object_ = ::clCreateProgramWithIL( + context(), static_cast(IL.data()), IL.size(), &error); + +# else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; + static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = nullptr; + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + + object_ = pfn_clCreateProgramWithILKHR( + context(), static_cast(IL.data()), IL.size(), &error); + +# endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + + detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + + if (error == CL_SUCCESS && build) { + error = ::clBuildProgram(object_, + 0, + nullptr, +# if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + "-cl-std=CL2.0", +# else + "", +# endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) + nullptr, + nullptr); + + detail::buildErrHandler( + error, __BUILD_PROGRAM_ERR, getBuildInfo()); + } + + if (err != nullptr) { *err = error; } + } +#endif // defined(CL_HPP_USE_IL_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /** + * Construct a program object from a list of devices and a per-device list of binaries. + * \param context A valid OpenCL context in which to construct the program. + * \param devices A vector of OpenCL device objects for which the program will be created. + * \param binaries A vector of pairs of a pointer to a binary object and its length. + * \param binaryStatus An optional vector that on completion will be resized to + * match the size of binaries and filled with values to specify if each binary + * was successfully loaded. + * Set to CL_SUCCESS if the binary was successfully loaded. + * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is nullptr. 
+ * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. + * \param err if non-nullptr will be set to CL_SUCCESS on successful operation or one of the + * following errors: CL_INVALID_CONTEXT if context is not a valid context. CL_INVALID_VALUE + * if the length of devices is zero; or if the length of binaries does not match the length + * of devices; or if any entry in binaries is nullptr or has length 0. CL_INVALID_DEVICE if + * OpenCL devices listed in devices are not in the list of devices associated with context. + * CL_INVALID_BINARY if an invalid program binary was encountered for any device. + * binaryStatus will return specific status for each device. CL_OUT_OF_HOST_MEMORY if there + * is a failure to allocate resources required by the OpenCL implementation on the host. + */ + Program(const Context &context, const vector &devices, const Binaries &binaries, + vector *binaryStatus = nullptr, cl_int *err = nullptr) { + cl_int error; + + const size_type numDevices = devices.size(); + + // Catch size mismatch early and return + if (binaries.size() != numDevices) { + error = CL_INVALID_VALUE; + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != nullptr) { *err = error; } + return; + } + + vector lengths(numDevices); + vector images(numDevices); +#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = binaries[i].data(); + lengths[i] = binaries[(int)i].size(); + } +#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) + for (size_type i = 0; i < numDevices; ++i) { + images[i] = (const unsigned char *)binaries[i].first; + lengths[i] = binaries[(int)i].second; + } +#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + vector deviceIDs(numDevices); + for (size_type deviceIndex = 0; deviceIndex < numDevices; 
++deviceIndex) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } - if (error == CL_SUCCESS && build) { + if (binaryStatus) { binaryStatus->resize(numDevices); } - error = ::clBuildProgram( - object_, - 0, - nullptr, -#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", -#else - "", -#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - nullptr, - nullptr); - - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - if (err != nullptr) { - *err = error; - } - } - - /** - * Program constructor to allow construction of program from SPIR-V or another IL - * for a specific context. - * - * Requires OpenCL 2.1 or newer or the cl_khr_il_program extension. - */ - Program( - const Context& context, - const vector& IL, - bool build = false, - cl_int* err = nullptr) - { - cl_int error; + object_ = ::clCreateProgramWithBinary( + context(), + (cl_uint)devices.size(), + deviceIDs.data(), + lengths.data(), + images.data(), + (binaryStatus != nullptr && numDevices > 0) ? &binaryStatus->front() : nullptr, + &error); -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 + detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); + if (err != nullptr) { *err = error; } + } - object_ = ::clCreateProgramWithIL( - context(), static_cast(IL.data()), IL.size(), &error); +#if CL_HPP_TARGET_OPENCL_VERSION >= 120 + /** + * Create program using builtin kernels. 
+ * \param kernelNames Semi-colon separated list of builtin kernel names + */ + Program(const Context &context, const vector &devices, const string &kernelNames, + cl_int *err = nullptr) { + cl_int error; + + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); + for (size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } + + object_ = ::clCreateProgramWithBuiltInKernels( + context(), (cl_uint)devices.size(), deviceIDs.data(), kernelNames.c_str(), &error); + + detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); + if (err != nullptr) { *err = error; } + } +#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -#else // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + Program() {} - typedef clCreateProgramWithILKHR_fn PFN_clCreateProgramWithILKHR; - static PFN_clCreateProgramWithILKHR pfn_clCreateProgramWithILKHR = nullptr; - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateProgramWithILKHR); + /*! \brief Constructor from cl_program - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. 
+ */ + explicit Program(const cl_program &program, bool retainObject = false) : + detail::Wrapper(program, retainObject) {} - object_ = pfn_clCreateProgramWithILKHR( - context(), static_cast(IL.data()), IL.size(), &error); + Program &operator=(const cl_program &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + cl_int build(const vector &devices, const char *options = nullptr, + void(CL_CALLBACK *notifyFptr)(cl_program, void *) = nullptr, + void *data = nullptr) const { + size_type numDevices = devices.size(); + vector deviceIDs(numDevices); - detail::errHandler(error, __CREATE_PROGRAM_WITH_IL_ERR); + for (size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex) { + deviceIDs[deviceIndex] = (devices[deviceIndex])(); + } - if (error == CL_SUCCESS && build) { - error = ::clBuildProgram( - object_, - 0, - nullptr, -#if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - "-cl-std=CL2.0", -#else - "", -#endif // #if !defined(CL_HPP_CL_1_2_DEFAULT_BUILD) - nullptr, - nullptr); + cl_int buildError = ::clBuildProgram( + object_, (cl_uint)devices.size(), deviceIDs.data(), options, notifyFptr, data); - detail::buildErrHandler(error, __BUILD_PROGRAM_ERR, getBuildInfo()); - } + return detail::buildErrHandler( + buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); + } - if (err != nullptr) { - *err = error; - } - } -#endif // defined(CL_HPP_USE_IL_KHR) || CL_HPP_TARGET_OPENCL_VERSION >= 210 + cl_int build(const Device &device, const char *options = nullptr, + void(CL_CALLBACK *notifyFptr)(cl_program, void *) = nullptr, + void *data = nullptr) const { + cl_device_id deviceID = device(); - /** - * Construct a program object from a list of devices and a per-device list of binaries. - * \param context A valid OpenCL context in which to construct the program. - * \param devices A vector of OpenCL device objects for which the program will be created. 
- * \param binaries A vector of pairs of a pointer to a binary object and its length. - * \param binaryStatus An optional vector that on completion will be resized to - * match the size of binaries and filled with values to specify if each binary - * was successfully loaded. - * Set to CL_SUCCESS if the binary was successfully loaded. - * Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is nullptr. - * Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device. - * \param err if non-nullptr will be set to CL_SUCCESS on successful operation or one of the following errors: - * CL_INVALID_CONTEXT if context is not a valid context. - * CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; - * or if any entry in binaries is nullptr or has length 0. - * CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context. - * CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device. - * CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host. 
- */ - Program( - const Context& context, - const vector& devices, - const Binaries& binaries, - vector* binaryStatus = nullptr, - cl_int* err = nullptr) - { - cl_int error; - - const size_type numDevices = devices.size(); - - // Catch size mismatch early and return - if(binaries.size() != numDevices) { - error = CL_INVALID_VALUE; - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != nullptr) { - *err = error; - } - return; - } - - - vector lengths(numDevices); - vector images(numDevices); -#if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - for (size_type i = 0; i < numDevices; ++i) { - images[i] = binaries[i].data(); - lengths[i] = binaries[(int)i].size(); - } -#else // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - for (size_type i = 0; i < numDevices; ++i) { - images[i] = (const unsigned char*)binaries[i].first; - lengths[i] = binaries[(int)i].second; - } -#endif // #if !defined(CL_HPP_ENABLE_PROGRAM_CONSTRUCTION_FROM_ARRAY_COMPATIBILITY) - - vector deviceIDs(numDevices); - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - if(binaryStatus) { - binaryStatus->resize(numDevices); - } - - object_ = ::clCreateProgramWithBinary( - context(), (cl_uint) devices.size(), - deviceIDs.data(), - lengths.data(), images.data(), (binaryStatus != nullptr && numDevices > 0) - ? &binaryStatus->front() - : nullptr, &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR); - if (err != nullptr) { - *err = error; - } - } - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Create program using builtin kernels. 
- * \param kernelNames Semi-colon separated list of builtin kernel names - */ - Program( - const Context& context, - const vector& devices, - const string& kernelNames, - cl_int* err = nullptr) - { - cl_int error; - - - size_type numDevices = devices.size(); - vector deviceIDs(numDevices); - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - object_ = ::clCreateProgramWithBuiltInKernels( - context(), - (cl_uint) devices.size(), - deviceIDs.data(), - kernelNames.c_str(), - &error); - - detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR); - if (err != nullptr) { - *err = error; - } - } -#endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_int buildError = ::clBuildProgram(object_, 1, &deviceID, options, notifyFptr, data); - Program() { } - - - /*! \brief Constructor from cl_program - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. 
- */ - explicit Program(const cl_program& program, bool retainObject = false) : - detail::Wrapper(program, retainObject) { } - - Program& operator = (const cl_program& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - - cl_int build( - const vector& devices, - const char* options = nullptr, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = nullptr, - void* data = nullptr) const - { - size_type numDevices = devices.size(); - vector deviceIDs(numDevices); - - for( size_type deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) { - deviceIDs[deviceIndex] = (devices[deviceIndex])(); - } - - cl_int buildError = ::clBuildProgram( - object_, - (cl_uint) - devices.size(), - deviceIDs.data(), - options, - notifyFptr, - data); - - return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); - } - - cl_int build( - const Device& device, - const char* options = nullptr, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = nullptr, - void* data = nullptr) const - { - cl_device_id deviceID = device(); - - cl_int buildError = ::clBuildProgram( - object_, - 1, - &deviceID, - options, - notifyFptr, - data); - - BuildLogType buildLog(0); - buildLog.push_back(std::make_pair(device, getBuildInfo(device))); - return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, buildLog); - } - - cl_int build( - const char* options = nullptr, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = nullptr, - void* data = nullptr) const - { - cl_int buildError = ::clBuildProgram( - object_, - 0, - nullptr, - options, - notifyFptr, - data); - - return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); - } + BuildLogType buildLog(0); + buildLog.push_back(std::make_pair(device, getBuildInfo(device))); + return detail::buildErrHandler(buildError, __BUILD_PROGRAM_ERR, buildLog); + } + + cl_int build(const char *options = nullptr, + void(CL_CALLBACK *notifyFptr)(cl_program, void *) = nullptr, + void *data = nullptr) const { 
+ cl_int buildError = ::clBuildProgram(object_, 0, nullptr, options, notifyFptr, data); + + return detail::buildErrHandler( + buildError, __BUILD_PROGRAM_ERR, getBuildInfo()); + } #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_int compile( - const char* options = nullptr, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = nullptr, - void* data = nullptr) const - { - cl_int error = ::clCompileProgram( - object_, - 0, - nullptr, - options, - 0, - nullptr, - nullptr, - notifyFptr, - data); - return detail::buildErrHandler(error, __COMPILE_PROGRAM_ERR, getBuildInfo()); - } + cl_int compile(const char *options = nullptr, + void(CL_CALLBACK *notifyFptr)(cl_program, void *) = nullptr, + void *data = nullptr) const { + cl_int error = ::clCompileProgram( + object_, 0, nullptr, options, 0, nullptr, nullptr, notifyFptr, data); + return detail::buildErrHandler( + error, __COMPILE_PROGRAM_ERR, getBuildInfo()); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - template - cl_int getInfo(cl_program_info name, T* param) const - { - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_program_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - template - cl_int getBuildInfo( - const Device& device, cl_program_build_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetProgramBuildInfo, object_, device(), name, param), - __GET_PROGRAM_BUILD_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getBuildInfo(const Device& device, cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - cl_int result = getBuildInfo(device, name, ¶m); - if (err != nullptr) { - *err = 
result; - } - return param; - } - - /** - * Build info function that returns a vector of device/info pairs for the specified - * info type and for all devices in the program. - * On an error reading the info for any device, an empty vector of info will be returned. - */ - template - vector::param_type>> - getBuildInfo(cl_int *err = nullptr) const - { - cl_int result = CL_SUCCESS; - - auto devs = getInfo(&result); - vector::param_type>> - devInfo; - - // If there was an initial error from getInfo return the error - if (result != CL_SUCCESS) { - if (err != nullptr) { - *err = result; - } - return devInfo; - } - - for (const cl::Device &d : devs) { - typename detail::param_traits< - detail::cl_program_build_info, name>::param_type param; - result = getBuildInfo(d, name, ¶m); - devInfo.push_back( - std::pair::param_type> - (d, param)); - if (result != CL_SUCCESS) { - // On error, leave the loop and return the error code - break; - } - } - if (err != nullptr) { - *err = result; - } - if (result != CL_SUCCESS) { - devInfo.clear(); - } - return devInfo; - } - - cl_int createKernels(vector* kernels) - { - cl_uint numKernels; - cl_int err = ::clCreateKernelsInProgram(object_, 0, nullptr, &numKernels); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - vector value(numKernels); - - err = ::clCreateKernelsInProgram( - object_, numKernels, value.data(), nullptr); - if (err != CL_SUCCESS) { - return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); - } - - if (kernels) { - kernels->resize(value.size()); - - // Assign to param, constructing with retain behaviour - // to correctly capture each underlying CL object - for (size_type i = 0; i < value.size(); i++) { - // We do not need to retain because this kernel is being created - // by the runtime - (*kernels)[i] = Kernel(value[i], false); - } - } - return CL_SUCCESS; - } + template + cl_int getInfo(cl_program_info name, T *param) const { + return 
detail::errHandler(detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + template + cl_int getBuildInfo(const Device &device, cl_program_build_info name, T *param) const { + return detail::errHandler( + detail::getInfo(&::clGetProgramBuildInfo, object_, device(), name, param), + __GET_PROGRAM_BUILD_INFO_ERR); + } + + template + typename detail::param_traits::param_type + getBuildInfo(const Device &device, cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getBuildInfo(device, name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + /** + * Build info function that returns a vector of device/info pairs for the specified + * info type and for all devices in the program. + * On an error reading the info for any device, an empty vector of info will be returned. 
+ */ + template + vector::param_type>> + getBuildInfo(cl_int *err = nullptr) const { + cl_int result = CL_SUCCESS; + + auto devs = getInfo(&result); + vector::param_type>> + devInfo; + + // If there was an initial error from getInfo return the error + if (result != CL_SUCCESS) { + if (err != nullptr) { *err = result; } + return devInfo; + } + + for (const cl::Device &d : devs) { + typename detail::param_traits::param_type + param; + result = getBuildInfo(d, name, ¶m); + devInfo.push_back( + std::pair< + cl::Device, + typename detail::param_traits::param_type>( + d, param)); + if (result != CL_SUCCESS) { + // On error, leave the loop and return the error code + break; + } + } + if (err != nullptr) { *err = result; } + if (result != CL_SUCCESS) { devInfo.clear(); } + return devInfo; + } + + cl_int createKernels(vector *kernels) { + cl_uint numKernels; + cl_int err = ::clCreateKernelsInProgram(object_, 0, nullptr, &numKernels); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + vector value(numKernels); + + err = ::clCreateKernelsInProgram(object_, numKernels, value.data(), nullptr); + if (err != CL_SUCCESS) { + return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR); + } + + if (kernels) { + kernels->resize(value.size()); + + // Assign to param, constructing with retain behaviour + // to correctly capture each underlying CL object + for (size_type i = 0; i < value.size(); i++) { + // We do not need to retain because this kernel is being created + // by the runtime + (*kernels)[i] = Kernel(value[i], false); + } + } + return CL_SUCCESS; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 220 -#if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) - /*! \brief Registers a callback function to be called when destructors for - * program scope global variables are complete and before the - * program is released. - * - * Wraps clSetProgramReleaseCallback(). 
- * - * Each call to this function registers the specified user callback function - * on a callback stack associated with program. The registered user callback - * functions are called in the reverse order in which they were registered. - */ - CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int setReleaseCallback( - void (CL_CALLBACK * pfn_notify)(cl_program program, void * user_data), - void * user_data = nullptr) CL_API_SUFFIX__VERSION_2_2_DEPRECATED - { - return detail::errHandler( - ::clSetProgramReleaseCallback( - object_, - pfn_notify, - user_data), - __SET_PROGRAM_RELEASE_CALLBACK_ERR); - } -#endif // #if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) - - /*! \brief Sets a SPIR-V specialization constant. - * - * Wraps clSetProgramSpecializationConstant(). - */ - template - typename std::enable_if::value, cl_int>::type - setSpecializationConstant(cl_uint index, const T &value) - { - return detail::errHandler( - ::clSetProgramSpecializationConstant( - object_, - index, - sizeof(value), - &value), - __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); - } - - /*! \brief Sets a SPIR-V specialization constant. - * - * Wraps clSetProgramSpecializationConstant(). - */ - cl_int setSpecializationConstant(cl_uint index, size_type size, const void* value) - { - return detail::errHandler( - ::clSetProgramSpecializationConstant( - object_, - index, - size, - value), - __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); - } +# if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) + /*! \brief Registers a callback function to be called when destructors for + * program scope global variables are complete and before the + * program is released. + * + * Wraps clSetProgramReleaseCallback(). + * + * Each call to this function registers the specified user callback function + * on a callback stack associated with program. The registered user callback + * functions are called in the reverse order in which they were registered. 
+ */ + CL_API_PREFIX__VERSION_2_2_DEPRECATED cl_int + setReleaseCallback(void(CL_CALLBACK *pfn_notify)(cl_program program, void *user_data), + void *user_data = nullptr) CL_API_SUFFIX__VERSION_2_2_DEPRECATED { + return detail::errHandler(::clSetProgramReleaseCallback(object_, pfn_notify, user_data), + __SET_PROGRAM_RELEASE_CALLBACK_ERR); + } +# endif // #if defined(CL_USE_DEPRECATED_OPENCL_2_2_APIS) + + /*! \brief Sets a SPIR-V specialization constant. + * + * Wraps clSetProgramSpecializationConstant(). + */ + template + typename std::enable_if::value, cl_int>::type + setSpecializationConstant(cl_uint index, const T &value) { + return detail::errHandler( + ::clSetProgramSpecializationConstant(object_, index, sizeof(value), &value), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } + + /*! \brief Sets a SPIR-V specialization constant. + * + * Wraps clSetProgramSpecializationConstant(). + */ + cl_int setSpecializationConstant(cl_uint index, size_type size, const void *value) { + return detail::errHandler( + ::clSetProgramSpecializationConstant(object_, index, size, value), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 -}; + }; #if CL_HPP_TARGET_OPENCL_VERSION >= 120 -inline Program linkProgram( - const Program& input1, - const Program& input2, - const char* options = nullptr, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = nullptr, - void* data = nullptr, - cl_int* err = nullptr) -{ - cl_int error_local = CL_SUCCESS; - cl_program programs[2] = { input1(), input2() }; - - Context ctx = input1.getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - nullptr, - options, - 2, - programs, - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != nullptr) { - *err = error_local; - } - - return Program(prog); -} - -inline Program linkProgram( - const 
vector& inputPrograms, - const char* options = nullptr, - void (CL_CALLBACK * notifyFptr)(cl_program, void *) = nullptr, - void* data = nullptr, - cl_int* err = nullptr) -{ - cl_int error_local = CL_SUCCESS; - Context ctx; - - static_assert(sizeof(cl::Program) == sizeof(cl_program), - "Size of cl::Program must be equal to size of cl_program"); - - if(inputPrograms.size() > 0) { - ctx = inputPrograms[0].getInfo(&error_local); - if(error_local!=CL_SUCCESS) { - detail::errHandler(error_local, __LINK_PROGRAM_ERR); - } - } - - cl_program prog = ::clLinkProgram( - ctx(), - 0, - nullptr, - options, - static_cast(inputPrograms.size()), - reinterpret_cast(inputPrograms.data()), - notifyFptr, - data, - &error_local); - - detail::errHandler(error_local,__COMPILE_PROGRAM_ERR); - if (err != nullptr) { - *err = error_local; - } - - return Program(prog); -} + inline Program linkProgram(const Program &input1, const Program &input2, + const char *options = nullptr, + void(CL_CALLBACK *notifyFptr)(cl_program, void *) = nullptr, + void *data = nullptr, cl_int *err = nullptr) { + cl_int error_local = CL_SUCCESS; + cl_program programs[2] = {input1(), input2()}; + + Context ctx = input1.getInfo(&error_local); + if (error_local != CL_SUCCESS) { detail::errHandler(error_local, __LINK_PROGRAM_ERR); } + + cl_program prog = + ::clLinkProgram(ctx(), 0, nullptr, options, 2, programs, notifyFptr, data, &error_local); + + detail::errHandler(error_local, __COMPILE_PROGRAM_ERR); + if (err != nullptr) { *err = error_local; } + + return Program(prog); + } + + inline Program linkProgram(const vector &inputPrograms, const char *options = nullptr, + void(CL_CALLBACK *notifyFptr)(cl_program, void *) = nullptr, + void *data = nullptr, cl_int *err = nullptr) { + cl_int error_local = CL_SUCCESS; + Context ctx; + + static_assert(sizeof(cl::Program) == sizeof(cl_program), + "Size of cl::Program must be equal to size of cl_program"); + + if (inputPrograms.size() > 0) { + ctx = 
inputPrograms[0].getInfo(&error_local); + if (error_local != CL_SUCCESS) { detail::errHandler(error_local, __LINK_PROGRAM_ERR); } + } + + cl_program prog = + ::clLinkProgram(ctx(), + 0, + nullptr, + options, + static_cast(inputPrograms.size()), + reinterpret_cast(inputPrograms.data()), + notifyFptr, + data, + &error_local); + + detail::errHandler(error_local, __COMPILE_PROGRAM_ERR); + if (err != nullptr) { *err = error_local; } + + return Program(prog); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 -// Template specialization for CL_PROGRAM_BINARIES -template <> -inline cl_int cl::Program::getInfo(cl_program_info name, vector>* param) const -{ - if (name != CL_PROGRAM_BINARIES) { - return CL_INVALID_VALUE; - } - if (param) { - // Resize the parameter array appropriately for each allocation - // and pass down to the helper - - vector sizes = getInfo(); - size_type numBinaries = sizes.size(); - - // Resize the parameter array and constituent arrays - param->resize(numBinaries); - for (size_type i = 0; i < numBinaries; ++i) { - (*param)[i].resize(sizes[i]); - } - - return detail::errHandler( - detail::getInfo(&::clGetProgramInfo, object_, name, param), - __GET_PROGRAM_INFO_ERR); - } - - return CL_SUCCESS; -} - -template<> -inline vector> cl::Program::getInfo(cl_int* err) const -{ - vector> binariesVectors; - - cl_int result = getInfo(CL_PROGRAM_BINARIES, &binariesVectors); - if (err != nullptr) { - *err = result; - } - return binariesVectors; -} + // Template specialization for CL_PROGRAM_BINARIES + template<> + inline cl_int cl::Program::getInfo(cl_program_info name, + vector> *param) const { + if (name != CL_PROGRAM_BINARIES) { return CL_INVALID_VALUE; } + if (param) { + // Resize the parameter array appropriately for each allocation + // and pass down to the helper + + vector sizes = getInfo(); + size_type numBinaries = sizes.size(); + + // Resize the parameter array and constituent arrays + param->resize(numBinaries); + for (size_type i = 0; i < numBinaries; 
++i) { (*param)[i].resize(sizes[i]); } + + return detail::errHandler(detail::getInfo(&::clGetProgramInfo, object_, name, param), + __GET_PROGRAM_INFO_ERR); + } + + return CL_SUCCESS; + } + + template<> + inline vector> + cl::Program::getInfo(cl_int *err) const { + vector> binariesVectors; + + cl_int result = getInfo(CL_PROGRAM_BINARIES, &binariesVectors); + if (err != nullptr) { *err = result; } + return binariesVectors; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 220 -// Template specialization for clSetProgramSpecializationConstant -template <> -inline cl_int cl::Program::setSpecializationConstant(cl_uint index, const bool &value) -{ - cl_uchar ucValue = value ? CL_UCHAR_MAX : 0; - return detail::errHandler( - ::clSetProgramSpecializationConstant( - object_, - index, - sizeof(ucValue), - &ucValue), - __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); -} + // Template specialization for clSetProgramSpecializationConstant + template<> + inline cl_int cl::Program::setSpecializationConstant(cl_uint index, const bool &value) { + cl_uchar ucValue = value ? 
CL_UCHAR_MAX : 0; + return detail::errHandler( + ::clSetProgramSpecializationConstant(object_, index, sizeof(ucValue), &ucValue), + __SET_PROGRAM_SPECIALIZATION_CONSTANT_ERR); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 220 -inline Kernel::Kernel(const Program& program, const char* name, cl_int* err) -{ - cl_int error; - - object_ = ::clCreateKernel(program(), name, &error); - detail::errHandler(error, __CREATE_KERNEL_ERR); + inline Kernel::Kernel(const Program &program, const char *name, cl_int *err) { + cl_int error; - if (err != nullptr) { - *err = error; - } + object_ = ::clCreateKernel(program(), name, &error); + detail::errHandler(error, __CREATE_KERNEL_ERR); -} + if (err != nullptr) { *err = error; } + } #ifdef cl_khr_external_memory -enum class ExternalMemoryType : cl_external_memory_handle_type_khr -{ - None = 0, + enum class ExternalMemoryType : cl_external_memory_handle_type_khr { + None = 0, - OpaqueFd = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, - OpaqueWin32 = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR, - OpaqueWin32Kmt = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR, + OpaqueFd = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_FD_KHR, + OpaqueWin32 = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KHR, + OpaqueWin32Kmt = CL_EXTERNAL_MEMORY_HANDLE_OPAQUE_WIN32_KMT_KHR, - D3D11Texture = CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR, - D3D11TextureKmt = CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR, + D3D11Texture = CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KHR, + D3D11TextureKmt = CL_EXTERNAL_MEMORY_HANDLE_D3D11_TEXTURE_KMT_KHR, - D3D12Heap = CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR, - D3D12Resource = CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR, + D3D12Heap = CL_EXTERNAL_MEMORY_HANDLE_D3D12_HEAP_KHR, + D3D12Resource = CL_EXTERNAL_MEMORY_HANDLE_D3D12_RESOURCE_KHR, - DmaBuf = CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR, -}; + DmaBuf = CL_EXTERNAL_MEMORY_HANDLE_DMA_BUF_KHR, + }; #endif -enum class QueueProperties : cl_command_queue_properties -{ - None = 0, - Profiling = 
CL_QUEUE_PROFILING_ENABLE, - OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, -}; - -inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) -{ - return static_cast(static_cast(lhs) | static_cast(rhs)); -} - -inline QueueProperties operator&(QueueProperties lhs, QueueProperties rhs) -{ - return static_cast(static_cast(lhs) & static_cast(rhs)); -} - -/*! \class CommandQueue - * \brief CommandQueue interface for cl_command_queue. - */ -class CommandQueue : public detail::Wrapper -{ -private: - static std::once_flag default_initialized_; - static CommandQueue default_; - static cl_int default_error_; - - /*! \brief Create the default command queue returned by @ref getDefault. - * - * It sets default_error_ to indicate success or failure. It does not throw - * @c cl::Error. - */ - static void makeDefault() - { - /* We don't want to throw an error from this function, so we have to - * catch and set the error flag. - */ + enum class QueueProperties : cl_command_queue_properties { + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, + OutOfOrder = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, + }; + + inline QueueProperties operator|(QueueProperties lhs, QueueProperties rhs) { + return static_cast(static_cast(lhs) | + static_cast(rhs)); + } + + inline QueueProperties operator&(QueueProperties lhs, QueueProperties rhs) { + return static_cast(static_cast(lhs) & + static_cast(rhs)); + } + + /*! \class CommandQueue + * \brief CommandQueue interface for cl_command_queue. + */ + class CommandQueue : public detail::Wrapper { + private: + static std::once_flag default_initialized_; + static CommandQueue default_; + static cl_int default_error_; + + /*! \brief Create the default command queue returned by @ref getDefault. + * + * It sets default_error_ to indicate success or failure. It does not throw + * @c cl::Error. + */ + static void makeDefault() { + /* We don't want to throw an error from this function, so we have to + * catch and set the error flag. 
+ */ #if defined(CL_HPP_ENABLE_EXCEPTIONS) - try + try #endif - { - int error; - Context context = Context::getDefault(&error); - - if (error != CL_SUCCESS) { - default_error_ = error; - } - else { - Device device = Device::getDefault(); - default_ = CommandQueue(context, device, 0, &default_error_); - } - } + { + int error; + Context context = Context::getDefault(&error); + + if (error != CL_SUCCESS) { + default_error_ = error; + } else { + Device device = Device::getDefault(); + default_ = CommandQueue(context, device, 0, &default_error_); + } + } #if defined(CL_HPP_ENABLE_EXCEPTIONS) - catch (cl::Error &e) { - default_error_ = e.err(); - } + catch (cl::Error &e) { + default_error_ = e.err(); + } #endif - } + } - /*! \brief Create the default command queue. - * - * This sets @c default_. It does not throw - * @c cl::Error. - */ - static void makeDefaultProvided(const CommandQueue &c) { - default_ = c; - } + /*! \brief Create the default command queue. + * + * This sets @c default_. It does not throw + * @c cl::Error. 
+ */ + static void makeDefaultProvided(const CommandQueue &c) { default_ = c; } #ifdef cl_khr_external_memory - static std::once_flag ext_memory_initialized_; + static std::once_flag ext_memory_initialized_; - static void initMemoryExtension(const cl::Device& device) - { - auto platform = device.getInfo()(); + static void initMemoryExtension(const cl::Device &device) { + auto platform = device.getInfo()(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireExternalMemObjectsKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseExternalMemObjectsKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireExternalMemObjectsKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseExternalMemObjectsKHR); - if ((pfn_clEnqueueAcquireExternalMemObjectsKHR == nullptr) - && (pfn_clEnqueueReleaseExternalMemObjectsKHR == nullptr)) - { - detail::errHandler(CL_INVALID_VALUE, __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR); - } - } + if ((pfn_clEnqueueAcquireExternalMemObjectsKHR == nullptr) && + (pfn_clEnqueueReleaseExternalMemObjectsKHR == nullptr)) { + detail::errHandler(CL_INVALID_VALUE, __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR); + } + } #endif // cl_khr_external_memory -public: + public: #ifdef CL_HPP_UNIT_TEST_ENABLE - /*! \brief Reset the default. - * - * This sets @c default_ to an empty value to support cleanup in - * the unit test framework. - * This function is not thread safe. - */ - static void unitTestClearDefault() { - default_ = CommandQueue(); - } + /*! \brief Reset the default. + * + * This sets @c default_ to an empty value to support cleanup in + * the unit test framework. + * This function is not thread safe. + */ + static void unitTestClearDefault() { default_ = CommandQueue(); } #endif // #ifdef CL_HPP_UNIT_TEST_ENABLE - - - /*! - * \brief Constructs a CommandQueue based on passed properties. - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
- */ - CommandQueue( - cl_command_queue_properties properties, - cl_int* err = nullptr) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != nullptr) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - bool useWithProperties; + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue(cl_command_queue_properties properties, cl_int *err = nullptr) { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != nullptr) { *err = error; } + } else { + Device device = context.getInfo()[0]; + bool useWithProperties; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; + useWithProperties = true; #else - useWithProperties = false; + useWithProperties = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; - if ((properties & CL_QUEUE_ON_DEVICE) == 0) { - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - } - else { - error = CL_INVALID_QUEUE_PROPERTIES; - } - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } + if 
(useWithProperties) { + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, properties, 0}; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + } else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useWithProperties) { + object_ = ::clCreateCommandQueue(context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - } - - /*! - * \brief Constructs a CommandQueue based on passed properties. - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. - */ - CommandQueue( - QueueProperties properties, - cl_int* err = nullptr) - { - cl_int error; - - Context context = Context::getDefault(&error); - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) { - if (err != nullptr) { - *err = error; - } - } - else { - Device device = context.getInfo()[0]; - bool useWithProperties; + } + } + + /*! + * \brief Constructs a CommandQueue based on passed properties. + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + CommandQueue(QueueProperties properties, cl_int *err = nullptr) { + cl_int error; + + Context context = Context::getDefault(&error); + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != nullptr) { *err = error; } + } else { + Device device = context.getInfo()[0]; + bool useWithProperties; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; + useWithProperties = true; #else - useWithProperties = false; + useWithProperties = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; - - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0}; + + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), static_cast(properties), &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - 
*err = error; - } - } + if (!useWithProperties) { + object_ = + ::clCreateCommandQueue(context(), + device(), + static_cast(properties), + &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - - } - } - - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. - */ - explicit CommandQueue( - const Context& context, - cl_command_queue_properties properties = 0, - cl_int* err = nullptr) - { - cl_int error; - bool useWithProperties; - vector devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != nullptr) { - *err = error; - } - return; - } + } + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given + * context Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is + * specified. 
+ */ + explicit CommandQueue(const Context &context, cl_command_queue_properties properties = 0, + cl_int *err = nullptr) { + cl_int error; + bool useWithProperties; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != nullptr) { *err = error; } + return; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; + useWithProperties = true; #else - useWithProperties = false; + useWithProperties = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; - if ((properties & CL_QUEUE_ON_DEVICE) == 0) { - object_ = ::clCreateCommandQueueWithProperties( - context(), devices[0](), queue_properties, &error); - } - else { - error = CL_INVALID_QUEUE_PROPERTIES; - } - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } + if (useWithProperties) { + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, properties, 0}; + if ((properties & CL_QUEUE_ON_DEVICE) == 0) { + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + } else { + error = CL_INVALID_QUEUE_PROPERTIES; + } + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if 
CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), devices[0](), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useWithProperties) { + object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - /*! - * \brief Constructs a CommandQueue for an implementation defined device in the given context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. - */ - explicit CommandQueue( - const Context& context, - QueueProperties properties, - cl_int* err = nullptr) - { - cl_int error; - bool useWithProperties; - vector devices; - error = context.getInfo(CL_CONTEXT_DEVICES, &devices); - - detail::errHandler(error, __CREATE_CONTEXT_ERR); - - if (error != CL_SUCCESS) - { - if (err != nullptr) { - *err = error; - } - return; - } + } + + /*! + * \brief Constructs a CommandQueue for an implementation defined device in the given + * context Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is + * specified. 
+ */ + explicit CommandQueue(const Context &context, QueueProperties properties, + cl_int *err = nullptr) { + cl_int error; + bool useWithProperties; + vector devices; + error = context.getInfo(CL_CONTEXT_DEVICES, &devices); + + detail::errHandler(error, __CREATE_CONTEXT_ERR); + + if (error != CL_SUCCESS) { + if (err != nullptr) { *err = error; } + return; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; + useWithProperties = true; #else - useWithProperties = false; + useWithProperties = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), devices[0](), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0}; + object_ = ::clCreateCommandQueueWithProperties( + context(), devices[0](), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), devices[0](), static_cast(properties), &error); - - detail::errHandler(error, 
__CREATE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useWithProperties) { + object_ = + ::clCreateCommandQueue(context(), + devices[0](), + static_cast(properties), + &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - /*! - * \brief Constructs a CommandQueue for a passed device and context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. - */ - CommandQueue( - const Context& context, - const Device& device, - cl_command_queue_properties properties = 0, - cl_int* err = nullptr) - { - cl_int error; - bool useWithProperties; + } + + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. + */ + CommandQueue(const Context &context, const Device &device, + cl_command_queue_properties properties = 0, cl_int *err = nullptr) { + cl_int error; + bool useWithProperties; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; + useWithProperties = true; #else - useWithProperties = false; + useWithProperties = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, 
__CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } + if (useWithProperties) { + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, properties, 0}; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useWithProperties) { + object_ = ::clCreateCommandQueue(context(), device(), properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } - - /*! - * \brief Constructs a CommandQueue for a passed device and context - * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. - */ - CommandQueue( - const Context& context, - const Device& device, - QueueProperties properties, - cl_int* err = nullptr) - { - cl_int error; - bool useWithProperties; + } + + /*! + * \brief Constructs a CommandQueue for a passed device and context + * Will return an CL_INVALID_QUEUE_PROPERTIES error if CL_QUEUE_ON_DEVICE is specified. 
+ */ + CommandQueue(const Context &context, const Device &device, QueueProperties properties, + cl_int *err = nullptr) { + cl_int error; + bool useWithProperties; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 && CL_HPP_MINIMUM_OPENCL_VERSION < 200 - // Run-time decision based on the actual platform - { - cl_uint version = detail::getContextPlatformVersion(context()); - useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above - } + // Run-time decision based on the actual platform + { + cl_uint version = detail::getContextPlatformVersion(context()); + useWithProperties = (version >= 0x20000); // OpenCL 2.0 or above + } #elif CL_HPP_TARGET_OPENCL_VERSION >= 200 - useWithProperties = true; + useWithProperties = true; #else - useWithProperties = false; + useWithProperties = false; #endif #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (useWithProperties) { - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, static_cast(properties), 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } + if (useWithProperties) { + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, static_cast(properties), 0}; + object_ = ::clCreateCommandQueueWithProperties( + context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_MINIMUM_OPENCL_VERSION < 200 - if (!useWithProperties) { - object_ = ::clCreateCommandQueue( - context(), device(), static_cast(properties), &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - *err = error; - } - } + if (!useWithProperties) { + object_ = + ::clCreateCommandQueue(context(), + device(), + static_cast(properties), + &error); + + detail::errHandler(error, 
__CREATE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + } #endif // CL_HPP_MINIMUM_OPENCL_VERSION < 200 - } + } - static CommandQueue getDefault(cl_int * err = nullptr) - { - std::call_once(default_initialized_, makeDefault); + static CommandQueue getDefault(cl_int *err = nullptr) { + std::call_once(default_initialized_, makeDefault); #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); -#else // CL_HPP_TARGET_OPENCL_VERSION >= 200 - detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_ERR); + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); +#else // CL_HPP_TARGET_OPENCL_VERSION >= 200 + detail::errHandler(default_error_, __CREATE_COMMAND_QUEUE_ERR); #endif // CL_HPP_TARGET_OPENCL_VERSION >= 200 - if (err != nullptr) { - *err = default_error_; - } - return default_; - } - - /** - * Modify the default command queue to be used by - * subsequent operations. - * Will only set the default if no default was previously created. - * @return updated default command queue. - * Should be compared to the passed value to ensure that it was updated. - */ - static CommandQueue setDefault(const CommandQueue &default_queue) - { - std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_queue)); - detail::errHandler(default_error_); - return default_; - } - - CommandQueue() { } - - - /*! \brief Constructor from cl_command_queue - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. 
- */ - explicit CommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : - detail::Wrapper(commandQueue, retainObject) { } - - CommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_READ_BUFFER_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBuffer( - object_, buffer(), blocking, offset, size, - ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_WRITE_BUFFER_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - size_type src_offset, - size_type dst_offset, - size_type size, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBuffer( - object_, src(), dst(), src_offset, dst_offset, size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQEUE_COPY_BUFFER_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + if (err != nullptr) { *err = default_error_; } + return default_; + } + + /** + * Modify the default command queue to be used by + * subsequent operations. + * Will only set the default if no default was previously created. + * @return updated default command queue. + * Should be compared to the passed value to ensure that it was updated. + */ + static CommandQueue setDefault(const CommandQueue &default_queue) { + std::call_once(default_initialized_, makeDefaultProvided, std::cref(default_queue)); + detail::errHandler(default_error_); + return default_; + } + + CommandQueue() {} + + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. 
+ */ + explicit CommandQueue(const cl_command_queue &commandQueue, bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) {} + + CommandQueue &operator=(const cl_command_queue &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T *param) const { + return detail::errHandler( + detail::getInfo(&::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + cl_int enqueueReadBuffer(const Buffer &buffer, cl_bool blocking, size_type offset, + size_type size, void *ptr, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBuffer( + object_, + buffer(), + blocking, + offset, + size, + ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_READ_BUFFER_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueWriteBuffer(const Buffer &buffer, cl_bool blocking, size_type offset, + size_type size, const void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBuffer( + object_, + buffer(), + blocking, + offset, + size, + ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_WRITE_BUFFER_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueCopyBuffer(const Buffer &src, const Buffer &dst, size_type src_offset, + size_type dst_offset, size_type size, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBuffer( + object_, + src(), + dst(), + src_offset, + dst_offset, + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQEUE_COPY_BUFFER_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 110 - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - void *ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadBufferRect( - object_, - buffer(), - blocking, - buffer_offset.data(), - host_offset.data(), - region.data(), - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_READ_BUFFER_RECT_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueReadBufferRect( - buffer, - blocking, - { buffer_offset[0], buffer_offset[1], 0 }, - { host_offset[0], host_offset[1], 0 }, - { region[0], region[1], 1 }, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - const void *ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteBufferRect( - object_, - buffer(), - blocking, - buffer_offset.data(), - host_offset.data(), - region.data(), - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_WRITE_BUFFER_RECT_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueWriteBufferRect( - buffer, - blocking, - { buffer_offset[0], buffer_offset[1], 0 }, - { host_offset[0], host_offset[1], 0 }, - { region[0], region[1], 1 }, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferRect( - object_, - src(), - dst(), - src_origin.data(), - dst_origin.data(), - region.data(), - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQEUE_COPY_BUFFER_RECT_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueCopyBufferRect( - src, - dst, - { src_origin[0], src_origin[1], 0 }, - { dst_origin[0], dst_origin[1], 0 }, - { region[0], region[1], 1 }, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); - } + cl_int enqueueReadBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, size_type buffer_row_pitch, + size_type buffer_slice_pitch, size_type host_row_pitch, + size_type host_slice_pitch, void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadBufferRect( + object_, + buffer(), + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_READ_BUFFER_RECT_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueReadBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, size_type buffer_row_pitch, + size_type buffer_slice_pitch, size_type host_row_pitch, + size_type host_slice_pitch, void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueReadBufferRect(buffer, + blocking, + {buffer_offset[0], buffer_offset[1], 0}, + {host_offset[0], host_offset[1], 0}, + {region[0], region[1], 1}, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); + } + + cl_int enqueueWriteBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, size_type buffer_row_pitch, + size_type buffer_slice_pitch, size_type host_row_pitch, + size_type host_slice_pitch, const void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteBufferRect( + object_, + buffer(), + blocking, + buffer_offset.data(), + host_offset.data(), + region.data(), + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_WRITE_BUFFER_RECT_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueWriteBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, size_type buffer_row_pitch, + size_type buffer_slice_pitch, size_type host_row_pitch, + size_type host_slice_pitch, const void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueWriteBufferRect(buffer, + blocking, + {buffer_offset[0], buffer_offset[1], 0}, + {host_offset[0], host_offset[1], 0}, + {region[0], region[1], 1}, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); + } + + cl_int enqueueCopyBufferRect(const Buffer &src, const Buffer &dst, + const array &src_origin, + const array &dst_origin, + const array ®ion, size_type src_row_pitch, + size_type src_slice_pitch, size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferRect( + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQEUE_COPY_BUFFER_RECT_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferRect(const Buffer &src, const Buffer &dst, + const array &src_origin, + const array &dst_origin, + const array ®ion, size_type src_row_pitch, + size_type src_slice_pitch, size_type dst_row_pitch, + size_type dst_slice_pitch, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueCopyBufferRect(src, + dst, + {src_origin[0], src_origin[1], 0}, + {dst_origin[0], dst_origin[1], 0}, + {region[0], region[1], 1}, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Enqueue a command to fill a buffer object with a pattern - * of a given size. The pattern is specified as a vector type. - * \tparam PatternType The datatype of the pattern field. - * The pattern type must be an accepted OpenCL data type. - * \tparam offset Is the offset in bytes into the buffer at - * which to start filling. This must be a multiple of - * the pattern size. - * \tparam size Is the size in bytes of the region to fill. - * This must be a multiple of the pattern size. - */ - template - cl_int enqueueFillBuffer( - const Buffer& buffer, - PatternType pattern, - size_type offset, - size_type size, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillBuffer( - object_, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_FILL_BUFFER_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + /** + * Enqueue a command to fill a buffer object with a pattern + * of a given size. The pattern is specified as a vector type. + * \tparam PatternType The datatype of the pattern field. + * The pattern type must be an accepted OpenCL data type. + * \tparam offset Is the offset in bytes into the buffer at + * which to start filling. This must be a multiple of + * the pattern size. + * \tparam size Is the size in bytes of the region to fill. + * This must be a multiple of the pattern size. + */ + template + cl_int enqueueFillBuffer(const Buffer &buffer, PatternType pattern, size_type offset, + size_type size, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillBuffer( + object_, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_FILL_BUFFER_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReadImage( - object_, - image(), - blocking, - origin.data(), - region.data(), - row_pitch, - slice_pitch, - ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_READ_IMAGE_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueReadImage( - image, - blocking, - { origin[0], origin[1], 0 }, - { region[0], region[1], 1 }, - row_pitch, - slice_pitch, - ptr, - events, - event); - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueWriteImage( - object_, - image(), - blocking, - origin.data(), - region.data(), - row_pitch, - slice_pitch, - ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_WRITE_IMAGE_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueWriteImage( - image, - blocking, - { origin[0], origin[1], 0 }, - { region[0], region[1], 1 }, - row_pitch, - slice_pitch, - ptr, - events, - event); - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImage( - object_, - src(), - dst(), - src_origin.data(), - dst_origin.data(), - region.data(), - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_COPY_IMAGE_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueCopyImage( - src, - dst, - { src_origin[0], src_origin[1], 0 }, - { dst_origin[0], dst_origin[1], 0 }, - { region[0], region[1], 1 }, - events, - event); - } + cl_int enqueueReadImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReadImage( + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_READ_IMAGE_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueReadImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueReadImage(image, + blocking, + {origin[0], origin[1], 0}, + {region[0], region[1], 1}, + row_pitch, + slice_pitch, + ptr, + events, + event); + } + + cl_int enqueueWriteImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, const void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueWriteImage( + object_, + image(), + blocking, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_WRITE_IMAGE_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueWriteImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, const void *ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueWriteImage(image, + blocking, + {origin[0], origin[1], 0}, + {region[0], region[1], 1}, + row_pitch, + slice_pitch, + ptr, + events, + event); + } + + cl_int + enqueueCopyImage(const Image &src, const Image &dst, const array &src_origin, + const array &dst_origin, const array ®ion, + const vector *events = nullptr, Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImage( + object_, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_COPY_IMAGE_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int + enqueueCopyImage(const Image &src, const Image &dst, const array &src_origin, + const array &dst_origin, const array ®ion, + const vector *events = nullptr, Event *event = nullptr) const { + return enqueueCopyImage(src, + dst, + {src_origin[0], src_origin[1], 0}, + {dst_origin[0], dst_origin[1], 0}, + {region[0], region[1], 1}, + events, + event); + } #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point, signed integer - * or unsigned integer color value if the image channel data - * type is an unnormalized signed integer type. 
- */ - template - typename std::enable_if::value || - std::is_same::value || - std::is_same::value, - cl_int>::type - enqueueFillImage( - const Image& image, - T fillColor, - const array& origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueFillImage( - object_, - image(), - static_cast(&fillColor), - origin.data(), - region.data(), - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : NULL, - (event != NULL) ? &tmp : nullptr), - __ENQUEUE_FILL_IMAGE_ERR); - - if (event != nullptr && err == CL_SUCCESS) *event = tmp; - - return err; - } - - /** - * Enqueue a command to fill an image object with a specified color. - * \param fillColor is the color to use to fill the image. - * This is a four component RGBA floating-point, signed integer - * or unsigned integer color value if the image channel data - * type is an unnormalized signed integer type. - */ - template - typename std::enable_if::value || - std::is_same::value || - std::is_same::value, cl_int>::type - enqueueFillImage( - const Image& image, - T fillColor, - const array& origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueFillImage( - image, - fillColor, - { origin[0], origin[1], 0 }, - { region[0], region[1], 1 }, - events, - event - ); - } + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point, signed integer + * or unsigned integer color value if the image channel data + * type is an unnormalized signed integer type. 
+ */ + template + typename std::enable_if::value || + std::is_same::value || + std::is_same::value, + cl_int>::type + enqueueFillImage(const Image &image, T fillColor, const array &origin, + const array ®ion, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueFillImage( + object_, + image(), + static_cast(&fillColor), + origin.data(), + region.data(), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : NULL, + (event != NULL) ? &tmp : nullptr), + __ENQUEUE_FILL_IMAGE_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueue a command to fill an image object with a specified color. + * \param fillColor is the color to use to fill the image. + * This is a four component RGBA floating-point, signed integer + * or unsigned integer color value if the image channel data + * type is an unnormalized signed integer type. + */ + template + typename std::enable_if::value || + std::is_same::value || + std::is_same::value, + cl_int>::type + enqueueFillImage(const Image &image, T fillColor, const array &origin, + const array ®ion, const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueFillImage(image, + fillColor, + {origin[0], origin[1], 0}, + {region[0], region[1], 1}, + events, + event); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyImageToBuffer( - object_, - src(), - dst(), - src_origin.data(), - region.data(), - dst_offset, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? 
(cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueCopyImageToBuffer( - src, - dst, - { src_origin[0], src_origin[1], 0 }, - { region[0], region[1], 1 }, - dst_offset, - events, - event); - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueCopyBufferToImage( - object_, - src(), - dst(), - src_offset, - dst_origin.data(), - region.data(), - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueCopyBufferToImage( - src, - dst, - src_offset, - { dst_origin[0], dst_origin[1], 0 }, - { region[0], region[1], 1 }, - events, - event); - } - - void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - size_type offset, - size_type size, - const vector* events = nullptr, - Event* event = nullptr, - cl_int* err = nullptr) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapBuffer( - object_, buffer(), blocking, flags, offset, size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - if (event != nullptr && error == CL_SUCCESS) - *event = tmp; - - return result; - } - - void* enqueueMapImage( - const Image& image, - cl_bool blocking, - cl_map_flags flags, - const array& origin, - const array& region, - size_type * row_pitch, - size_type * slice_pitch, - const vector* events = nullptr, - Event* event = nullptr, - cl_int* err = nullptr) const - { - cl_event tmp; - cl_int error; - void * result = ::clEnqueueMapImage( - object_, image(), blocking, flags, - origin.data(), - region.data(), - row_pitch, slice_pitch, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); - if (err != nullptr) { - *err = error; - } - if (event != nullptr && error == CL_SUCCESS) - *event = tmp; - return result; - } - - void* enqueueMapImage( - const Image& image, - cl_bool blocking, - cl_map_flags flags, - const array& origin, - const array& region, - size_type* row_pitch, - size_type* slice_pitch, - const vector* events = nullptr, - Event* event = nullptr, - cl_int* err = nullptr) const - { - return enqueueMapImage(image, blocking, flags, - { origin[0], origin[1], 0 }, - { region[0], region[1], 1 }, row_pitch, - slice_pitch, events, event, err); - } + cl_int enqueueCopyImageToBuffer(const Image &src, const Buffer &dst, + const array &src_origin, + const array ®ion, size_type dst_offset, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyImageToBuffer( + object_, + src(), + dst(), + src_origin.data(), + region.data(), + dst_offset, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueCopyImageToBuffer(const Image &src, const Buffer &dst, + const array &src_origin, + const array ®ion, size_type dst_offset, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueCopyImageToBuffer(src, + dst, + {src_origin[0], src_origin[1], 0}, + {region[0], region[1], 1}, + dst_offset, + events, + event); + } + + cl_int enqueueCopyBufferToImage(const Buffer &src, const Image &dst, size_type src_offset, + const array &dst_origin, + const array ®ion, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueCopyBufferToImage( + object_, + src(), + dst(), + src_offset, + dst_origin.data(), + region.data(), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueCopyBufferToImage(const Buffer &src, const Image &dst, size_type src_offset, + const array &dst_origin, + const array ®ion, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueCopyBufferToImage(src, + dst, + src_offset, + {dst_origin[0], dst_origin[1], 0}, + {region[0], region[1], 1}, + events, + event); + } + + void *enqueueMapBuffer(const Buffer &buffer, cl_bool blocking, cl_map_flags flags, + size_type offset, size_type size, + const vector *events = nullptr, Event *event = nullptr, + cl_int *err = nullptr) const { + cl_event tmp; + cl_int error; + void *result = ::clEnqueueMapBuffer( + object_, + buffer(), + blocking, + flags, + offset, + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? 
(cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != nullptr) { *err = error; } + if (event != nullptr && error == CL_SUCCESS) *event = tmp; + + return result; + } + + void *enqueueMapImage(const Image &image, cl_bool blocking, cl_map_flags flags, + const array &origin, const array ®ion, + size_type *row_pitch, size_type *slice_pitch, + const vector *events = nullptr, Event *event = nullptr, + cl_int *err = nullptr) const { + cl_event tmp; + cl_int error; + void *result = ::clEnqueueMapImage( + object_, + image(), + blocking, + flags, + origin.data(), + region.data(), + row_pitch, + slice_pitch, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR); + if (err != nullptr) { *err = error; } + if (event != nullptr && error == CL_SUCCESS) *event = tmp; + return result; + } + + void *enqueueMapImage(const Image &image, cl_bool blocking, cl_map_flags flags, + const array &origin, const array ®ion, + size_type *row_pitch, size_type *slice_pitch, + const vector *events = nullptr, Event *event = nullptr, + cl_int *err = nullptr) const { + return enqueueMapImage(image, + blocking, + flags, + {origin[0], origin[1], 0}, + {region[0], region[1], 1}, + row_pitch, + slice_pitch, + events, + event, + err); + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /** - * Enqueues a command that copies a region of memory from the source pointer to the destination pointer. - * This function is specifically for transferring data between the host and a coarse-grained SVM buffer. 
- */ - template - cl_int enqueueMemcpySVM( - T *dst_ptr, - const T *src_ptr, - cl_bool blocking, - size_type size, - const vector *events = nullptr, - Event *event = nullptr) const { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMemcpy( - object_, blocking, static_cast(dst_ptr), static_cast(src_ptr), size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event *) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), __ENQUEUE_COPY_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - *Enqueues a command that will copy data from one coarse-grained SVM buffer to another. - *This function takes two cl::pointer instances representing the destination and source buffers. - */ - template - cl_int enqueueMemcpySVM( - cl::pointer &dst_ptr, - const cl::pointer &src_ptr, - cl_bool blocking, - size_type size, - const vector *events = nullptr, - Event *event = nullptr) const { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMemcpy( - object_, blocking, static_cast(dst_ptr.get()), static_cast(src_ptr.get()), - size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event *) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), __ENQUEUE_COPY_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a cl::vector instance. 
- */ - template - cl_int enqueueMemcpySVM( - cl::vector &dst_container, - const cl::vector &src_container, - cl_bool blocking, - const vector *events = nullptr, - Event *event = nullptr) const { - cl_event tmp; - if(src_container.size() != dst_container.size()){ - return detail::errHandler(CL_INVALID_VALUE,__ENQUEUE_COPY_SVM_ERR); - } - cl_int err = detail::errHandler(::clEnqueueSVMMemcpy( - object_, blocking, static_cast(dst_container.data()), - static_cast(src_container.data()), - dst_container.size() * sizeof(T), - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event *) &events->front() : nullptr, - (event != NULL) ? &tmp : nullptr), __ENQUEUE_COPY_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to fill a SVM buffer with a pattern. - * - */ - template - cl_int enqueueMemFillSVM( - T *ptr, - PatternType pattern, - size_type size, - const vector *events = nullptr, - Event *event = nullptr) const { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMemFill( - object_, static_cast(ptr), static_cast(&pattern), - sizeof(PatternType), size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event *) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), __ENQUEUE_FILL_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that fills a region of a coarse-grained SVM buffer with a specified pattern. - * This variant takes a cl::pointer instance. - */ - template - cl_int enqueueMemFillSVM( - cl::pointer &ptr, - PatternType pattern, - size_type size, - const vector *events = nullptr, - Event *event = nullptr) const { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMemFill( - object_, static_cast(ptr.get()), static_cast(&pattern), - sizeof(PatternType), size, - (events != nullptr) ? 
(cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event *) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), __ENQUEUE_FILL_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host to fill a region of a coarse-grained SVM buffer with a specified pattern. - * This variant takes a cl::vector instance. - */ - template - cl_int enqueueMemFillSVM( - cl::vector &container, - PatternType pattern, - const vector *events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMemFill( - object_, static_cast(container.data()), static_cast(&pattern), - sizeof(PatternType), container.size() * sizeof(T), - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event *) &events->front() : nullptr, - (event != nullptr) ? &tmp : NULL), __ENQUEUE_FILL_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a raw SVM pointer. - */ - template - cl_int enqueueMapSVM( - T* ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMap( - object_, blocking, flags, static_cast(ptr), size, - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_MAP_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a cl::pointer instance. 
- */ - template - cl_int enqueueMapSVM( - cl::pointer &ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMap( - object_, blocking, flags, static_cast(ptr.get()), size, - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_MAP_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host to update a region of a coarse-grained SVM buffer. - * This variant takes a cl::vector instance. - */ - template - cl_int enqueueMapSVM( - cl::vector &container, - cl_bool blocking, - cl_map_flags flags, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMap( - object_, blocking, flags, static_cast(container.data()), container.size()*sizeof(T), - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_MAP_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + /** + * Enqueues a command that copies a region of memory from the source pointer to the + * destination pointer. This function is specifically for transferring data between the host + * and a coarse-grained SVM buffer. + */ + template + cl_int enqueueMemcpySVM(T *dst_ptr, const T *src_ptr, cl_bool blocking, size_type size, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMemcpy( + object_, + blocking, + static_cast(dst_ptr), + static_cast(src_ptr), + size, + (events != nullptr) ? 
(cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_COPY_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + *Enqueues a command that will copy data from one coarse-grained SVM buffer to another. + *This function takes two cl::pointer instances representing the destination and source + *buffers. + */ + template + cl_int enqueueMemcpySVM(cl::pointer &dst_ptr, const cl::pointer &src_ptr, + cl_bool blocking, size_type size, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMemcpy( + object_, + blocking, + static_cast(dst_ptr.get()), + static_cast(src_ptr.get()), + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_COPY_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM + * buffer. This variant takes a cl::vector instance. + */ + template + cl_int enqueueMemcpySVM(cl::vector &dst_container, + const cl::vector &src_container, cl_bool blocking, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + if (src_container.size() != dst_container.size()) { + return detail::errHandler(CL_INVALID_VALUE, __ENQUEUE_COPY_SVM_ERR); + } + cl_int err = detail::errHandler( + ::clEnqueueSVMMemcpy( + object_, + blocking, + static_cast(dst_container.data()), + static_cast(src_container.data()), + dst_container.size() * sizeof(T), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != NULL) ? 
&tmp : nullptr), + __ENQUEUE_COPY_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command to fill a SVM buffer with a pattern. + * + */ + template + cl_int enqueueMemFillSVM(T *ptr, PatternType pattern, size_type size, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMemFill( + object_, + static_cast(ptr), + static_cast(&pattern), + sizeof(PatternType), + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_FILL_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that fills a region of a coarse-grained SVM buffer with a specified + * pattern. This variant takes a cl::pointer instance. + */ + template + cl_int enqueueMemFillSVM(cl::pointer &ptr, PatternType pattern, size_type size, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMemFill( + object_, + static_cast(ptr.get()), + static_cast(&pattern), + sizeof(PatternType), + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_FILL_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to fill a region of a coarse-grained SVM + * buffer with a specified pattern. This variant takes a cl::vector instance. 
+ */ + template + cl_int enqueueMemFillSVM(cl::vector &container, PatternType pattern, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMemFill( + object_, + static_cast(container.data()), + static_cast(&pattern), + sizeof(PatternType), + container.size() * sizeof(T), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : NULL), + __ENQUEUE_FILL_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM + * buffer. This variant takes a raw SVM pointer. + */ + template + cl_int enqueueMapSVM(T *ptr, cl_bool blocking, cl_map_flags flags, size_type size, + const vector *events = nullptr, Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMap( + object_, + blocking, + flags, + static_cast(ptr), + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_MAP_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM + * buffer. This variant takes a cl::pointer instance. + */ + template + cl_int enqueueMapSVM(cl::pointer &ptr, cl_bool blocking, cl_map_flags flags, + size_type size, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMap( + object_, + blocking, + flags, + static_cast(ptr.get()), + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? 
(cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_MAP_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host to update a region of a coarse-grained SVM + * buffer. This variant takes a cl::vector instance. + */ + template + cl_int enqueueMapSVM(cl::vector &container, cl_bool blocking, cl_map_flags flags, + const vector *events = nullptr, Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMap( + object_, + blocking, + flags, + static_cast(container.data()), + container.size() * sizeof(T), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_MAP_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - object_, memory(), mapped_ptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - + cl_int enqueueUnmapMemObject(const Memory &memory, void *mapped_ptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + object_, + memory(), + mapped_ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - /** - * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. - * This variant takes a raw SVM pointer. - */ - template - cl_int enqueueUnmapSVM( - T* ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueSVMUnmap( - object_, static_cast(ptr), - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_UNMAP_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. - * This variant takes a cl::pointer instance. - */ - template - cl_int enqueueUnmapSVM( - cl::pointer &ptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueSVMUnmap( - object_, static_cast(ptr.get()), - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_UNMAP_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL runtime. - * This variant takes a cl::vector instance. - */ - template - cl_int enqueueUnmapSVM( - cl::vector &container, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueSVMUnmap( - object_, static_cast(container.data()), - (events != nullptr) ? 
(cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_UNMAP_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL + * runtime. This variant takes a raw SVM pointer. + */ + template + cl_int enqueueUnmapSVM(T *ptr, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, + static_cast(ptr), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_UNMAP_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL + * runtime. This variant takes a cl::pointer instance. + */ + template + cl_int enqueueUnmapSVM(cl::pointer &ptr, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, + static_cast(ptr.get()), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_UNMAP_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will release a coarse-grained SVM buffer back to the OpenCL + * runtime. This variant takes a cl::vector instance. 
+ */ + template + cl_int enqueueUnmapSVM(cl::vector &container, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMUnmap( + object_, + static_cast(container.data()), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_UNMAP_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_TARGET_OPENCL_VERSION >= 120 - /** - * Enqueues a marker command which waits for either a list of events to complete, - * or all previously enqueued commands to complete. - * - * Enqueues a marker command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command returns an event which can be waited on, - * i.e. this event can be waited on to insure that all events either in the event_wait_list - * or all previously enqueued commands, queued before this command to command_queue, - * have completed. - */ - cl_int enqueueMarkerWithWaitList( - const vector *events = nullptr, - Event *event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarkerWithWaitList( - object_, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_MARKER_WAIT_LIST_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * A synchronization point that enqueues a barrier operation. 
- * - * Enqueues a barrier command which waits for either a list of events to complete, - * or if the list is empty it waits for all commands previously enqueued in command_queue - * to complete before it completes. This command blocks command execution, that is, any - * following commands enqueued after it do not execute until it completes. This command - * returns an event which can be waited on, i.e. this event can be waited on to insure that - * all events either in the event_wait_list or all previously enqueued commands, queued - * before this command to command_queue, have completed. - */ - cl_int enqueueBarrierWithWaitList( - const vector *events = nullptr, - Event *event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueBarrierWithWaitList( - object_, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_BARRIER_WAIT_LIST_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command to indicate with which device a set of memory objects - * should be associated. - */ - cl_int enqueueMigrateMemObjects( - const vector &memObjects, - cl_mem_migration_flags flags, - const vector* events = nullptr, - Event* event = nullptr - ) const - { - cl_event tmp; - - vector localMemObjects(memObjects.size()); - - for( int i = 0; i < (int)memObjects.size(); ++i ) { - localMemObjects[i] = memObjects[i](); - } - - cl_int err = detail::errHandler( - ::clEnqueueMigrateMemObjects( - object_, - (cl_uint)memObjects.size(), - localMemObjects.data(), - flags, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + /** + * Enqueues a marker command which waits for either a list of events to complete, + * or all previously enqueued commands to complete. + * + * Enqueues a marker command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command returns an event which can be waited on, + * i.e. this event can be waited on to insure that all events either in the event_wait_list + * or all previously enqueued commands, queued before this command to command_queue, + * have completed. + */ + cl_int enqueueMarkerWithWaitList(const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueMarkerWithWaitList( + object_, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_MARKER_WAIT_LIST_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * A synchronization point that enqueues a barrier operation. + * + * Enqueues a barrier command which waits for either a list of events to complete, + * or if the list is empty it waits for all commands previously enqueued in command_queue + * to complete before it completes. This command blocks command execution, that is, any + * following commands enqueued after it do not execute until it completes. This command + * returns an event which can be waited on, i.e. this event can be waited on to insure that + * all events either in the event_wait_list or all previously enqueued commands, queued + * before this command to command_queue, have completed. 
+ */ + cl_int enqueueBarrierWithWaitList(const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueBarrierWithWaitList( + object_, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_BARRIER_WAIT_LIST_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command to indicate with which device a set of memory objects + * should be associated. + */ + cl_int enqueueMigrateMemObjects(const vector &memObjects, + cl_mem_migration_flags flags, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + + vector localMemObjects(memObjects.size()); + + for (int i = 0; i < (int)memObjects.size(); ++i) { + localMemObjects[i] = memObjects[i](); + } + + cl_int err = detail::errHandler( + ::clEnqueueMigrateMemObjects( + object_, + (cl_uint)memObjects.size(), + localMemObjects.data(), + flags, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 120 - #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /** - * Enqueues a command that will allow the host associate ranges within a set of - * SVM allocations with a device. - * @param sizes - The length from each pointer to migrate. 
- */ - template - cl_int enqueueMigrateSVM( - const cl::vector &svmRawPointers, - const cl::vector &sizes, - cl_mem_migration_flags flags = 0, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler(::clEnqueueSVMMigrateMem( - object_, - svmRawPointers.size(), static_cast(svmRawPointers.data()), - sizes.data(), // array of sizes not passed - flags, - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_MIGRATE_SVM_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - /** - * Enqueues a command that will allow the host associate a set of SVM allocations with - * a device. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector &svmRawPointers, - cl_mem_migration_flags flags = 0, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueMigrateSVM(svmRawPointers, cl::vector(svmRawPointers.size()), flags, events, event); - } - - - /** - * Enqueues a command that will allow the host associate ranges within a set of - * SVM allocations with a device. - * @param sizes - The length from each pointer to migrate. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmPointers, - const cl::vector &sizes, - cl_mem_migration_flags flags = 0, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl::vector svmRawPointers; - svmRawPointers.reserve(svmPointers.size()); - for (auto p : svmPointers) { - svmRawPointers.push_back(static_cast(p.get())); - } - - return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); - } - - - /** - * Enqueues a command that will allow the host associate a set of SVM allocations with - * a device. 
- */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmPointers, - cl_mem_migration_flags flags = 0, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueMigrateSVM(svmPointers, cl::vector(svmPointers.size()), flags, events, event); - } - - /** - * Enqueues a command that will allow the host associate ranges within a set of - * SVM allocations with a device. - * @param sizes - The length from the beginning of each container to migrate. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmContainers, - const cl::vector &sizes, - cl_mem_migration_flags flags = 0, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl::vector svmRawPointers; - svmRawPointers.reserve(svmContainers.size()); - for (auto p : svmContainers) { - svmRawPointers.push_back(static_cast(p.data())); - } - - return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); - } - - /** - * Enqueues a command that will allow the host associate a set of SVM allocations with - * a device. - */ - template - cl_int enqueueMigrateSVM( - const cl::vector> &svmContainers, - cl_mem_migration_flags flags = 0, - const vector* events = nullptr, - Event* event = nullptr) const - { - return enqueueMigrateSVM(svmContainers, cl::vector(svmContainers.size()), flags, events, event); - } + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from each pointer to migrate. + */ + template + cl_int + enqueueMigrateSVM(const cl::vector &svmRawPointers, const cl::vector &sizes, + cl_mem_migration_flags flags = 0, const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueSVMMigrateMem( + object_, + svmRawPointers.size(), + static_cast(svmRawPointers.data()), + sizes.data(), // array of sizes not passed + flags, + (events != nullptr) ? 
(cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_MIGRATE_SVM_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. + */ + template + cl_int + enqueueMigrateSVM(const cl::vector &svmRawPointers, cl_mem_migration_flags flags = 0, + const vector *events = nullptr, Event *event = nullptr) const { + return enqueueMigrateSVM( + svmRawPointers, cl::vector(svmRawPointers.size()), flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from each pointer to migrate. + */ + template + cl_int + enqueueMigrateSVM(const cl::vector> &svmPointers, + const cl::vector &sizes, cl_mem_migration_flags flags = 0, + const vector *events = nullptr, Event *event = nullptr) const { + cl::vector svmRawPointers; + svmRawPointers.reserve(svmPointers.size()); + for (auto p : svmPointers) { svmRawPointers.push_back(static_cast(p.get())); } + + return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. + */ + template + cl_int enqueueMigrateSVM(const cl::vector> &svmPointers, + cl_mem_migration_flags flags = 0, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueMigrateSVM( + svmPointers, cl::vector(svmPointers.size()), flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate ranges within a set of + * SVM allocations with a device. + * @param sizes - The length from the beginning of each container to migrate. 
+ */ + template + cl_int + enqueueMigrateSVM(const cl::vector> &svmContainers, + const cl::vector &sizes, cl_mem_migration_flags flags = 0, + const vector *events = nullptr, Event *event = nullptr) const { + cl::vector svmRawPointers; + svmRawPointers.reserve(svmContainers.size()); + for (auto p : svmContainers) { + svmRawPointers.push_back(static_cast(p.data())); + } + + return enqueueMigrateSVM(svmRawPointers, sizes, flags, events, event); + } + + /** + * Enqueues a command that will allow the host associate a set of SVM allocations with + * a device. + */ + template + cl_int enqueueMigrateSVM(const cl::vector> &svmContainers, + cl_mem_migration_flags flags = 0, + const vector *events = nullptr, + Event *event = nullptr) const { + return enqueueMigrateSVM( + svmContainers, cl::vector(svmContainers.size()), flags, events, event); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 - - cl_int enqueueNDRangeKernel( - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNDRangeKernel( - object_, kernel(), (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const size_type*) offset : nullptr, - (const size_type*) global, - local.dimensions() != 0 ? (const size_type*) local : nullptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_NDRANGE_KERNEL_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + + cl_int enqueueNDRangeKernel(const Kernel &kernel, const NDRange &offset, + const NDRange &global, const NDRange &local = NullRange, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNDRangeKernel( + object_, + kernel(), + (cl_uint)global.dimensions(), + offset.dimensions() != 0 ? (const size_type *)offset : nullptr, + (const size_type *)global, + local.dimensions() != 0 ? (const size_type *)local : nullptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_NDRANGE_KERNEL_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) - CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int enqueueTask( - const Kernel& kernel, - const vector* events = nullptr, - Event* event = nullptr) const CL_API_SUFFIX__VERSION_1_2_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueTask( - object_, kernel(), - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_TASK_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + CL_API_PREFIX__VERSION_1_2_DEPRECATED cl_int + enqueueTask(const Kernel &kernel, const vector *events = nullptr, + Event *event = nullptr) const CL_API_SUFFIX__VERSION_1_2_DEPRECATED { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueTask( + object_, + kernel(), + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_TASK_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_2_APIS) - cl_int enqueueNativeKernel( - void (CL_CALLBACK *userFptr)(void *), - std::pair args, - const vector* mem_objects = nullptr, - const vector* mem_locs = nullptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueNativeKernel( - object_, userFptr, args.first, args.second, - (mem_objects != nullptr) ? (cl_uint) mem_objects->size() : 0, - (mem_objects->size() > 0 ) ? reinterpret_cast(mem_objects->data()) : nullptr, - (mem_locs != nullptr && mem_locs->size() > 0) ? (const void **) &mem_locs->front() : nullptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_NATIVE_KERNEL); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + cl_int enqueueNativeKernel(void(CL_CALLBACK *userFptr)(void *), + std::pair args, + const vector *mem_objects = nullptr, + const vector *mem_locs = nullptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueNativeKernel( + object_, + userFptr, + args.first, + args.second, + (mem_objects != nullptr) ? (cl_uint)mem_objects->size() : 0, + (mem_objects->size() > 0) ? reinterpret_cast(mem_objects->data()) + : nullptr, + (mem_locs != nullptr && mem_locs->size() > 0) ? (const void **)&mem_locs->front() + : nullptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_NATIVE_KERNEL); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } /** * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_API_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueMarker(Event* event = nullptr) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueMarker( - object_, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_MARKER_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - CL_API_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueWaitForEvents(const vector& events) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueWaitForEvents( - object_, - (cl_uint) events.size(), - events.size() > 0 ? (const cl_event*) &events.front() : nullptr), - __ENQUEUE_WAIT_FOR_EVENTS_ERR); - } + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueMarker(Event *event = nullptr) const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { + cl_event tmp; + cl_int err = + detail::errHandler(::clEnqueueMarker(object_, (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_MARKER_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueWaitForEvents(const vector &events) const + CL_API_SUFFIX__VERSION_1_1_DEPRECATED { + return detail::errHandler(::clEnqueueWaitForEvents(object_, + (cl_uint)events.size(), + events.size() > 0 + ? (const cl_event *)&events.front() + : nullptr), + __ENQUEUE_WAIT_FOR_EVENTS_ERR); + } #endif // defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - cl_int enqueueAcquireGLObjects( - const vector* mem_objects = nullptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueAcquireGLObjects( - object_, - (mem_objects != nullptr) ? 
(cl_uint) mem_objects->size() : 0, - (mem_objects != nullptr && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): nullptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseGLObjects( - const vector* mem_objects = nullptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueReleaseGLObjects( - object_, - (mem_objects != nullptr) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != nullptr && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): nullptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - -#if defined (CL_HPP_USE_DX_INTEROP) -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); -typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( - cl_command_queue command_queue, cl_uint num_objects, - const cl_mem* mem_objects, cl_uint num_events_in_wait_list, - const cl_event* event_wait_list, cl_event* event); - - cl_int enqueueAcquireD3D10Objects( - const vector* mem_objects = nullptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = nullptr; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireD3D10ObjectsKHR); -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueAcquireD3D10ObjectsKHR); -#endif - - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueAcquireD3D10ObjectsKHR( - object_, - (mem_objects != nullptr) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != nullptr && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): nullptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? 
&tmp : nullptr), - __ENQUEUE_ACQUIRE_GL_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseD3D10Objects( - const vector* mem_objects = nullptr, - const vector* events = nullptr, - Event* event = nullptr) const - { - static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = nullptr; -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_context context = getInfo(); - cl::Device device(getInfo()); - cl_platform_id platform = device.getInfo(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseD3D10ObjectsKHR); -#endif -#if CL_HPP_MINIMUM_OPENCL_VERSION < 120 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueReleaseD3D10ObjectsKHR); -#endif + cl_int enqueueAcquireGLObjects(const vector *mem_objects = nullptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueAcquireGLObjects( + object_, + (mem_objects != nullptr) ? (cl_uint)mem_objects->size() : 0, + (mem_objects != nullptr && mem_objects->size() > 0) + ? (const cl_mem *)&mem_objects->front() + : nullptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueReleaseGLObjects(const vector *mem_objects = nullptr, + const vector *events = nullptr, + Event *event = nullptr) const { + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueReleaseGLObjects( + object_, + (mem_objects != nullptr) ? (cl_uint)mem_objects->size() : 0, + (mem_objects != nullptr && mem_objects->size() > 0) + ? (const cl_mem *)&mem_objects->front() + : nullptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } - cl_event tmp; - cl_int err = detail::errHandler( - pfn_clEnqueueReleaseD3D10ObjectsKHR( - object_, - (mem_objects != nullptr) ? (cl_uint) mem_objects->size() : 0, - (mem_objects != nullptr && mem_objects->size() > 0) ? (const cl_mem *) &mem_objects->front(): nullptr, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_RELEASE_GL_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } +#if defined(CL_HPP_USE_DX_INTEROP) + typedef CL_API_ENTRY cl_int(CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); + typedef CL_API_ENTRY cl_int(CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)( + cl_command_queue command_queue, cl_uint num_objects, const cl_mem *mem_objects, + cl_uint num_events_in_wait_list, const cl_event *event_wait_list, cl_event *event); + + cl_int enqueueAcquireD3D10Objects(const vector *mem_objects = nullptr, + const vector *events = nullptr, + Event *event = nullptr) const { + static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = + nullptr; +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueAcquireD3D10ObjectsKHR); +# endif +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueAcquireD3D10ObjectsKHR); +# endif + + cl_event tmp; + cl_int err = + detail::errHandler(pfn_clEnqueueAcquireD3D10ObjectsKHR( + object_, + (mem_objects != nullptr) ? 
(cl_uint)mem_objects->size() : 0, + (mem_objects != nullptr && mem_objects->size() > 0) + ? (const cl_mem *)&mem_objects->front() + : nullptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? &tmp : nullptr), + __ENQUEUE_ACQUIRE_GL_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueReleaseD3D10Objects(const vector *mem_objects = nullptr, + const vector *events = nullptr, + Event *event = nullptr) const { + static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = + nullptr; +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_context context = getInfo(); + cl::Device device(getInfo()); + cl_platform_id platform = device.getInfo(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueReleaseD3D10ObjectsKHR); +# endif +# if CL_HPP_MINIMUM_OPENCL_VERSION < 120 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueReleaseD3D10ObjectsKHR); +# endif + + cl_event tmp; + cl_int err = detail::errHandler( + pfn_clEnqueueReleaseD3D10ObjectsKHR( + object_, + (mem_objects != nullptr) ? (cl_uint)mem_objects->size() : 0, + (mem_objects != nullptr && mem_objects->size() > 0) + ? (const cl_mem *)&mem_objects->front() + : nullptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_RELEASE_GL_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif /** * Deprecated APIs for 1.2 */ #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) - CL_API_PREFIX__VERSION_1_1_DEPRECATED - cl_int enqueueBarrier() const CL_API_SUFFIX__VERSION_1_1_DEPRECATED - { - return detail::errHandler( - ::clEnqueueBarrier(object_), - __ENQUEUE_BARRIER_ERR); - } + CL_API_PREFIX__VERSION_1_1_DEPRECATED + cl_int enqueueBarrier() const CL_API_SUFFIX__VERSION_1_1_DEPRECATED { + return detail::errHandler(::clEnqueueBarrier(object_), __ENQUEUE_BARRIER_ERR); + } #endif // CL_USE_DEPRECATED_OPENCL_1_1_APIS - cl_int flush() const - { - return detail::errHandler(::clFlush(object_), __FLUSH_ERR); - } + cl_int flush() const { return detail::errHandler(::clFlush(object_), __FLUSH_ERR); } - cl_int finish() const - { - return detail::errHandler(::clFinish(object_), __FINISH_ERR); - } + cl_int finish() const { return detail::errHandler(::clFinish(object_), __FINISH_ERR); } #ifdef cl_khr_external_memory - cl_int enqueueAcquireExternalMemObjects( - const vector& mem_objects, - const vector* events_wait = nullptr, - Event *event = nullptr) - { - cl_int err = CL_INVALID_OPERATION; - cl_event tmp; - - std::call_once(ext_memory_initialized_, initMemoryExtension, this->getInfo()); - - if (pfn_clEnqueueAcquireExternalMemObjectsKHR) - { - err = pfn_clEnqueueAcquireExternalMemObjectsKHR( - object_, - static_cast(mem_objects.size()), - (mem_objects.size() > 0) ? reinterpret_cast(mem_objects.data()) : nullptr, - (events_wait != nullptr) ? static_cast(events_wait->size()) : 0, - (events_wait != nullptr && events_wait->size() > 0) ? 
reinterpret_cast(events_wait->data()) : nullptr, - &tmp); - } - - detail::errHandler(err, __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } - - cl_int enqueueReleaseExternalMemObjects( - const vector& mem_objects, - const vector* events_wait = nullptr, - Event *event = nullptr) - { - cl_int err = CL_INVALID_OPERATION; - cl_event tmp; - - std::call_once(ext_memory_initialized_, initMemoryExtension, this->getInfo()); - - if (pfn_clEnqueueReleaseExternalMemObjectsKHR) - { - err = pfn_clEnqueueReleaseExternalMemObjectsKHR( - object_, - static_cast(mem_objects.size()), - (mem_objects.size() > 0) ? reinterpret_cast(mem_objects.data()) : nullptr, - (events_wait != nullptr) ? static_cast(events_wait->size()) : 0, - (events_wait != nullptr && events_wait->size() > 0) ? reinterpret_cast(events_wait->data()) : nullptr, - &tmp); - } - - detail::errHandler(err, __ENQUEUE_RELEASE_EXTERNAL_MEMORY_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; - } + cl_int enqueueAcquireExternalMemObjects(const vector &mem_objects, + const vector *events_wait = nullptr, + Event *event = nullptr) { + cl_int err = CL_INVALID_OPERATION; + cl_event tmp; + + std::call_once( + ext_memory_initialized_, initMemoryExtension, this->getInfo()); + + if (pfn_clEnqueueAcquireExternalMemObjectsKHR) { + err = pfn_clEnqueueAcquireExternalMemObjectsKHR( + object_, + static_cast(mem_objects.size()), + (mem_objects.size() > 0) ? reinterpret_cast(mem_objects.data()) + : nullptr, + (events_wait != nullptr) ? static_cast(events_wait->size()) : 0, + (events_wait != nullptr && events_wait->size() > 0) + ? 
reinterpret_cast(events_wait->data()) + : nullptr, + &tmp); + } + + detail::errHandler(err, __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + cl_int enqueueReleaseExternalMemObjects(const vector &mem_objects, + const vector *events_wait = nullptr, + Event *event = nullptr) { + cl_int err = CL_INVALID_OPERATION; + cl_event tmp; + + std::call_once( + ext_memory_initialized_, initMemoryExtension, this->getInfo()); + + if (pfn_clEnqueueReleaseExternalMemObjectsKHR) { + err = pfn_clEnqueueReleaseExternalMemObjectsKHR( + object_, + static_cast(mem_objects.size()), + (mem_objects.size() > 0) ? reinterpret_cast(mem_objects.data()) + : nullptr, + (events_wait != nullptr) ? static_cast(events_wait->size()) : 0, + (events_wait != nullptr && events_wait->size() > 0) + ? reinterpret_cast(events_wait->data()) + : nullptr, + &tmp); + } + + detail::errHandler(err, __ENQUEUE_RELEASE_EXTERNAL_MEMORY_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #endif // cl_khr_external_memory && CL_HPP_TARGET_OPENCL_VERSION >= 300 #ifdef cl_khr_semaphore - cl_int enqueueWaitSemaphores( - const vector &sema_objects, - const vector &sema_payloads = {}, - const vector* events_wait_list = nullptr, - Event *event = nullptr) const; - - cl_int enqueueSignalSemaphores( - const vector &sema_objects, - const vector& sema_payloads = {}, - const vector* events_wait_list = nullptr, - Event* event = nullptr); + cl_int enqueueWaitSemaphores(const vector &sema_objects, + const vector &sema_payloads = {}, + const vector *events_wait_list = nullptr, + Event *event = nullptr) const; + + cl_int enqueueSignalSemaphores(const vector &sema_objects, + const vector &sema_payloads = {}, + const vector *events_wait_list = nullptr, + Event *event = nullptr); #endif // cl_khr_semaphore -}; // CommandQueue + }; // CommandQueue #ifdef cl_khr_external_memory -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag 
CommandQueue::ext_memory_initialized_; + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::ext_memory_initialized_; #endif -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::default_initialized_; -CL_HPP_DEFINE_STATIC_MEMBER_ CommandQueue CommandQueue::default_; -CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; - + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandQueue::default_initialized_; + CL_HPP_DEFINE_STATIC_MEMBER_ CommandQueue CommandQueue::default_; + CL_HPP_DEFINE_STATIC_MEMBER_ cl_int CommandQueue::default_error_ = CL_SUCCESS; #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -enum class DeviceQueueProperties : cl_command_queue_properties -{ - None = 0, - Profiling = CL_QUEUE_PROFILING_ENABLE, -}; - -inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) -{ - return static_cast(static_cast(lhs) | static_cast(rhs)); -} - -/*! \class DeviceCommandQueue - * \brief DeviceCommandQueue interface for device cl_command_queues. - */ -class DeviceCommandQueue : public detail::Wrapper -{ -public: - - /*! - * Trivial empty constructor to create a null queue. - */ - DeviceCommandQueue() { } - - /*! - * Default construct device command queue on default context and device - */ - DeviceCommandQueue(DeviceQueueProperties properties, cl_int* err = nullptr) - { - cl_int error; - cl::Context context = cl::Context::getDefault(); - cl::Device device = cl::Device::getDefault(); - - cl_command_queue_properties mergedProperties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, mergedProperties, 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } - - /*! 
- * Create a device command queue for a specified device in the passed context. - */ - DeviceCommandQueue( - const Context& context, - const Device& device, - DeviceQueueProperties properties = DeviceQueueProperties::None, - cl_int* err = nullptr) - { - cl_int error; - - cl_command_queue_properties mergedProperties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, mergedProperties, 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } - - /*! - * Create a device command queue for a specified device in the passed context. - */ - DeviceCommandQueue( - const Context& context, - const Device& device, - cl_uint queueSize, - DeviceQueueProperties properties = DeviceQueueProperties::None, - cl_int* err = nullptr) - { - cl_int error; - - cl_command_queue_properties mergedProperties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | static_cast(properties); - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, mergedProperties, - CL_QUEUE_SIZE, queueSize, - 0 }; - object_ = ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - } - - /*! \brief Constructor from cl_command_queue - takes ownership. - * - * \param retainObject will cause the constructor to retain its cl object. - * Defaults to false to maintain compatibility with - * earlier versions. 
- */ - explicit DeviceCommandQueue(const cl_command_queue& commandQueue, bool retainObject = false) : - detail::Wrapper(commandQueue, retainObject) { } - - DeviceCommandQueue& operator = (const cl_command_queue& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_queue_info name, T* param) const - { - return detail::errHandler( - detail::getInfo( - &::clGetCommandQueueInfo, object_, name, param), - __GET_COMMAND_QUEUE_INFO_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_command_queue_info, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - /*! - * Create a new default device command queue for the default device, - * in the default context and of the default size. - * If there is already a default queue for the specified device this - * function will return the pre-existing queue. - */ - static DeviceCommandQueue makeDefault( - cl_int *err = nullptr) - { - cl_int error; - cl::Context context = cl::Context::getDefault(); - cl::Device device = cl::Device::getDefault(); - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, - 0 }; - DeviceCommandQueue deviceQueue( - ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error)); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - - return deviceQueue; - } - - /*! - * Create a new default device command queue for the specified device - * and of the default size. - * If there is already a default queue for the specified device this - * function will return the pre-existing queue. 
- */ - static DeviceCommandQueue makeDefault( - const Context &context, const Device &device, cl_int *err = nullptr) - { - cl_int error; - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, - 0 }; - DeviceCommandQueue deviceQueue( - ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error)); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - - return deviceQueue; - } - - /*! - * Create a new default device command queue for the specified device - * and of the requested size in bytes. - * If there is already a default queue for the specified device this - * function will return the pre-existing queue. - */ - static DeviceCommandQueue makeDefault( - const Context &context, const Device &device, cl_uint queueSize, cl_int *err = nullptr) - { - cl_int error; - - cl_command_queue_properties properties = - CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | CL_QUEUE_ON_DEVICE_DEFAULT; - cl_queue_properties queue_properties[] = { - CL_QUEUE_PROPERTIES, properties, - CL_QUEUE_SIZE, queueSize, - 0 }; - DeviceCommandQueue deviceQueue( - ::clCreateCommandQueueWithProperties( - context(), device(), queue_properties, &error)); - - detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); - if (err != nullptr) { - *err = error; - } - - return deviceQueue; - } - - - -#if CL_HPP_TARGET_OPENCL_VERSION >= 210 - /*! - * Modify the default device command queue to be used for subsequent kernels. - * This can update the default command queue for a device repeatedly to account - * for kernels that rely on the default. - * @return updated default device command queue. 
- */ - static DeviceCommandQueue updateDefault(const Context &context, const Device &device, const DeviceCommandQueue &default_queue, cl_int *err = nullptr) - { - cl_int error; - error = clSetDefaultDeviceCommandQueue(context.get(), device.get(), default_queue.get()); - - detail::errHandler(error, __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR); - if (err != nullptr) { - *err = error; - } - return default_queue; - } - - /*! - * Return the current default command queue for the specified command queue - */ - static DeviceCommandQueue getDefault(const CommandQueue &queue, cl_int * err = nullptr) - { - return queue.getInfo(err); - } - -#endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 -}; // DeviceCommandQueue - -namespace detail -{ - // Specialization for device command queue - template <> - struct KernelArgumentHandler - { - static size_type size(const cl::DeviceCommandQueue&) { return sizeof(cl_command_queue); } - static const cl_command_queue* ptr(const cl::DeviceCommandQueue& value) { return &(value()); } - }; -} // namespace detail + enum class DeviceQueueProperties : cl_command_queue_properties { + None = 0, + Profiling = CL_QUEUE_PROFILING_ENABLE, + }; + + inline DeviceQueueProperties operator|(DeviceQueueProperties lhs, DeviceQueueProperties rhs) { + return static_cast(static_cast(lhs) | + static_cast(rhs)); + } + + /*! \class DeviceCommandQueue + * \brief DeviceCommandQueue interface for device cl_command_queues. + */ + class DeviceCommandQueue : public detail::Wrapper { + public: + /*! + * Trivial empty constructor to create a null queue. + */ + DeviceCommandQueue() {} + + /*! 
+ * Default construct device command queue on default context and device + */ + DeviceCommandQueue(DeviceQueueProperties properties, cl_int *err = nullptr) { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | + static_cast(properties); + + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, mergedProperties, 0}; + object_ = + ::clCreateCommandQueueWithProperties(context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } + + /*! + * Create a device command queue for a specified device in the passed context. + */ + DeviceCommandQueue(const Context &context, const Device &device, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int *err = nullptr) { + cl_int error; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | + static_cast(properties); + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, mergedProperties, 0}; + object_ = + ::clCreateCommandQueueWithProperties(context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } + + /*! + * Create a device command queue for a specified device in the passed context. 
+ */ + DeviceCommandQueue(const Context &context, const Device &device, cl_uint queueSize, + DeviceQueueProperties properties = DeviceQueueProperties::None, + cl_int *err = nullptr) { + cl_int error; + + cl_command_queue_properties mergedProperties = + CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | CL_QUEUE_ON_DEVICE | + static_cast(properties); + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, mergedProperties, CL_QUEUE_SIZE, queueSize, 0}; + object_ = + ::clCreateCommandQueueWithProperties(context(), device(), queue_properties, &error); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + } + + /*! \brief Constructor from cl_command_queue - takes ownership. + * + * \param retainObject will cause the constructor to retain its cl object. + * Defaults to false to maintain compatibility with + * earlier versions. + */ + explicit DeviceCommandQueue(const cl_command_queue &commandQueue, + bool retainObject = false) : + detail::Wrapper(commandQueue, retainObject) {} + + DeviceCommandQueue &operator=(const cl_command_queue &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_queue_info name, T *param) const { + return detail::errHandler( + detail::getInfo(&::clGetCommandQueueInfo, object_, name, param), + __GET_COMMAND_QUEUE_INFO_ERR); + } + + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + /*! + * Create a new default device command queue for the default device, + * in the default context and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. 
+ */ + static DeviceCommandQueue makeDefault(cl_int *err = nullptr) { + cl_int error; + cl::Context context = cl::Context::getDefault(); + cl::Device device = cl::Device::getDefault(); + + cl_command_queue_properties properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_ON_DEVICE | + CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, properties, 0}; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties(context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + + return deviceQueue; + } + + /*! + * Create a new default device command queue for the specified device + * and of the default size. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. + */ + static DeviceCommandQueue makeDefault(const Context &context, const Device &device, + cl_int *err = nullptr) { + cl_int error; + + cl_command_queue_properties properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_ON_DEVICE | + CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = {CL_QUEUE_PROPERTIES, properties, 0}; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties(context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + + return deviceQueue; + } + + /*! + * Create a new default device command queue for the specified device + * and of the requested size in bytes. + * If there is already a default queue for the specified device this + * function will return the pre-existing queue. 
+ */ + static DeviceCommandQueue makeDefault(const Context &context, const Device &device, + cl_uint queueSize, cl_int *err = nullptr) { + cl_int error; + + cl_command_queue_properties properties = CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE | + CL_QUEUE_ON_DEVICE | + CL_QUEUE_ON_DEVICE_DEFAULT; + cl_queue_properties queue_properties[] = { + CL_QUEUE_PROPERTIES, properties, CL_QUEUE_SIZE, queueSize, 0}; + DeviceCommandQueue deviceQueue( + ::clCreateCommandQueueWithProperties(context(), device(), queue_properties, &error)); + + detail::errHandler(error, __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR); + if (err != nullptr) { *err = error; } + + return deviceQueue; + } + +# if CL_HPP_TARGET_OPENCL_VERSION >= 210 + /*! + * Modify the default device command queue to be used for subsequent kernels. + * This can update the default command queue for a device repeatedly to account + * for kernels that rely on the default. + * @return updated default device command queue. + */ + static DeviceCommandQueue updateDefault(const Context &context, const Device &device, + const DeviceCommandQueue &default_queue, + cl_int *err = nullptr) { + cl_int error; + error = + clSetDefaultDeviceCommandQueue(context.get(), device.get(), default_queue.get()); + + detail::errHandler(error, __SET_DEFAULT_DEVICE_COMMAND_QUEUE_ERR); + if (err != nullptr) { *err = error; } + return default_queue; + } + + /*! 
+ * Return the current default command queue for the specified command queue + */ + static DeviceCommandQueue getDefault(const CommandQueue &queue, cl_int *err = nullptr) { + return queue.getInfo(err); + } + +# endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 210 + }; // DeviceCommandQueue + + namespace detail { + // Specialization for device command queue + template<> + struct KernelArgumentHandler { + static size_type size(const cl::DeviceCommandQueue &) { + return sizeof(cl_command_queue); + } + static const cl_command_queue *ptr(const cl::DeviceCommandQueue &value) { + return &(value()); + } + }; + } // namespace detail #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - -template< typename IteratorType > -Buffer::Buffer( - const Context &context, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if( readOnly ) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if( useHostPtr ) { - flags |= CL_MEM_USE_HOST_PTR; - } - - size_type size = sizeof(DataType)*(endIterator - startIterator); - - if( useHostPtr ) { - object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); - } else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - - if( !useHostPtr ) { - CommandQueue queue(context, 0, &error); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } -} - -template< typename IteratorType > -Buffer::Buffer( - const CommandQueue &queue, - IteratorType startIterator, - IteratorType endIterator, - bool readOnly, - bool useHostPtr, - cl_int* 
err) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - cl_mem_flags flags = 0; - if (readOnly) { - flags |= CL_MEM_READ_ONLY; - } - else { - flags |= CL_MEM_READ_WRITE; - } - if (useHostPtr) { - flags |= CL_MEM_USE_HOST_PTR; - } - - size_type size = sizeof(DataType)*(endIterator - startIterator); - - Context context = queue.getInfo(); - - if (useHostPtr) { - object_ = ::clCreateBuffer(context(), flags, size, const_cast(&*startIterator), &error); - } - else { - object_ = ::clCreateBuffer(context(), flags, size, 0, &error); - } - - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - - if (!useHostPtr) { - error = cl::copy(queue, startIterator, endIterator, *this); - detail::errHandler(error, __CREATE_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - } -} - -inline cl_int enqueueReadBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline cl_int enqueueWriteBuffer( - const Buffer& buffer, - cl_bool blocking, - size_type offset, - size_type size, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); -} - -inline void* enqueueMapBuffer( - const Buffer& buffer, - cl_bool blocking, - cl_map_flags flags, - size_type offset, - size_type size, - const vector* events = nullptr, - Event* event = nullptr, - cl_int* err = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - 
detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - - void * result = ::clEnqueueMapBuffer( - queue(), buffer(), blocking, flags, offset, size, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*) &events->front() : nullptr, - (cl_event*) event, - &error); - - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (err != nullptr) { - *err = error; - } - return result; -} - + template + Buffer::Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr, cl_int *err) { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { flags |= CL_MEM_USE_HOST_PTR; } + + size_type size = sizeof(DataType) * (endIterator - startIterator); + + if (useHostPtr) { + object_ = ::clCreateBuffer( + context(), flags, size, const_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + + if (!useHostPtr) { + CommandQueue queue(context, 0, &error); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } + } + + template + Buffer::Buffer(const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, + bool readOnly, bool useHostPtr, cl_int *err) { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + cl_mem_flags flags = 0; + if (readOnly) { + flags |= CL_MEM_READ_ONLY; + } else { + flags |= CL_MEM_READ_WRITE; + } + if (useHostPtr) { flags |= CL_MEM_USE_HOST_PTR; } + + size_type size = 
sizeof(DataType) * (endIterator - startIterator); + + Context context = queue.getInfo(); + + if (useHostPtr) { + object_ = ::clCreateBuffer( + context(), flags, size, const_cast(&*startIterator), &error); + } else { + object_ = ::clCreateBuffer(context(), flags, size, 0, &error); + } + + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + + if (!useHostPtr) { + error = cl::copy(queue, startIterator, endIterator, *this); + detail::errHandler(error, __CREATE_BUFFER_ERR); + if (err != nullptr) { *err = error; } + } + } + + inline cl_int enqueueReadBuffer(const Buffer &buffer, cl_bool blocking, size_type offset, + size_type size, void *ptr, + const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event); + } + + inline cl_int enqueueWriteBuffer(const Buffer &buffer, cl_bool blocking, size_type offset, + size_type size, const void *ptr, + const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event); + } + + inline void *enqueueMapBuffer(const Buffer &buffer, cl_bool blocking, cl_map_flags flags, + size_type offset, size_type size, + const vector *events = nullptr, Event *event = nullptr, + cl_int *err = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != nullptr) { *err = error; } + + void *result = ::clEnqueueMapBuffer( + queue(), + buffer(), + blocking, + flags, + offset, + size, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? 
(cl_event *)&events->front() : nullptr, + (cl_event *)event, + &error); + + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (err != nullptr) { *err = error; } + return result; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/** - * Enqueues to the default queue a command that will allow the host to - * update a region of a coarse-grained SVM buffer. - * This variant takes a raw SVM pointer. - */ -template -inline cl_int enqueueMapSVM( - T* ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events, - Event* event) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_MAP_SVM_ERR); - } - - return queue.enqueueMapSVM( - ptr, blocking, flags, size, events, event); -} - -/** - * Enqueues to the default queue a command that will allow the host to - * update a region of a coarse-grained SVM buffer. - * This variant takes a cl::pointer instance. - */ -template -inline cl_int enqueueMapSVM( - cl::pointer &ptr, - cl_bool blocking, - cl_map_flags flags, - size_type size, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - } - - return queue.enqueueMapSVM( - ptr, blocking, flags, size, events, event); -} - -/** - * Enqueues to the default queue a command that will allow the host to - * update a region of a coarse-grained SVM buffer. - * This variant takes a cl::vector instance. 
- */ -template -inline cl_int enqueueMapSVM( - cl::vector &container, - cl_bool blocking, - cl_map_flags flags, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_MAP_SVM_ERR); - } - - return queue.enqueueMapSVM( - container, blocking, flags, events, event); -} + /** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a raw SVM pointer. + */ + template + inline cl_int enqueueMapSVM(T *ptr, cl_bool blocking, cl_map_flags flags, size_type size, + const vector *events, Event *event) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { return detail::errHandler(error, __ENQUEUE_MAP_SVM_ERR); } + + return queue.enqueueMapSVM(ptr, blocking, flags, size, events, event); + } + + /** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::pointer instance. + */ + template + inline cl_int enqueueMapSVM(cl::pointer &ptr, cl_bool blocking, cl_map_flags flags, + size_type size, const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { return detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); } + + return queue.enqueueMapSVM(ptr, blocking, flags, size, events, event); + } + + /** + * Enqueues to the default queue a command that will allow the host to + * update a region of a coarse-grained SVM buffer. + * This variant takes a cl::vector instance. 
+ */ + template + inline cl_int enqueueMapSVM(cl::vector &container, cl_bool blocking, + cl_map_flags flags, const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { return detail::errHandler(error, __ENQUEUE_MAP_SVM_ERR); } + + return queue.enqueueMapSVM(container, blocking, flags, events, event); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -inline cl_int enqueueUnmapMemObject( - const Memory& memory, - void* mapped_ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); - if (error != CL_SUCCESS) { - return error; - } - - cl_event tmp; - cl_int err = detail::errHandler( - ::clEnqueueUnmapMemObject( - queue(), memory(), mapped_ptr, - (events != nullptr) ? (cl_uint)events->size() : 0, - (events != nullptr && events->size() > 0) ? (cl_event*)&events->front() : nullptr, - (event != nullptr) ? &tmp : nullptr), - __ENQUEUE_UNMAP_MEM_OBJECT_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; -} + inline cl_int enqueueUnmapMemObject(const Memory &memory, void *mapped_ptr, + const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR); + if (error != CL_SUCCESS) { return error; } + + cl_event tmp; + cl_int err = detail::errHandler( + ::clEnqueueUnmapMemObject( + queue(), + memory(), + mapped_ptr, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (event != nullptr) ? 
&tmp : nullptr), + __ENQUEUE_UNMAP_MEM_OBJECT_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/** - * Enqueues to the default queue a command that will release a coarse-grained - * SVM buffer back to the OpenCL runtime. - * This variant takes a raw SVM pointer. - */ -template -inline cl_int enqueueUnmapSVM( - T* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_UNMAP_SVM_ERR); - } - - return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), - __ENQUEUE_UNMAP_SVM_ERR); - -} - -/** - * Enqueues to the default queue a command that will release a coarse-grained - * SVM buffer back to the OpenCL runtime. - * This variant takes a cl::pointer instance. - */ -template -inline cl_int enqueueUnmapSVM( - cl::pointer &ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_UNMAP_SVM_ERR); - } - - return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), - __ENQUEUE_UNMAP_SVM_ERR); -} - -/** - * Enqueues to the default queue a command that will release a coarse-grained - * SVM buffer back to the OpenCL runtime. - * This variant takes a cl::vector instance. 
- */ -template -inline cl_int enqueueUnmapSVM( - cl::vector &container, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) { - return detail::errHandler(error, __ENQUEUE_UNMAP_SVM_ERR); - } - - return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), - __ENQUEUE_UNMAP_SVM_ERR); -} + /** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a raw SVM pointer. + */ + template + inline cl_int enqueueUnmapSVM(T *ptr, const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { return detail::errHandler(error, __ENQUEUE_UNMAP_SVM_ERR); } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_SVM_ERR); + } + + /** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::pointer instance. + */ + template + inline cl_int enqueueUnmapSVM(cl::pointer &ptr, const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { return detail::errHandler(error, __ENQUEUE_UNMAP_SVM_ERR); } + + return detail::errHandler(queue.enqueueUnmapSVM(ptr, events, event), + __ENQUEUE_UNMAP_SVM_ERR); + } + + /** + * Enqueues to the default queue a command that will release a coarse-grained + * SVM buffer back to the OpenCL runtime. + * This variant takes a cl::vector instance. 
+ */ + template + inline cl_int enqueueUnmapSVM(cl::vector &container, + const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) { return detail::errHandler(error, __ENQUEUE_UNMAP_SVM_ERR); } + + return detail::errHandler(queue.enqueueUnmapSVM(container, events, event), + __ENQUEUE_UNMAP_SVM_ERR); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -inline cl_int enqueueCopyBuffer( - const Buffer& src, - const Buffer& dst, - size_type src_offset, - size_type dst_offset, - size_type size, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, startIterator, endIterator, buffer); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses default command queue. - */ -template< typename IteratorType > -inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - if (error != CL_SUCCESS) - return error; - - return cl::copy(queue, buffer, startIterator, endIterator); -} - -/** - * Blocking copy operation between iterators and a buffer. - * Host to Device. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - size_type length = endIterator-startIterator; - size_type byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } + inline cl_int enqueueCopyBuffer(const Buffer &src, const Buffer &dst, size_type src_offset, + size_type dst_offset, size_type size, + const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event); + } + + /** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses default command queue. + */ + template + inline cl_int copy(IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) return error; + + return cl::copy(queue, startIterator, endIterator, buffer); + } + + /** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses default command queue. + */ + template + inline cl_int copy(const cl::Buffer &buffer, IteratorType startIterator, + IteratorType endIterator) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + if (error != CL_SUCCESS) return error; + + return cl::copy(queue, buffer, startIterator, endIterator); + } + + /** + * Blocking copy operation between iterators and a buffer. + * Host to Device. + * Uses specified queue. 
+ */ + template + inline cl_int copy(const CommandQueue &queue, IteratorType startIterator, + IteratorType endIterator, cl::Buffer &buffer) { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator - startIterator; + size_type byteLength = length * sizeof(DataType); + + DataType *pointer = static_cast( + queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if (error != CL_SUCCESS) { return error; } #if defined(_MSC_VER) - std::copy( - startIterator, - endIterator, - stdext::checked_array_iterator( - pointer, length)); + std::copy( + startIterator, endIterator, stdext::checked_array_iterator(pointer, length)); #else - std::copy(startIterator, endIterator, pointer); + std::copy(startIterator, endIterator, pointer); #endif - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - -/** - * Blocking copy operation between iterators and a buffer. - * Device to Host. - * Uses specified queue. 
- */ -template< typename IteratorType > -inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator ) -{ - typedef typename std::iterator_traits::value_type DataType; - cl_int error; - - size_type length = endIterator-startIterator; - size_type byteLength = length*sizeof(DataType); - - DataType *pointer = - static_cast(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); - // if exceptions enabled, enqueueMapBuffer will throw - if( error != CL_SUCCESS ) { - return error; - } - std::copy(pointer, pointer + length, startIterator); - Event endEvent; - error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); - // if exceptions enabled, enqueueUnmapMemObject will throw - if( error != CL_SUCCESS ) { - return error; - } - endEvent.wait(); - return CL_SUCCESS; -} - + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if (error != CL_SUCCESS) { return error; } + endEvent.wait(); + return CL_SUCCESS; + } + + /** + * Blocking copy operation between iterators and a buffer. + * Device to Host. + * Uses specified queue. 
+ */ + template + inline cl_int copy(const CommandQueue &queue, const cl::Buffer &buffer, + IteratorType startIterator, IteratorType endIterator) { + typedef typename std::iterator_traits::value_type DataType; + cl_int error; + + size_type length = endIterator - startIterator; + size_type byteLength = length * sizeof(DataType); + + DataType *pointer = static_cast( + queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error)); + // if exceptions enabled, enqueueMapBuffer will throw + if (error != CL_SUCCESS) { return error; } + std::copy(pointer, pointer + length, startIterator); + Event endEvent; + error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent); + // if exceptions enabled, enqueueUnmapMemObject will throw + if (error != CL_SUCCESS) { return error; } + endEvent.wait(); + return CL_SUCCESS; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 -/** - * Blocking SVM map operation - performs a blocking map underneath. - */ -template -inline cl_int mapSVM(cl::vector &container) -{ - return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); -} - -/** -* Blocking SVM map operation - performs a blocking map underneath. -*/ -template -inline cl_int unmapSVM(cl::vector &container) -{ - return enqueueUnmapSVM(container); -} + /** + * Blocking SVM map operation - performs a blocking map underneath. + */ + template + inline cl_int mapSVM(cl::vector &container) { + return enqueueMapSVM(container, CL_TRUE, CL_MAP_READ | CL_MAP_WRITE); + } + + /** + * Blocking SVM map operation - performs a blocking map underneath. 
+ */ + template + inline cl_int unmapSVM(cl::vector &container) { + return enqueueUnmapSVM(container); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 #if CL_HPP_TARGET_OPENCL_VERSION >= 110 -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - void *ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueReadBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueReadBufferRect( - buffer, - blocking, - { buffer_offset[0], buffer_offset[1], 0 }, - { host_offset[0], host_offset[1], 0 }, - { region[0], region[1], 1 }, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - const void *ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != 
CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteBufferRect( - buffer, - blocking, - buffer_offset, - host_offset, - region, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteBufferRect( - const Buffer& buffer, - cl_bool blocking, - const array& buffer_offset, - const array& host_offset, - const array& region, - size_type buffer_row_pitch, - size_type buffer_slice_pitch, - size_type host_row_pitch, - size_type host_slice_pitch, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueWriteBufferRect( - buffer, - blocking, - { buffer_offset[0], buffer_offset[1], 0 }, - { host_offset[0], host_offset[1], 0 }, - { region[0], region[1], 1 }, - buffer_row_pitch, - buffer_slice_pitch, - host_row_pitch, - host_slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferRect( - src, - dst, - src_origin, - dst_origin, - region, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} - -inline cl_int enqueueCopyBufferRect( - const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueCopyBufferRect( - src, - dst, - { src_origin[0], src_origin[1], 0 }, - { dst_origin[0], dst_origin[1], 0 
}, - { region[0], region[1], 1 }, - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - events, - event); -} + inline cl_int enqueueReadBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, + size_type buffer_row_pitch, size_type buffer_slice_pitch, + size_type host_row_pitch, size_type host_slice_pitch, + void *ptr, const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueReadBufferRect(buffer, + blocking, + buffer_offset, + host_offset, + region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); + } + + inline cl_int enqueueReadBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, + size_type buffer_row_pitch, size_type buffer_slice_pitch, + size_type host_row_pitch, size_type host_slice_pitch, + void *ptr, const vector *events = nullptr, + Event *event = nullptr) { + return enqueueReadBufferRect(buffer, + blocking, + {buffer_offset[0], buffer_offset[1], 0}, + {host_offset[0], host_offset[1], 0}, + {region[0], region[1], 1}, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); + } + + inline cl_int enqueueWriteBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, + size_type buffer_row_pitch, size_type buffer_slice_pitch, + size_type host_row_pitch, size_type host_slice_pitch, + const void *ptr, const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueWriteBufferRect(buffer, + blocking, + buffer_offset, + host_offset, + 
region, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); + } + + inline cl_int enqueueWriteBufferRect(const Buffer &buffer, cl_bool blocking, + const array &buffer_offset, + const array &host_offset, + const array ®ion, + size_type buffer_row_pitch, size_type buffer_slice_pitch, + size_type host_row_pitch, size_type host_slice_pitch, + const void *ptr, const vector *events = nullptr, + Event *event = nullptr) { + return enqueueWriteBufferRect(buffer, + blocking, + {buffer_offset[0], buffer_offset[1], 0}, + {host_offset[0], host_offset[1], 0}, + {region[0], region[1], 1}, + buffer_row_pitch, + buffer_slice_pitch, + host_row_pitch, + host_slice_pitch, + ptr, + events, + event); + } + + inline cl_int enqueueCopyBufferRect( + const Buffer &src, const Buffer &dst, const array &src_origin, + const array &dst_origin, const array ®ion, + size_type src_row_pitch, size_type src_slice_pitch, size_type dst_row_pitch, + size_type dst_slice_pitch, const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueCopyBufferRect(src, + dst, + src_origin, + dst_origin, + region, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); + } + + inline cl_int enqueueCopyBufferRect( + const Buffer &src, const Buffer &dst, const array &src_origin, + const array &dst_origin, const array ®ion, + size_type src_row_pitch, size_type src_slice_pitch, size_type dst_row_pitch, + size_type dst_slice_pitch, const vector *events = nullptr, Event *event = nullptr) { + return enqueueCopyBufferRect(src, + dst, + {src_origin[0], src_origin[1], 0}, + {dst_origin[0], dst_origin[1], 0}, + {region[0], region[1], 1}, + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + events, + event); + } #endif // CL_HPP_TARGET_OPENCL_VERSION >= 110 -inline cl_int 
enqueueReadImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueReadImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueReadImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueReadImage( - image, - blocking, - { origin[0], origin[1], 0 }, - { region[0], region[1], 1 }, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueWriteImage( - image, - blocking, - origin, - region, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueWriteImage( - const Image& image, - cl_bool blocking, - const array& origin, - const array& region, - size_type row_pitch, - size_type slice_pitch, - const void* ptr, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueWriteImage( - image, - blocking, - { origin[0], origin[1], 0 }, - { region[0], region[1], 1 }, - row_pitch, - slice_pitch, - ptr, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const 
vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImage( - src, - dst, - src_origin, - dst_origin, - region, - events, - event); -} - -inline cl_int enqueueCopyImage( - const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueCopyImage( - src, - dst, - { src_origin[0], src_origin[1], 0 }, - { dst_origin[0], dst_origin[1], 0 }, - { region[0], region[1], 1 }, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyImageToBuffer( - src, - dst, - src_origin, - region, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyImageToBuffer( - const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* events = nullptr, - Event* event = nullptr) -{ - return enqueueCopyImageToBuffer( - src, - dst, - { src_origin[0], src_origin[1], 0 }, - { region[0], region[1], 1 }, - dst_offset, - events, - event); -} - -inline cl_int enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.enqueueCopyBufferToImage( - src, - dst, - src_offset, - dst_origin, - region, - events, - event); -} - -inline cl_int 
enqueueCopyBufferToImage( - const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* events = nullptr, - Event* event = nullptr) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return enqueueCopyBufferToImage( - src, - dst, - src_offset, - { dst_origin[0], dst_origin[1], 0 }, - { region[0], region[1], 1 }, - events, - event); -} - -inline cl_int flush(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - return queue.flush(); -} - -inline cl_int finish(void) -{ - cl_int error; - CommandQueue queue = CommandQueue::getDefault(&error); - - if (error != CL_SUCCESS) { - return error; - } - - - return queue.finish(); -} - -class EnqueueArgs -{ -private: - CommandQueue queue_; - const NDRange offset_; - const NDRange global_; - const NDRange local_; - vector events_; - - template - friend class KernelFunctor; - -public: - EnqueueArgs(NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(Event e, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - 
offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(const vector &events, NDRange global) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(const vector &events, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(const vector &events, NDRange offset, NDRange global, NDRange local) : - queue_(CommandQueue::getDefault()), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local) - { - events_.push_back(e); - } - - EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global) : - queue_(queue), - offset_(NullRange), - global_(global), - local_(NullRange), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global, NDRange local) : - queue_(queue), - 
offset_(NullRange), - global_(global), - local_(local), - events_(events) - { - - } - - EnqueueArgs(CommandQueue &queue, const vector &events, NDRange offset, NDRange global, NDRange local) : - queue_(queue), - offset_(offset), - global_(global), - local_(local), - events_(events) - { - - } -}; - - -//---------------------------------------------------------------------------------------------- - - -/** - * Type safe kernel functor. - * - */ -template -class KernelFunctor -{ -private: - Kernel kernel_; - - template - void setArgs(T0&& t0, T1s&&... t1s) - { - kernel_.setArg(index, t0); - setArgs(std::forward(t1s)...); - } - - template - void setArgs(T0&& t0) - { - kernel_.setArg(index, t0); - } - - template - void setArgs() - { - } - - -public: - KernelFunctor(Kernel kernel) : kernel_(kernel) - {} - - KernelFunctor( - const Program& program, - const string name, - cl_int * err = nullptr) : - kernel_(program, name.c_str(), err) - {} - - //! \brief Return type of the functor - typedef Event result_type; - - /** - * Enqueue kernel. - * @param args Launch parameters of the kernel. - * @param t0... List of kernel arguments based on the template type of the functor. - */ - Event operator() ( - const EnqueueArgs& args, - Ts... ts) - { - Event event; - setArgs<0>(std::forward(ts)...); - - args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } - - /** - * Enqueue kernel with support for error code. - * @param args Launch parameters of the kernel. - * @param t0... List of kernel arguments based on the template type of the functor. - * @param error Out parameter returning the error code from the execution. - */ - Event operator() ( - const EnqueueArgs& args, - Ts... 
ts, - cl_int &error) - { - Event event; - setArgs<0>(std::forward(ts)...); - - error = args.queue_.enqueueNDRangeKernel( - kernel_, - args.offset_, - args.global_, - args.local_, - &args.events_, - &event); - - return event; - } + inline cl_int enqueueReadImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, void *ptr, + const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueReadImage( + image, blocking, origin, region, row_pitch, slice_pitch, ptr, events, event); + } + + inline cl_int enqueueReadImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, void *ptr, + const vector *events = nullptr, Event *event = nullptr) { + return enqueueReadImage(image, + blocking, + {origin[0], origin[1], 0}, + {region[0], region[1], 1}, + row_pitch, + slice_pitch, + ptr, + events, + event); + } + + inline cl_int enqueueWriteImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, const void *ptr, + const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueWriteImage( + image, blocking, origin, region, row_pitch, slice_pitch, ptr, events, event); + } + + inline cl_int enqueueWriteImage(const Image &image, cl_bool blocking, + const array &origin, + const array ®ion, size_type row_pitch, + size_type slice_pitch, const void *ptr, + const vector *events = nullptr, Event *event = nullptr) { + return enqueueWriteImage(image, + blocking, + {origin[0], origin[1], 0}, + {region[0], region[1], 1}, + row_pitch, + slice_pitch, + ptr, + events, + event); + } + + inline cl_int 
enqueueCopyImage(const Image &src, const Image &dst, + const array &src_origin, + const array &dst_origin, + const array ®ion, + const vector *events = nullptr, Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueCopyImage(src, dst, src_origin, dst_origin, region, events, event); + } + + inline cl_int enqueueCopyImage(const Image &src, const Image &dst, + const array &src_origin, + const array &dst_origin, + const array ®ion, + const vector *events = nullptr, Event *event = nullptr) { + return enqueueCopyImage(src, + dst, + {src_origin[0], src_origin[1], 0}, + {dst_origin[0], dst_origin[1], 0}, + {region[0], region[1], 1}, + events, + event); + } + + inline cl_int enqueueCopyImageToBuffer(const Image &src, const Buffer &dst, + const array &src_origin, + const array ®ion, size_type dst_offset, + const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueCopyImageToBuffer( + src, dst, src_origin, region, dst_offset, events, event); + } + + inline cl_int enqueueCopyImageToBuffer(const Image &src, const Buffer &dst, + const array &src_origin, + const array ®ion, size_type dst_offset, + const vector *events = nullptr, + Event *event = nullptr) { + return enqueueCopyImageToBuffer(src, + dst, + {src_origin[0], src_origin[1], 0}, + {region[0], region[1], 1}, + dst_offset, + events, + event); + } + + inline cl_int enqueueCopyBufferToImage(const Buffer &src, const Image &dst, + size_type src_offset, + const array &dst_origin, + const array ®ion, + const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.enqueueCopyBufferToImage( + src, dst, src_offset, dst_origin, region, events, 
event); + } + + inline cl_int enqueueCopyBufferToImage(const Buffer &src, const Image &dst, + size_type src_offset, + const array &dst_origin, + const array ®ion, + const vector *events = nullptr, + Event *event = nullptr) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return enqueueCopyBufferToImage(src, + dst, + src_offset, + {dst_origin[0], dst_origin[1], 0}, + {region[0], region[1], 1}, + events, + event); + } + + inline cl_int flush(void) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.flush(); + } + + inline cl_int finish(void) { + cl_int error; + CommandQueue queue = CommandQueue::getDefault(&error); + + if (error != CL_SUCCESS) { return error; } + + return queue.finish(); + } + + class EnqueueArgs { + private: + CommandQueue queue_; + const NDRange offset_; + const NDRange global_; + const NDRange local_; + vector events_; + + template + friend class KernelFunctor; + + public: + EnqueueArgs(NDRange global) : + queue_(CommandQueue::getDefault()), offset_(NullRange), global_(global), + local_(NullRange) {} + + EnqueueArgs(NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), offset_(NullRange), global_(global), + local_(local) {} + + EnqueueArgs(NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), offset_(offset), global_(global), + local_(local) {} + + EnqueueArgs(Event e, NDRange global) : + queue_(CommandQueue::getDefault()), offset_(NullRange), global_(global), + local_(NullRange) { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), offset_(NullRange), global_(global), + local_(local) { + events_.push_back(e); + } + + EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), offset_(offset), global_(global), + 
local_(local) { + events_.push_back(e); + } + + EnqueueArgs(const vector &events, NDRange global) : + queue_(CommandQueue::getDefault()), offset_(NullRange), global_(global), + local_(NullRange), events_(events) {} + + EnqueueArgs(const vector &events, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), offset_(NullRange), global_(global), + local_(local), events_(events) {} + + EnqueueArgs(const vector &events, NDRange offset, NDRange global, NDRange local) : + queue_(CommandQueue::getDefault()), offset_(offset), global_(global), local_(local), + events_(events) {} + + EnqueueArgs(CommandQueue &queue, NDRange global) : + queue_(queue), offset_(NullRange), global_(global), local_(NullRange) {} + + EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : + queue_(queue), offset_(NullRange), global_(global), local_(local) {} + + EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : + queue_(queue), offset_(offset), global_(global), local_(local) {} + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : + queue_(queue), offset_(NullRange), global_(global), local_(NullRange) { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : + queue_(queue), offset_(NullRange), global_(global), local_(local) { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : + queue_(queue), offset_(offset), global_(global), local_(local) { + events_.push_back(e); + } + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global) : + queue_(queue), offset_(NullRange), global_(global), local_(NullRange), + events_(events) {} + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange global, + NDRange local) : + queue_(queue), + offset_(NullRange), global_(global), local_(local), events_(events) {} + + EnqueueArgs(CommandQueue &queue, const vector &events, NDRange offset, + NDRange global, 
NDRange local) : + queue_(queue), + offset_(offset), global_(global), local_(local), events_(events) {} + }; + + //---------------------------------------------------------------------------------------------- + + /** + * Type safe kernel functor. + * + */ + template + class KernelFunctor { + private: + Kernel kernel_; + + template + void setArgs(T0 &&t0, T1s &&...t1s) { + kernel_.setArg(index, t0); + setArgs(std::forward(t1s)...); + } + + template + void setArgs(T0 &&t0) { + kernel_.setArg(index, t0); + } + + template + void setArgs() {} + + public: + KernelFunctor(Kernel kernel) : kernel_(kernel) {} + + KernelFunctor(const Program &program, const string name, cl_int *err = nullptr) : + kernel_(program, name.c_str(), err) {} + + //! \brief Return type of the functor + typedef Event result_type; + + /** + * Enqueue kernel. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + */ + Event operator()(const EnqueueArgs &args, Ts... ts) { + Event event; + setArgs<0>(std::forward(ts)...); + + args.queue_.enqueueNDRangeKernel( + kernel_, args.offset_, args.global_, args.local_, &args.events_, &event); + + return event; + } + + /** + * Enqueue kernel with support for error code. + * @param args Launch parameters of the kernel. + * @param t0... List of kernel arguments based on the template type of the functor. + * @param error Out parameter returning the error code from the execution. + */ + Event operator()(const EnqueueArgs &args, Ts... ts, cl_int &error) { + Event event; + setArgs<0>(std::forward(ts)...); + + error = args.queue_.enqueueNDRangeKernel( + kernel_, args.offset_, args.global_, args.local_, &args.events_, &event); + + return event; + } #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - cl_int setSVMPointers(const vector &pointerList) - { - return kernel_.setSVMPointers(pointerList); - } - - template - cl_int setSVMPointers(const T0 &t0, T1s &... 
ts) - { - return kernel_.setSVMPointers(t0, ts...); - } + cl_int setSVMPointers(const vector &pointerList) { + return kernel_.setSVMPointers(pointerList); + } + + template + cl_int setSVMPointers(const T0 &t0, T1s &...ts) { + return kernel_.setSVMPointers(t0, ts...); + } #endif // #if CL_HPP_TARGET_OPENCL_VERSION >= 200 - Kernel getKernel() - { - return kernel_; - } -}; - -namespace compatibility { - /** - * Backward compatibility class to ensure that cl.hpp code works with opencl.hpp. - * Please use KernelFunctor directly. - */ - template - struct make_kernel - { - typedef KernelFunctor FunctorType; - - FunctorType functor_; - - make_kernel( - const Program& program, - const string name, - cl_int * err = nullptr) : - functor_(FunctorType(program, name, err)) - {} - - make_kernel( - const Kernel kernel) : - functor_(FunctorType(kernel)) - {} - - //! \brief Return type of the functor - typedef Event result_type; - - //! \brief Function signature of kernel functor with no event dependency. - typedef Event type_( - const EnqueueArgs&, - Ts...); - - Event operator()( - const EnqueueArgs& enqueueArgs, - Ts... args) - { - return functor_( - enqueueArgs, args...); - } - }; -} // namespace compatibility + Kernel getKernel() { return kernel_; } + }; -#ifdef cl_khr_semaphore + namespace compatibility { + /** + * Backward compatibility class to ensure that cl.hpp code works with opencl.hpp. + * Please use KernelFunctor directly. 
+ */ + template + struct make_kernel { + typedef KernelFunctor FunctorType; -#ifdef cl_khr_external_semaphore -enum ExternalSemaphoreType : cl_external_semaphore_handle_type_khr -{ - None = 0, -#ifdef cl_khr_external_semaphore_dx_fence - D3D12Fence = CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, -#endif -#ifdef cl_khr_external_semaphore_opaque_fd - OpaqueFd = CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, -#endif -#ifdef cl_khr_external_semaphore_sync_fd - SyncFd = CL_SEMAPHORE_HANDLE_SYNC_FD_KHR, -#endif -#ifdef cl_khr_external_semaphore_win32 - OpaqueWin32 = CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR, - OpaqueWin32Kmt = CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR, -#endif // cl_khr_external_semaphore_win32 -}; -#endif // cl_khr_external_semaphore + FunctorType functor_; -class Semaphore : public detail::Wrapper -{ -public: - Semaphore() : detail::Wrapper() {} - Semaphore( - const Context &context, - const vector& sema_props, - cl_int *err = nullptr) - { - /* initialization of addresses to extension functions (it is done only once) */ - std::call_once(ext_init_, initExtensions, context); - - cl_int error = CL_INVALID_OPERATION; - - if (pfn_clCreateSemaphoreWithPropertiesKHR) - { - object_ = pfn_clCreateSemaphoreWithPropertiesKHR( - context(), - sema_props.data(), - &error); - } - - detail::errHandler(error, __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR); - - if (err != nullptr) { - *err = error; - } - } - Semaphore( - const vector& sema_props, - cl_int* err = nullptr):Semaphore(Context::getDefault(err), sema_props, err) {} - - explicit Semaphore(const cl_semaphore_khr& semaphore, bool retainObject = false) : - detail::Wrapper(semaphore, retainObject) {} - Semaphore& operator = (const cl_semaphore_khr& rhs) { - detail::Wrapper::operator=(rhs); - return *this; - } - template - cl_int getInfo(cl_semaphore_info_khr name, T* param) const - { - if (pfn_clGetSemaphoreInfoKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __GET_SEMAPHORE_KHR_INFO_ERR); - } - - return 
detail::errHandler( - detail::getInfo(pfn_clGetSemaphoreInfoKHR, object_, name, param), - __GET_SEMAPHORE_KHR_INFO_ERR); - } - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_semaphore_info_khr, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } + make_kernel(const Program &program, const string name, cl_int *err = nullptr) : + functor_(FunctorType(program, name, err)) {} -#ifdef cl_khr_external_semaphore - template - cl_int getHandleForTypeKHR( - const Device& device, cl_external_semaphore_handle_type_khr name, T* param) const - { - if (pfn_clGetSemaphoreHandleForTypeKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR); - } - - return detail::errHandler( - detail::getInfo( - pfn_clGetSemaphoreHandleForTypeKHR, object_, device(), name, param), - __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR); - } - - template typename - detail::param_traits::param_type - getHandleForTypeKHR(const Device& device, cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_external_semaphore_handle_type_khr, type>::param_type param; - cl_int result = getHandleForTypeKHR(device, type, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } -#endif // cl_khr_external_semaphore - - cl_int retain() - { - if (pfn_clRetainSemaphoreKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __RETAIN_SEMAPHORE_KHR_ERR); - } - return pfn_clRetainSemaphoreKHR(object_); - } - - cl_int release() - { - if (pfn_clReleaseSemaphoreKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __RELEASE_SEMAPHORE_KHR_ERR); - } - return pfn_clReleaseSemaphoreKHR(object_); - } - -private: - static std::once_flag ext_init_; - - static void initExtensions(const Context& context) - { -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - Device 
device = context.getInfo().at(0); - cl_platform_id platform = device.getInfo()(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateSemaphoreWithPropertiesKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clReleaseSemaphoreKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clRetainSemaphoreKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueWaitSemaphoresKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueSignalSemaphoresKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetSemaphoreInfoKHR); -#ifdef cl_khr_external_semaphore - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetSemaphoreHandleForTypeKHR); -#endif // cl_khr_external_semaphore + make_kernel(const Kernel kernel) : functor_(FunctorType(kernel)) {} -#else - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSemaphoreWithPropertiesKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clReleaseSemaphoreKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clRetainSemaphoreKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueWaitSemaphoresKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueSignalSemaphoresKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetSemaphoreInfoKHR); -#ifdef cl_khr_external_semaphore - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetSemaphoreHandleForTypeKHR); -#endif // cl_khr_external_semaphore + //! 
\brief Return type of the functor + typedef Event result_type; -#endif - if ((pfn_clCreateSemaphoreWithPropertiesKHR == nullptr) && - (pfn_clReleaseSemaphoreKHR == nullptr) && - (pfn_clRetainSemaphoreKHR == nullptr) && - (pfn_clEnqueueWaitSemaphoresKHR == nullptr) && - (pfn_clEnqueueSignalSemaphoresKHR == nullptr) && -#ifdef cl_khr_external_semaphore - (pfn_clGetSemaphoreHandleForTypeKHR == nullptr) && -#endif // cl_khr_external_semaphore - (pfn_clGetSemaphoreInfoKHR == nullptr)) - { - detail::errHandler(CL_INVALID_VALUE, __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR); - } - } - -}; - -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Semaphore::ext_init_; - -inline cl_int CommandQueue::enqueueWaitSemaphores( - const vector &sema_objects, - const vector &sema_payloads, - const vector* events_wait_list, - Event *event) const -{ - cl_event tmp; - cl_int err = CL_INVALID_OPERATION; - - if (pfn_clEnqueueWaitSemaphoresKHR != nullptr) { - err = pfn_clEnqueueWaitSemaphoresKHR( - object_, - (cl_uint)sema_objects.size(), - (const cl_semaphore_khr *) &sema_objects.front(), - (sema_payloads.size() > 0) ? &sema_payloads.front() : nullptr, - (events_wait_list != nullptr) ? (cl_uint) events_wait_list->size() : 0, - (events_wait_list != nullptr && events_wait_list->size() > 0) ? (cl_event*) &events_wait_list->front() : nullptr, - (event != nullptr) ? &tmp : nullptr); - } - - detail::errHandler(err, __ENQUEUE_WAIT_SEMAPHORE_KHR_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; -} - -inline cl_int CommandQueue::enqueueSignalSemaphores( - const vector &sema_objects, - const vector& sema_payloads, - const vector* events_wait_list, - Event* event) -{ - cl_event tmp; - cl_int err = CL_INVALID_OPERATION; - - if (pfn_clEnqueueSignalSemaphoresKHR != nullptr) { - err = pfn_clEnqueueSignalSemaphoresKHR( - object_, - (cl_uint)sema_objects.size(), - (const cl_semaphore_khr*) &sema_objects.front(), - (sema_payloads.size() > 0) ? 
&sema_payloads.front() : nullptr, - (events_wait_list != nullptr) ? (cl_uint) events_wait_list->size() : 0, - (events_wait_list != nullptr && events_wait_list->size() > 0) ? (cl_event*) &events_wait_list->front() : nullptr, - (event != nullptr) ? &tmp : nullptr); - } - - detail::errHandler(err, __ENQUEUE_SIGNAL_SEMAPHORE_KHR_ERR); - - if (event != nullptr && err == CL_SUCCESS) - *event = tmp; - - return err; -} + //! \brief Function signature of kernel functor with no event dependency. + typedef Event type_(const EnqueueArgs &, Ts...); -#endif // cl_khr_semaphore + Event operator()(const EnqueueArgs &enqueueArgs, Ts... args) { + return functor_(enqueueArgs, args...); + } + }; + } // namespace compatibility -#if defined(cl_khr_command_buffer) -/*! \class CommandBufferKhr - * \brief CommandBufferKhr interface for cl_command_buffer_khr. - */ -class CommandBufferKhr : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to nullptr. - CommandBufferKhr() : detail::Wrapper() { } - - explicit CommandBufferKhr(const vector &queues, - cl_command_buffer_properties_khr properties = 0, - cl_int* errcode_ret = nullptr) - { - cl_command_buffer_properties_khr command_buffer_properties[] = { - CL_COMMAND_BUFFER_FLAGS_KHR, properties, 0 - }; - - /* initialization of addresses to extension functions (it is done only once) */ - std::call_once(ext_init_, [&] { initExtensions(queues[0].getInfo()); }); - cl_int error = CL_INVALID_OPERATION; - - static_assert(sizeof(cl::CommandQueue) == sizeof(cl_command_queue), - "Size of cl::CommandQueue must be equal to size of cl_command_queue"); - - if (pfn_clCreateCommandBufferKHR) - { - object_ = pfn_clCreateCommandBufferKHR((cl_uint) queues.size(), - (cl_command_queue *) &queues.front(), - command_buffer_properties, - &error); - } - - detail::errHandler(error, __CREATE_COMMAND_BUFFER_KHR_ERR); - if (errcode_ret != nullptr) { - *errcode_ret = error; - } - } - - explicit CommandBufferKhr(const cl_command_buffer_khr& 
commandBufferKhr, bool retainObject = false) : - detail::Wrapper(commandBufferKhr, retainObject) { } - - CommandBufferKhr& operator=(const cl_command_buffer_khr& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_command_buffer_info_khr name, T* param) const - { - if (pfn_clGetCommandBufferInfoKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __GET_COMMAND_BUFFER_INFO_KHR_ERR); - } - return detail::errHandler( - detail::getInfo(pfn_clGetCommandBufferInfoKHR, object_, name, param), - __GET_COMMAND_BUFFER_INFO_KHR_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_command_buffer_info_khr, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } - - cl_int finalizeCommandBuffer() const - { - return detail::errHandler(::clFinalizeCommandBufferKHR(object_), __FINALIZE_COMMAND_BUFFER_KHR_ERR); - } - - cl_int enqueueCommandBuffer(vector &queues, - const vector* events = nullptr, - Event* event = nullptr) - { - if (pfn_clEnqueueCommandBufferKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __ENQUEUE_COMMAND_BUFFER_KHR_ERR); - } - - static_assert(sizeof(cl::CommandQueue) == sizeof(cl_command_queue), - "Size of cl::CommandQueue must be equal to size of cl_command_queue"); - - return detail::errHandler(pfn_clEnqueueCommandBufferKHR((cl_uint) queues.size(), - (cl_command_queue *) &queues.front(), - object_, - (events != nullptr) ? (cl_uint) events->size() : 0, - (events != nullptr && events->size() > 0) ? 
(cl_event*) &events->front() : nullptr, - (cl_event*) event), - __ENQUEUE_COMMAND_BUFFER_KHR_ERR); - } - - cl_int commandBarrierWithWaitList(const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandBarrierWithWaitListKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandBarrierWithWaitListKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? &tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandCopyBuffer(const Buffer& src, - const Buffer& dst, - size_type src_offset, - size_type dst_offset, - size_type size, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandCopyBufferKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_COPY_BUFFER_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandCopyBufferKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - src(), - dst(), - src_offset, - dst_offset, - size, - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_COPY_BUFFER_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandCopyBufferRect(const Buffer& src, - const Buffer& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - size_type src_row_pitch, - size_type src_slice_pitch, - size_type dst_row_pitch, - size_type dst_slice_pitch, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandCopyBufferRectKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_COPY_BUFFER_RECT_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandCopyBufferRectKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - src(), - dst(), - src_origin.data(), - dst_origin.data(), - region.data(), - src_row_pitch, - src_slice_pitch, - dst_row_pitch, - dst_slice_pitch, - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_COPY_BUFFER_RECT_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandCopyBufferToImage(const Buffer& src, - const Image& dst, - size_type src_offset, - const array& dst_origin, - const array& region, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandCopyBufferToImageKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandCopyBufferToImageKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - src(), - dst(), - src_offset, - dst_origin.data(), - region.data(), - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? &tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandCopyImage(const Image& src, - const Image& dst, - const array& src_origin, - const array& dst_origin, - const array& region, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandCopyImageKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_COPY_IMAGE_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandCopyImageKHR(object_, - (command_queue != nullptr) ? 
(*command_queue)() : nullptr, - src(), - dst(), - src_origin.data(), - dst_origin.data(), - region.data(), - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? &tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_COPY_IMAGE_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandCopyImageToBuffer(const Image& src, - const Buffer& dst, - const array& src_origin, - const array& region, - size_type dst_offset, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandCopyImageToBufferKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandCopyImageToBufferKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - src(), - dst(), - src_origin.data(), - region.data(), - dst_offset, - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - template - cl_int commandFillBuffer(const Buffer& buffer, - PatternType pattern, - size_type offset, - size_type size, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandFillBufferKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_FILL_BUFFER_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandFillBufferKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - buffer(), - static_cast(&pattern), - sizeof(PatternType), - offset, - size, - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? &tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_FILL_BUFFER_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandFillImage(const Image& image, - cl_float4 fillColor, - const array& origin, - const array& region, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandFillImageKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_FILL_IMAGE_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandFillImageKHR(object_, - (command_queue != nullptr) ? 
(*command_queue)() : nullptr, - image(), - static_cast(&fillColor), - origin.data(), - region.data(), - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? &tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_FILL_IMAGE_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - - cl_int commandNDRangeKernel(const cl::vector &properties, - const Kernel& kernel, - const NDRange& offset, - const NDRange& global, - const NDRange& local = NullRange, - const vector* sync_points_vec = nullptr, - cl_sync_point_khr* sync_point = nullptr, - MutableCommandKhr* mutable_handle = nullptr, - const CommandQueue* command_queue = nullptr) - { - if (pfn_clCommandNDRangeKernelKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __COMMAND_NDRANGE_KERNEL_KHR_ERR); - } - - cl_sync_point_khr tmp_sync_point; - cl_int error = detail::errHandler( - pfn_clCommandNDRangeKernelKHR(object_, - (command_queue != nullptr) ? (*command_queue)() : nullptr, - &properties[0], - kernel(), - (cl_uint) global.dimensions(), - offset.dimensions() != 0 ? (const size_type*) offset : nullptr, - (const size_type*) global, - local.dimensions() != 0 ? (const size_type*) local : nullptr, - (sync_points_vec != nullptr) ? (cl_uint) sync_points_vec->size() : 0, - (sync_points_vec != nullptr && sync_points_vec->size() > 0) ? &sync_points_vec->front() : nullptr, - (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, - (cl_mutable_command_khr*) mutable_handle), - __COMMAND_NDRANGE_KERNEL_KHR_ERR); - - if (sync_point != nullptr && error == CL_SUCCESS) - *sync_point = tmp_sync_point; - - return error; - } - -#if defined(cl_khr_command_buffer_mutable_dispatch) - cl_int updateMutableCommands(const cl_mutable_base_config_khr* mutable_config) - { - if (pfn_clUpdateMutableCommandsKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __UPDATE_MUTABLE_COMMANDS_KHR_ERR); - } - return detail::errHandler(pfn_clUpdateMutableCommandsKHR(object_, mutable_config), - __UPDATE_MUTABLE_COMMANDS_KHR_ERR); - } -#endif /* cl_khr_command_buffer_mutable_dispatch */ - -private: - static std::once_flag ext_init_; +#ifdef cl_khr_semaphore - static void initExtensions(const cl::Device& device) - { -#if CL_HPP_TARGET_OPENCL_VERSION >= 120 - cl_platform_id platform = device.getInfo()(); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clFinalizeCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clRetainCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clReleaseCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetCommandBufferInfoKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandBarrierWithWaitListKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyBufferRectKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyBufferToImageKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyImageKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyImageToBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandFillBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandFillImageKHR); - 
CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandNDRangeKernelKHR); -#if defined(cl_khr_command_buffer_mutable_dispatch) - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clUpdateMutableCommandsKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetMutableCommandInfoKHR); -#endif /* cl_khr_command_buffer_mutable_dispatch */ -#elif CL_HPP_TARGET_OPENCL_VERSION >= 110 - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clFinalizeCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clRetainCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clReleaseCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetCommandBufferInfoKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueCommandBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandBarrierWithWaitListKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyBufferRectKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyBufferToImageKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyImageKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyImageToBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandFillBufferKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandFillImageKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandNDRangeKernelKHR); -#if defined(cl_khr_command_buffer_mutable_dispatch) - CL_HPP_INIT_CL_EXT_FCN_PTR_(clUpdateMutableCommandsKHR); - CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetMutableCommandInfoKHR); -#endif /* cl_khr_command_buffer_mutable_dispatch */ -#endif - if ((pfn_clCreateCommandBufferKHR == nullptr) && - (pfn_clFinalizeCommandBufferKHR == nullptr) && - (pfn_clRetainCommandBufferKHR == nullptr) && - (pfn_clReleaseCommandBufferKHR == nullptr) && - (pfn_clGetCommandBufferInfoKHR == nullptr) && - (pfn_clEnqueueCommandBufferKHR == nullptr) && - (pfn_clCommandBarrierWithWaitListKHR == nullptr) && - (pfn_clCommandCopyBufferKHR == nullptr) && - (pfn_clCommandCopyBufferRectKHR == nullptr) && - (pfn_clCommandCopyBufferToImageKHR == nullptr) && - 
(pfn_clCommandCopyImageKHR == nullptr) && - (pfn_clCommandCopyImageToBufferKHR == nullptr) && - (pfn_clCommandFillBufferKHR == nullptr) && - (pfn_clCommandFillImageKHR == nullptr) && - (pfn_clCommandNDRangeKernelKHR == nullptr) -#if defined(cl_khr_command_buffer_mutable_dispatch) - && (pfn_clUpdateMutableCommandsKHR == nullptr) - && (pfn_clGetMutableCommandInfoKHR == nullptr) -#endif /* cl_khr_command_buffer_mutable_dispatch */ - ) - { - detail::errHandler(CL_INVALID_VALUE, __CREATE_COMMAND_BUFFER_KHR_ERR); - } - } -}; // CommandBufferKhr +# ifdef cl_khr_external_semaphore + enum ExternalSemaphoreType : cl_external_semaphore_handle_type_khr { + None = 0, +# ifdef cl_khr_external_semaphore_dx_fence + D3D12Fence = CL_SEMAPHORE_HANDLE_D3D12_FENCE_KHR, +# endif +# ifdef cl_khr_external_semaphore_opaque_fd + OpaqueFd = CL_SEMAPHORE_HANDLE_OPAQUE_FD_KHR, +# endif +# ifdef cl_khr_external_semaphore_sync_fd + SyncFd = CL_SEMAPHORE_HANDLE_SYNC_FD_KHR, +# endif +# ifdef cl_khr_external_semaphore_win32 + OpaqueWin32 = CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KHR, + OpaqueWin32Kmt = CL_SEMAPHORE_HANDLE_OPAQUE_WIN32_KMT_KHR, +# endif // cl_khr_external_semaphore_win32 + }; +# endif // cl_khr_external_semaphore + + class Semaphore : public detail::Wrapper { + public: + Semaphore() : detail::Wrapper() {} + Semaphore(const Context &context, const vector &sema_props, + cl_int *err = nullptr) { + /* initialization of addresses to extension functions (it is done only once) */ + std::call_once(ext_init_, initExtensions, context); + + cl_int error = CL_INVALID_OPERATION; + + if (pfn_clCreateSemaphoreWithPropertiesKHR) { + object_ = + pfn_clCreateSemaphoreWithPropertiesKHR(context(), sema_props.data(), &error); + } + + detail::errHandler(error, __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR); + + if (err != nullptr) { *err = error; } + } + Semaphore(const vector &sema_props, cl_int *err = nullptr) : + Semaphore(Context::getDefault(err), sema_props, err) {} + + explicit Semaphore(const 
cl_semaphore_khr &semaphore, bool retainObject = false) : + detail::Wrapper(semaphore, retainObject) {} + Semaphore &operator=(const cl_semaphore_khr &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + template + cl_int getInfo(cl_semaphore_info_khr name, T *param) const { + if (pfn_clGetSemaphoreInfoKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __GET_SEMAPHORE_KHR_INFO_ERR); + } + + return detail::errHandler( + detail::getInfo(pfn_clGetSemaphoreInfoKHR, object_, name, param), + __GET_SEMAPHORE_KHR_INFO_ERR); + } + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + +# ifdef cl_khr_external_semaphore + template + cl_int getHandleForTypeKHR(const Device &device, cl_external_semaphore_handle_type_khr name, + T *param) const { + if (pfn_clGetSemaphoreHandleForTypeKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR); + } + + return detail::errHandler( + detail::getInfo(pfn_clGetSemaphoreHandleForTypeKHR, object_, device(), name, param), + __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR); + } + + template + typename detail::param_traits::param_type + getHandleForTypeKHR(const Device &device, cl_int *err = nullptr) const { + typename detail::param_traits::param_type param; + cl_int result = getHandleForTypeKHR(device, type, ¶m); + if (err != nullptr) { *err = result; } + return param; + } +# endif // cl_khr_external_semaphore + + cl_int retain() { + if (pfn_clRetainSemaphoreKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __RETAIN_SEMAPHORE_KHR_ERR); + } + return pfn_clRetainSemaphoreKHR(object_); + } + + cl_int release() { + if (pfn_clReleaseSemaphoreKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __RELEASE_SEMAPHORE_KHR_ERR); + } + return 
pfn_clReleaseSemaphoreKHR(object_); + } + + private: + static std::once_flag ext_init_; + + static void initExtensions(const Context &context) { +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + Device device = context.getInfo().at(0); + cl_platform_id platform = device.getInfo()(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateSemaphoreWithPropertiesKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clReleaseSemaphoreKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clRetainSemaphoreKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueWaitSemaphoresKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueSignalSemaphoresKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetSemaphoreInfoKHR); +# ifdef cl_khr_external_semaphore + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetSemaphoreHandleForTypeKHR); +# endif // cl_khr_external_semaphore + +# else + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateSemaphoreWithPropertiesKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clReleaseSemaphoreKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clRetainSemaphoreKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueWaitSemaphoresKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueSignalSemaphoresKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetSemaphoreInfoKHR); +# ifdef cl_khr_external_semaphore + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetSemaphoreHandleForTypeKHR); +# endif // cl_khr_external_semaphore + +# endif + if ((pfn_clCreateSemaphoreWithPropertiesKHR == nullptr) && + (pfn_clReleaseSemaphoreKHR == nullptr) && (pfn_clRetainSemaphoreKHR == nullptr) && + (pfn_clEnqueueWaitSemaphoresKHR == nullptr) && + (pfn_clEnqueueSignalSemaphoresKHR == nullptr) && +# ifdef cl_khr_external_semaphore + (pfn_clGetSemaphoreHandleForTypeKHR == nullptr) && +# endif // cl_khr_external_semaphore + (pfn_clGetSemaphoreInfoKHR == nullptr)) { + detail::errHandler(CL_INVALID_VALUE, __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR); + } + } + }; + + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag Semaphore::ext_init_; + + inline 
cl_int + CommandQueue::enqueueWaitSemaphores(const vector &sema_objects, + const vector &sema_payloads, + const vector *events_wait_list, Event *event) const { + cl_event tmp; + cl_int err = CL_INVALID_OPERATION; + + if (pfn_clEnqueueWaitSemaphoresKHR != nullptr) { + err = pfn_clEnqueueWaitSemaphoresKHR( + object_, + (cl_uint)sema_objects.size(), + (const cl_semaphore_khr *)&sema_objects.front(), + (sema_payloads.size() > 0) ? &sema_payloads.front() : nullptr, + (events_wait_list != nullptr) ? (cl_uint)events_wait_list->size() : 0, + (events_wait_list != nullptr && events_wait_list->size() > 0) + ? (cl_event *)&events_wait_list->front() + : nullptr, + (event != nullptr) ? &tmp : nullptr); + } + + detail::errHandler(err, __ENQUEUE_WAIT_SEMAPHORE_KHR_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } + + inline cl_int + CommandQueue::enqueueSignalSemaphores(const vector &sema_objects, + const vector &sema_payloads, + const vector *events_wait_list, Event *event) { + cl_event tmp; + cl_int err = CL_INVALID_OPERATION; + + if (pfn_clEnqueueSignalSemaphoresKHR != nullptr) { + err = pfn_clEnqueueSignalSemaphoresKHR( + object_, + (cl_uint)sema_objects.size(), + (const cl_semaphore_khr *)&sema_objects.front(), + (sema_payloads.size() > 0) ? &sema_payloads.front() : nullptr, + (events_wait_list != nullptr) ? (cl_uint)events_wait_list->size() : 0, + (events_wait_list != nullptr && events_wait_list->size() > 0) + ? (cl_event *)&events_wait_list->front() + : nullptr, + (event != nullptr) ? &tmp : nullptr); + } + + detail::errHandler(err, __ENQUEUE_SIGNAL_SEMAPHORE_KHR_ERR); + + if (event != nullptr && err == CL_SUCCESS) *event = tmp; + + return err; + } -CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandBufferKhr::ext_init_; +#endif // cl_khr_semaphore -#if defined(cl_khr_command_buffer_mutable_dispatch) -/*! \class MutableCommandKhr - * \brief MutableCommandKhr interface for cl_mutable_command_khr. 
- */ -class MutableCommandKhr : public detail::Wrapper -{ -public: - //! \brief Default constructor - initializes to nullptr. - MutableCommandKhr() : detail::Wrapper() { } - - explicit MutableCommandKhr(const cl_mutable_command_khr& mutableCommandKhr, bool retainObject = false) : - detail::Wrapper(mutableCommandKhr, retainObject) { } - - MutableCommandKhr& operator=(const cl_mutable_command_khr& rhs) - { - detail::Wrapper::operator=(rhs); - return *this; - } - - template - cl_int getInfo(cl_mutable_command_info_khr name, T* param) const - { - if (pfn_clGetMutableCommandInfoKHR == nullptr) { - return detail::errHandler(CL_INVALID_OPERATION, - __GET_MUTABLE_COMMAND_INFO_KHR_ERR); - } - return detail::errHandler( - detail::getInfo(pfn_clGetMutableCommandInfoKHR, object_, name, param), - __GET_MUTABLE_COMMAND_INFO_KHR_ERR); - } - - template typename - detail::param_traits::param_type - getInfo(cl_int* err = nullptr) const - { - typename detail::param_traits< - detail::cl_mutable_command_info_khr, name>::param_type param; - cl_int result = getInfo(name, ¶m); - if (err != nullptr) { - *err = result; - } - return param; - } -}; // MutableCommandKhr -#endif /* cl_khr_command_buffer_mutable_dispatch */ +#if defined(cl_khr_command_buffer) + /*! \class CommandBufferKhr + * \brief CommandBufferKhr interface for cl_command_buffer_khr. + */ + class CommandBufferKhr : public detail::Wrapper { + public: + //! \brief Default constructor - initializes to nullptr. 
+ CommandBufferKhr() : detail::Wrapper() {} + + explicit CommandBufferKhr(const vector &queues, + cl_command_buffer_properties_khr properties = 0, + cl_int *errcode_ret = nullptr) { + cl_command_buffer_properties_khr command_buffer_properties[] = { + CL_COMMAND_BUFFER_FLAGS_KHR, properties, 0}; + + /* initialization of addresses to extension functions (it is done only once) */ + std::call_once(ext_init_, + [&] { initExtensions(queues[0].getInfo()); }); + cl_int error = CL_INVALID_OPERATION; + + static_assert(sizeof(cl::CommandQueue) == sizeof(cl_command_queue), + "Size of cl::CommandQueue must be equal to size of cl_command_queue"); + + if (pfn_clCreateCommandBufferKHR) { + object_ = pfn_clCreateCommandBufferKHR((cl_uint)queues.size(), + (cl_command_queue *)&queues.front(), + command_buffer_properties, + &error); + } + + detail::errHandler(error, __CREATE_COMMAND_BUFFER_KHR_ERR); + if (errcode_ret != nullptr) { *errcode_ret = error; } + } + + explicit CommandBufferKhr(const cl_command_buffer_khr &commandBufferKhr, + bool retainObject = false) : + detail::Wrapper(commandBufferKhr, retainObject) {} + + CommandBufferKhr &operator=(const cl_command_buffer_khr &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_command_buffer_info_khr name, T *param) const { + if (pfn_clGetCommandBufferInfoKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __GET_COMMAND_BUFFER_INFO_KHR_ERR); + } + return detail::errHandler( + detail::getInfo(pfn_clGetCommandBufferInfoKHR, object_, name, param), + __GET_COMMAND_BUFFER_INFO_KHR_ERR); + } + + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type + param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + + cl_int finalizeCommandBuffer() const { + return detail::errHandler(::clFinalizeCommandBufferKHR(object_), + __FINALIZE_COMMAND_BUFFER_KHR_ERR); + 
} + + cl_int enqueueCommandBuffer(vector &queues, + const vector *events = nullptr, Event *event = nullptr) { + if (pfn_clEnqueueCommandBufferKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __ENQUEUE_COMMAND_BUFFER_KHR_ERR); + } + + static_assert(sizeof(cl::CommandQueue) == sizeof(cl_command_queue), + "Size of cl::CommandQueue must be equal to size of cl_command_queue"); + + return detail::errHandler( + pfn_clEnqueueCommandBufferKHR( + (cl_uint)queues.size(), + (cl_command_queue *)&queues.front(), + object_, + (events != nullptr) ? (cl_uint)events->size() : 0, + (events != nullptr && events->size() > 0) ? (cl_event *)&events->front() : nullptr, + (cl_event *)event), + __ENQUEUE_COMMAND_BUFFER_KHR_ERR); + } + + cl_int + commandBarrierWithWaitList(const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandBarrierWithWaitListKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandBarrierWithWaitListKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int commandCopyBuffer(const Buffer &src, const Buffer &dst, size_type src_offset, + size_type dst_offset, size_type size, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandCopyBufferKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __COMMAND_COPY_BUFFER_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandCopyBufferKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + src(), + dst(), + src_offset, + dst_offset, + size, + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_COPY_BUFFER_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int commandCopyBufferRect( + const Buffer &src, const Buffer &dst, const array &src_origin, + const array &dst_origin, const array ®ion, + size_type src_row_pitch, size_type src_slice_pitch, size_type dst_row_pitch, + size_type dst_slice_pitch, const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandCopyBufferRectKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __COMMAND_COPY_BUFFER_RECT_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandCopyBufferRectKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + src_row_pitch, + src_slice_pitch, + dst_row_pitch, + dst_slice_pitch, + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_COPY_BUFFER_RECT_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int commandCopyBufferToImage(const Buffer &src, const Image &dst, size_type src_offset, + const array &dst_origin, + const array ®ion, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandCopyBufferToImageKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandCopyBufferToImageKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + src(), + dst(), + src_offset, + dst_origin.data(), + region.data(), + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? &tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int commandCopyImage(const Image &src, const Image &dst, + const array &src_origin, + const array &dst_origin, + const array ®ion, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandCopyImageKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __COMMAND_COPY_IMAGE_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandCopyImageKHR( + object_, + (command_queue != nullptr) ? 
(*command_queue)() : nullptr, + src(), + dst(), + src_origin.data(), + dst_origin.data(), + region.data(), + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? &tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_COPY_IMAGE_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int commandCopyImageToBuffer(const Image &src, const Buffer &dst, + const array &src_origin, + const array ®ion, size_type dst_offset, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandCopyImageToBufferKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, + __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandCopyImageToBufferKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + src(), + dst(), + src_origin.data(), + region.data(), + dst_offset, + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + template + cl_int commandFillBuffer(const Buffer &buffer, PatternType pattern, size_type offset, + size_type size, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandFillBufferKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __COMMAND_FILL_BUFFER_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandFillBufferKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + buffer(), + static_cast(&pattern), + sizeof(PatternType), + offset, + size, + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? &tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_FILL_BUFFER_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int commandFillImage(const Image &image, cl_float4 fillColor, + const array &origin, + const array ®ion, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandFillImageKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __COMMAND_FILL_IMAGE_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandFillImageKHR( + object_, + (command_queue != nullptr) ? 
(*command_queue)() : nullptr, + image(), + static_cast(&fillColor), + origin.data(), + region.data(), + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? &tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_FILL_IMAGE_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + + cl_int + commandNDRangeKernel(const cl::vector &properties, + const Kernel &kernel, const NDRange &offset, const NDRange &global, + const NDRange &local = NullRange, + const vector *sync_points_vec = nullptr, + cl_sync_point_khr *sync_point = nullptr, + MutableCommandKhr *mutable_handle = nullptr, + const CommandQueue *command_queue = nullptr) { + if (pfn_clCommandNDRangeKernelKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __COMMAND_NDRANGE_KERNEL_KHR_ERR); + } + + cl_sync_point_khr tmp_sync_point; + cl_int error = detail::errHandler( + pfn_clCommandNDRangeKernelKHR( + object_, + (command_queue != nullptr) ? (*command_queue)() : nullptr, + &properties[0], + kernel(), + (cl_uint)global.dimensions(), + offset.dimensions() != 0 ? (const size_type *)offset : nullptr, + (const size_type *)global, + local.dimensions() != 0 ? (const size_type *)local : nullptr, + (sync_points_vec != nullptr) ? (cl_uint)sync_points_vec->size() : 0, + (sync_points_vec != nullptr && sync_points_vec->size() > 0) + ? &sync_points_vec->front() + : nullptr, + (sync_point != nullptr) ? 
&tmp_sync_point : nullptr, + (cl_mutable_command_khr *)mutable_handle), + __COMMAND_NDRANGE_KERNEL_KHR_ERR); + + if (sync_point != nullptr && error == CL_SUCCESS) *sync_point = tmp_sync_point; + + return error; + } + +# if defined(cl_khr_command_buffer_mutable_dispatch) + cl_int updateMutableCommands(const cl_mutable_base_config_khr *mutable_config) { + if (pfn_clUpdateMutableCommandsKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __UPDATE_MUTABLE_COMMANDS_KHR_ERR); + } + return detail::errHandler(pfn_clUpdateMutableCommandsKHR(object_, mutable_config), + __UPDATE_MUTABLE_COMMANDS_KHR_ERR); + } +# endif /* cl_khr_command_buffer_mutable_dispatch */ + + private: + static std::once_flag ext_init_; + + static void initExtensions(const cl::Device &device) { +# if CL_HPP_TARGET_OPENCL_VERSION >= 120 + cl_platform_id platform = device.getInfo()(); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCreateCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clFinalizeCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clRetainCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clReleaseCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetCommandBufferInfoKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clEnqueueCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandBarrierWithWaitListKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyBufferRectKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyBufferToImageKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyImageKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandCopyImageToBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandFillBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clCommandFillImageKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, 
clCommandNDRangeKernelKHR); +# if defined(cl_khr_command_buffer_mutable_dispatch) + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clUpdateMutableCommandsKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_PLATFORM_(platform, clGetMutableCommandInfoKHR); +# endif /* cl_khr_command_buffer_mutable_dispatch */ +# elif CL_HPP_TARGET_OPENCL_VERSION >= 110 + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCreateCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clFinalizeCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clRetainCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clReleaseCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetCommandBufferInfoKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clEnqueueCommandBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandBarrierWithWaitListKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyBufferRectKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyBufferToImageKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyImageKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandCopyImageToBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandFillBufferKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandFillImageKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clCommandNDRangeKernelKHR); +# if defined(cl_khr_command_buffer_mutable_dispatch) + CL_HPP_INIT_CL_EXT_FCN_PTR_(clUpdateMutableCommandsKHR); + CL_HPP_INIT_CL_EXT_FCN_PTR_(clGetMutableCommandInfoKHR); +# endif /* cl_khr_command_buffer_mutable_dispatch */ +# endif + if ((pfn_clCreateCommandBufferKHR == nullptr) && + (pfn_clFinalizeCommandBufferKHR == nullptr) && + (pfn_clRetainCommandBufferKHR == nullptr) && + (pfn_clReleaseCommandBufferKHR == nullptr) && + (pfn_clGetCommandBufferInfoKHR == nullptr) && + (pfn_clEnqueueCommandBufferKHR == nullptr) && + (pfn_clCommandBarrierWithWaitListKHR == nullptr) && + (pfn_clCommandCopyBufferKHR == nullptr) && + (pfn_clCommandCopyBufferRectKHR == nullptr) && + (pfn_clCommandCopyBufferToImageKHR == nullptr) && + (pfn_clCommandCopyImageKHR == nullptr) && + 
(pfn_clCommandCopyImageToBufferKHR == nullptr) && + (pfn_clCommandFillBufferKHR == nullptr) && (pfn_clCommandFillImageKHR == nullptr) && + (pfn_clCommandNDRangeKernelKHR == nullptr) +# if defined(cl_khr_command_buffer_mutable_dispatch) + && (pfn_clUpdateMutableCommandsKHR == nullptr) && + (pfn_clGetMutableCommandInfoKHR == nullptr) +# endif /* cl_khr_command_buffer_mutable_dispatch */ + ) { + detail::errHandler(CL_INVALID_VALUE, __CREATE_COMMAND_BUFFER_KHR_ERR); + } + } + }; // CommandBufferKhr + + CL_HPP_DEFINE_STATIC_MEMBER_ std::once_flag CommandBufferKhr::ext_init_; + +# if defined(cl_khr_command_buffer_mutable_dispatch) + /*! \class MutableCommandKhr + * \brief MutableCommandKhr interface for cl_mutable_command_khr. + */ + class MutableCommandKhr : public detail::Wrapper { + public: + //! \brief Default constructor - initializes to nullptr. + MutableCommandKhr() : detail::Wrapper() {} + + explicit MutableCommandKhr(const cl_mutable_command_khr &mutableCommandKhr, + bool retainObject = false) : + detail::Wrapper(mutableCommandKhr, retainObject) {} + + MutableCommandKhr &operator=(const cl_mutable_command_khr &rhs) { + detail::Wrapper::operator=(rhs); + return *this; + } + + template + cl_int getInfo(cl_mutable_command_info_khr name, T *param) const { + if (pfn_clGetMutableCommandInfoKHR == nullptr) { + return detail::errHandler(CL_INVALID_OPERATION, __GET_MUTABLE_COMMAND_INFO_KHR_ERR); + } + return detail::errHandler( + detail::getInfo(pfn_clGetMutableCommandInfoKHR, object_, name, param), + __GET_MUTABLE_COMMAND_INFO_KHR_ERR); + } + + template + typename detail::param_traits::param_type + getInfo(cl_int *err = nullptr) const { + typename detail::param_traits::param_type + param; + cl_int result = getInfo(name, ¶m); + if (err != nullptr) { *err = result; } + return param; + } + }; // MutableCommandKhr +# endif /* cl_khr_command_buffer_mutable_dispatch */ #endif // cl_khr_command_buffer 
-//---------------------------------------------------------------------------------------------------------------------- + //---------------------------------------------------------------------------------------------------------------------- #undef CL_HPP_ERR_STR_ #if !defined(CL_HPP_USER_OVERRIDE_ERROR_STRINGS) -#undef __GET_DEVICE_INFO_ERR -#undef __GET_PLATFORM_INFO_ERR -#undef __GET_DEVICE_IDS_ERR -#undef __GET_PLATFORM_IDS_ERR -#undef __GET_CONTEXT_INFO_ERR -#undef __GET_EVENT_INFO_ERR -#undef __GET_EVENT_PROFILE_INFO_ERR -#undef __GET_MEM_OBJECT_INFO_ERR -#undef __GET_IMAGE_INFO_ERR -#undef __GET_SAMPLER_INFO_ERR -#undef __GET_KERNEL_INFO_ERR -#undef __GET_KERNEL_ARG_INFO_ERR -#undef __GET_KERNEL_SUB_GROUP_INFO_ERR -#undef __GET_KERNEL_WORK_GROUP_INFO_ERR -#undef __GET_PROGRAM_INFO_ERR -#undef __GET_PROGRAM_BUILD_INFO_ERR -#undef __GET_COMMAND_QUEUE_INFO_ERR -#undef __CREATE_CONTEXT_ERR -#undef __CREATE_CONTEXT_FROM_TYPE_ERR -#undef __CREATE_COMMAND_BUFFER_KHR_ERR -#undef __GET_COMMAND_BUFFER_INFO_KHR_ERR -#undef __FINALIZE_COMMAND_BUFFER_KHR_ERR -#undef __ENQUEUE_COMMAND_BUFFER_KHR_ERR -#undef __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR -#undef __COMMAND_COPY_BUFFER_KHR_ERR -#undef __COMMAND_COPY_BUFFER_RECT_KHR_ERR -#undef __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR -#undef __COMMAND_COPY_IMAGE_KHR_ERR -#undef __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR -#undef __COMMAND_FILL_BUFFER_KHR_ERR -#undef __COMMAND_FILL_IMAGE_KHR_ERR -#undef __COMMAND_NDRANGE_KERNEL_KHR_ERR -#undef __UPDATE_MUTABLE_COMMANDS_KHR_ERR -#undef __GET_MUTABLE_COMMAND_INFO_KHR_ERR -#undef __RETAIN_COMMAND_BUFFER_KHR_ERR -#undef __RELEASE_COMMAND_BUFFER_KHR_ERR -#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR -#undef __SET_CONTEXT_DESCTRUCTOR_CALLBACK_ERR -#undef __CREATE_BUFFER_ERR -#undef __COPY_ERR -#undef __CREATE_SUBBUFFER_ERR -#undef __CREATE_GL_BUFFER_ERR -#undef __CREATE_GL_RENDER_BUFFER_ERR -#undef __GET_GL_OBJECT_INFO_ERR -#undef __CREATE_IMAGE_ERR -#undef __CREATE_GL_TEXTURE_ERR -#undef 
__IMAGE_DIMENSION_ERR -#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR -#undef __CREATE_USER_EVENT_ERR -#undef __SET_USER_EVENT_STATUS_ERR -#undef __SET_EVENT_CALLBACK_ERR -#undef __WAIT_FOR_EVENTS_ERR -#undef __CREATE_KERNEL_ERR -#undef __SET_KERNEL_ARGS_ERR -#undef __CREATE_PROGRAM_WITH_SOURCE_ERR -#undef __CREATE_PROGRAM_WITH_BINARY_ERR -#undef __CREATE_PROGRAM_WITH_IL_ERR -#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR -#undef __BUILD_PROGRAM_ERR -#undef __COMPILE_PROGRAM_ERR -#undef __LINK_PROGRAM_ERR -#undef __CREATE_KERNELS_IN_PROGRAM_ERR -#undef __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR -#undef __CREATE_SAMPLER_WITH_PROPERTIES_ERR -#undef __SET_COMMAND_QUEUE_PROPERTY_ERR -#undef __ENQUEUE_READ_BUFFER_ERR -#undef __ENQUEUE_READ_BUFFER_RECT_ERR -#undef __ENQUEUE_WRITE_BUFFER_ERR -#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR -#undef __ENQEUE_COPY_BUFFER_ERR -#undef __ENQEUE_COPY_BUFFER_RECT_ERR -#undef __ENQUEUE_FILL_BUFFER_ERR -#undef __ENQUEUE_READ_IMAGE_ERR -#undef __ENQUEUE_WRITE_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_ERR -#undef __ENQUEUE_FILL_IMAGE_ERR -#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR -#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR -#undef __ENQUEUE_MAP_BUFFER_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_MAP_SVM_ERR -#undef __ENQUEUE_FILL_SVM_ERR -#undef __ENQUEUE_COPY_SVM_ERR -#undef __ENQUEUE_UNMAP_SVM_ERR -#undef __ENQUEUE_MAP_IMAGE_ERR -#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR -#undef __ENQUEUE_NDRANGE_KERNEL_ERR -#undef __ENQUEUE_NATIVE_KERNEL -#undef __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR -#undef __ENQUEUE_MIGRATE_SVM_ERR -#undef __ENQUEUE_ACQUIRE_GL_ERR -#undef __ENQUEUE_RELEASE_GL_ERR -#undef __CREATE_PIPE_ERR -#undef __GET_PIPE_INFO_ERR -#undef __RETAIN_ERR -#undef __RELEASE_ERR -#undef __FLUSH_ERR -#undef __FINISH_ERR -#undef __VECTOR_CAPACITY_ERR -#undef __CREATE_SUB_DEVICES_ERR -#undef __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR -#undef __ENQUEUE_RELEASE_EXTERNAL_MEMORY_ERR -#undef __ENQUEUE_MARKER_ERR -#undef __ENQUEUE_WAIT_FOR_EVENTS_ERR 
-#undef __ENQUEUE_BARRIER_ERR -#undef __UNLOAD_COMPILER_ERR -#undef __CREATE_GL_TEXTURE_2D_ERR -#undef __CREATE_GL_TEXTURE_3D_ERR -#undef __CREATE_IMAGE2D_ERR -#undef __CREATE_IMAGE3D_ERR -#undef __CREATE_COMMAND_QUEUE_ERR -#undef __ENQUEUE_TASK_ERR -#undef __CREATE_SAMPLER_ERR -#undef __ENQUEUE_MARKER_WAIT_LIST_ERR -#undef __ENQUEUE_BARRIER_WAIT_LIST_ERR -#undef __CLONE_KERNEL_ERR -#undef __GET_HOST_TIMER_ERR -#undef __GET_DEVICE_AND_HOST_TIMER_ERR -#undef __GET_SEMAPHORE_KHR_INFO_ERR -#undef __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR -#undef __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR -#undef __ENQUEUE_WAIT_SEMAPHORE_KHR_ERR -#undef __ENQUEUE_SIGNAL_SEMAPHORE_KHR_ERR -#undef __RETAIN_SEMAPHORE_KHR_ERR -#undef __RELEASE_SEMAPHORE_KHR_ERR -#undef __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR - -#endif //CL_HPP_USER_OVERRIDE_ERROR_STRINGS +# undef __GET_DEVICE_INFO_ERR +# undef __GET_PLATFORM_INFO_ERR +# undef __GET_DEVICE_IDS_ERR +# undef __GET_PLATFORM_IDS_ERR +# undef __GET_CONTEXT_INFO_ERR +# undef __GET_EVENT_INFO_ERR +# undef __GET_EVENT_PROFILE_INFO_ERR +# undef __GET_MEM_OBJECT_INFO_ERR +# undef __GET_IMAGE_INFO_ERR +# undef __GET_SAMPLER_INFO_ERR +# undef __GET_KERNEL_INFO_ERR +# undef __GET_KERNEL_ARG_INFO_ERR +# undef __GET_KERNEL_SUB_GROUP_INFO_ERR +# undef __GET_KERNEL_WORK_GROUP_INFO_ERR +# undef __GET_PROGRAM_INFO_ERR +# undef __GET_PROGRAM_BUILD_INFO_ERR +# undef __GET_COMMAND_QUEUE_INFO_ERR +# undef __CREATE_CONTEXT_ERR +# undef __CREATE_CONTEXT_FROM_TYPE_ERR +# undef __CREATE_COMMAND_BUFFER_KHR_ERR +# undef __GET_COMMAND_BUFFER_INFO_KHR_ERR +# undef __FINALIZE_COMMAND_BUFFER_KHR_ERR +# undef __ENQUEUE_COMMAND_BUFFER_KHR_ERR +# undef __COMMAND_BARRIER_WITH_WAIT_LIST_KHR_ERR +# undef __COMMAND_COPY_BUFFER_KHR_ERR +# undef __COMMAND_COPY_BUFFER_RECT_KHR_ERR +# undef __COMMAND_COPY_BUFFER_TO_IMAGE_KHR_ERR +# undef __COMMAND_COPY_IMAGE_KHR_ERR +# undef __COMMAND_COPY_IMAGE_TO_BUFFER_KHR_ERR +# undef __COMMAND_FILL_BUFFER_KHR_ERR +# undef __COMMAND_FILL_IMAGE_KHR_ERR 
+# undef __COMMAND_NDRANGE_KERNEL_KHR_ERR +# undef __UPDATE_MUTABLE_COMMANDS_KHR_ERR +# undef __GET_MUTABLE_COMMAND_INFO_KHR_ERR +# undef __RETAIN_COMMAND_BUFFER_KHR_ERR +# undef __RELEASE_COMMAND_BUFFER_KHR_ERR +# undef __GET_SUPPORTED_IMAGE_FORMATS_ERR +# undef __SET_CONTEXT_DESCTRUCTOR_CALLBACK_ERR +# undef __CREATE_BUFFER_ERR +# undef __COPY_ERR +# undef __CREATE_SUBBUFFER_ERR +# undef __CREATE_GL_BUFFER_ERR +# undef __CREATE_GL_RENDER_BUFFER_ERR +# undef __GET_GL_OBJECT_INFO_ERR +# undef __CREATE_IMAGE_ERR +# undef __CREATE_GL_TEXTURE_ERR +# undef __IMAGE_DIMENSION_ERR +# undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR +# undef __CREATE_USER_EVENT_ERR +# undef __SET_USER_EVENT_STATUS_ERR +# undef __SET_EVENT_CALLBACK_ERR +# undef __WAIT_FOR_EVENTS_ERR +# undef __CREATE_KERNEL_ERR +# undef __SET_KERNEL_ARGS_ERR +# undef __CREATE_PROGRAM_WITH_SOURCE_ERR +# undef __CREATE_PROGRAM_WITH_BINARY_ERR +# undef __CREATE_PROGRAM_WITH_IL_ERR +# undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR +# undef __BUILD_PROGRAM_ERR +# undef __COMPILE_PROGRAM_ERR +# undef __LINK_PROGRAM_ERR +# undef __CREATE_KERNELS_IN_PROGRAM_ERR +# undef __CREATE_COMMAND_QUEUE_WITH_PROPERTIES_ERR +# undef __CREATE_SAMPLER_WITH_PROPERTIES_ERR +# undef __SET_COMMAND_QUEUE_PROPERTY_ERR +# undef __ENQUEUE_READ_BUFFER_ERR +# undef __ENQUEUE_READ_BUFFER_RECT_ERR +# undef __ENQUEUE_WRITE_BUFFER_ERR +# undef __ENQUEUE_WRITE_BUFFER_RECT_ERR +# undef __ENQEUE_COPY_BUFFER_ERR +# undef __ENQEUE_COPY_BUFFER_RECT_ERR +# undef __ENQUEUE_FILL_BUFFER_ERR +# undef __ENQUEUE_READ_IMAGE_ERR +# undef __ENQUEUE_WRITE_IMAGE_ERR +# undef __ENQUEUE_COPY_IMAGE_ERR +# undef __ENQUEUE_FILL_IMAGE_ERR +# undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR +# undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR +# undef __ENQUEUE_MAP_BUFFER_ERR +# undef __ENQUEUE_MAP_IMAGE_ERR +# undef __ENQUEUE_MAP_SVM_ERR +# undef __ENQUEUE_FILL_SVM_ERR +# undef __ENQUEUE_COPY_SVM_ERR +# undef __ENQUEUE_UNMAP_SVM_ERR +# undef __ENQUEUE_MAP_IMAGE_ERR +# undef 
__ENQUEUE_UNMAP_MEM_OBJECT_ERR +# undef __ENQUEUE_NDRANGE_KERNEL_ERR +# undef __ENQUEUE_NATIVE_KERNEL +# undef __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR +# undef __ENQUEUE_MIGRATE_SVM_ERR +# undef __ENQUEUE_ACQUIRE_GL_ERR +# undef __ENQUEUE_RELEASE_GL_ERR +# undef __CREATE_PIPE_ERR +# undef __GET_PIPE_INFO_ERR +# undef __RETAIN_ERR +# undef __RELEASE_ERR +# undef __FLUSH_ERR +# undef __FINISH_ERR +# undef __VECTOR_CAPACITY_ERR +# undef __CREATE_SUB_DEVICES_ERR +# undef __ENQUEUE_ACQUIRE_EXTERNAL_MEMORY_ERR +# undef __ENQUEUE_RELEASE_EXTERNAL_MEMORY_ERR +# undef __ENQUEUE_MARKER_ERR +# undef __ENQUEUE_WAIT_FOR_EVENTS_ERR +# undef __ENQUEUE_BARRIER_ERR +# undef __UNLOAD_COMPILER_ERR +# undef __CREATE_GL_TEXTURE_2D_ERR +# undef __CREATE_GL_TEXTURE_3D_ERR +# undef __CREATE_IMAGE2D_ERR +# undef __CREATE_IMAGE3D_ERR +# undef __CREATE_COMMAND_QUEUE_ERR +# undef __ENQUEUE_TASK_ERR +# undef __CREATE_SAMPLER_ERR +# undef __ENQUEUE_MARKER_WAIT_LIST_ERR +# undef __ENQUEUE_BARRIER_WAIT_LIST_ERR +# undef __CLONE_KERNEL_ERR +# undef __GET_HOST_TIMER_ERR +# undef __GET_DEVICE_AND_HOST_TIMER_ERR +# undef __GET_SEMAPHORE_KHR_INFO_ERR +# undef __CREATE_SEMAPHORE_KHR_WITH_PROPERTIES_ERR +# undef __GET_IMAGE_REQUIREMENT_INFO_EXT_ERR +# undef __ENQUEUE_WAIT_SEMAPHORE_KHR_ERR +# undef __ENQUEUE_SIGNAL_SEMAPHORE_KHR_ERR +# undef __RETAIN_SEMAPHORE_KHR_ERR +# undef __RELEASE_SEMAPHORE_KHR_ERR +# undef __GET_SEMAPHORE_HANDLE_FOR_TYPE_KHR_ERR + +#endif // CL_HPP_USER_OVERRIDE_ERROR_STRINGS // Extensions #undef CL_HPP_CREATE_CL_EXT_FCN_PTR_ALIAS_ @@ -11689,4 +9873,8 @@ class MutableCommandKhr : public detail::Wrapper } // namespace cl +#if defined(LIBRAPID_GNU) || defined(LIBRAPID_CLANG) +# pragma GCC diagnostic pop +#endif + #endif // CL_HPP_ diff --git a/librapid/include/librapid/utils/consoleSize.hpp b/librapid/include/librapid/utils/consoleSize.hpp new file mode 100644 index 00000000..205b0019 --- /dev/null +++ b/librapid/include/librapid/utils/consoleSize.hpp @@ -0,0 +1,15 @@ +#ifndef 
LIBRAPID_UTILS_CONSOLE_SIZE_HPP +#define LIBRAPID_UTILS_CONSOLE_SIZE_HPP + +namespace librapid { + struct ConsoleSize { + int rows; + int cols; + }; + + /// \brief Get the size of the console window in characters (rows and columns). + /// \return ConsoleSize + ConsoleSize consoleSize(); +} // namespace librapid + +#endif // LIBRAPID_UTILS_CONSOLE_SIZE_HPP \ No newline at end of file diff --git a/librapid/include/librapid/utils/utils.hpp b/librapid/include/librapid/utils/utils.hpp index e6abbc99..89c5599c 100644 --- a/librapid/include/librapid/utils/utils.hpp +++ b/librapid/include/librapid/utils/utils.hpp @@ -4,5 +4,6 @@ #include "cacheLineSize.hpp" #include "time.hpp" #include "memUtils.hpp" +#include "consoleSize.hpp" #endif // LIBRAPID_UTILS \ No newline at end of file diff --git a/librapid/src/consoleSize.cpp b/librapid/src/consoleSize.cpp new file mode 100644 index 00000000..cc40c2fe --- /dev/null +++ b/librapid/src/consoleSize.cpp @@ -0,0 +1,32 @@ +#include + +#if defined(LIBRAPID_OSX) +# include +# include +#elif defined(LIBRAPID_LINUX) +# include +# include +#elif defined(LIBRAPID_WINDOWS) +# include +#endif + +namespace librapid { + ConsoleSize consoleSize() { +#if defined(LIBRAPID_OSX) + struct winsize w {}; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + return {w.ws_row, w.ws_col}; +#elif defined(LIBRAPID_LINUX) + struct winsize w; + ioctl(STDOUT_FILENO, TIOCGWINSZ, &w); + return {w.ws_row, w.ws_col}; +#elif defined(LIBRAPID_WINDOWS) + CONSOLE_SCREEN_BUFFER_INFO csbi; + GetConsoleScreenBufferInfo(GetStdHandle(STD_OUTPUT_HANDLE), &csbi); + return {csbi.srWindow.Bottom - csbi.srWindow.Top + 1, + csbi.srWindow.Right - csbi.srWindow.Left + 1}; +#else + return {24, 80}; +#endif + } +} // namespace librapid diff --git a/librapid/src/global.cpp b/librapid/src/global.cpp index b741e883..2bfc7acd 100644 --- a/librapid/src/global.cpp +++ b/librapid/src/global.cpp @@ -4,7 +4,7 @@ namespace librapid { namespace global { - bool throwOnAssert = false; + bool printOnAssert 
= true; size_t multithreadThreshold = 5000; size_t gemmMultithreadThreshold = 100; size_t gemvMultithreadThreshold = 100; diff --git a/librapid/src/openclConfigure.cpp b/librapid/src/openclConfigure.cpp index 89576fb6..17955463 100644 --- a/librapid/src/openclConfigure.cpp +++ b/librapid/src/openclConfigure.cpp @@ -1,4 +1,5 @@ #include +#include // std::ifstream namespace librapid { #if defined(LIBRAPID_HAS_OPENCL) diff --git a/librapid/vendor/fmt b/librapid/vendor/fmt index d9063baf..19276d73 160000 --- a/librapid/vendor/fmt +++ b/librapid/vendor/fmt @@ -1 +1 @@ -Subproject commit d9063baf227882da0f48c761abcbb08247eb1296 +Subproject commit 19276d73254f2b06e5d466f45c7390f1ccf0354e diff --git a/pyproject.toml b/pyproject.toml index 69ef7efa..8622138e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,31 +14,11 @@ build-backend = "scikit_build_core.build" [tool.scikit-build] -cmake.build-type = "Release" +cmake.build-type = "MinSizeRel" +cmake.args = ["-DCMAKE_BUILD_TYPE=MinSizeRel"] ninja.make-fallback = true - -sdist.exclude = [ - "CMakeLists.txt", - "cmake", - "CMakeFiles", - "build", - "dist", - "*.h", - "*.c", - "*.hpp", - "*.cpp", - "*.tcc", - "*.cxx", - "*.cu", - "*.cuh", - "*.cl", - "*.so", - "*.dylib", - "*.dll", - "*.doc", - "*.tgz", -] +sdist.cmake = true [project] name = "librapid" diff --git a/scripts/tmp/pyprojectTemplate.toml b/scripts/tmp/pyprojectTemplate.toml index 7639b046..3aedf302 100644 --- a/scripts/tmp/pyprojectTemplate.toml +++ b/scripts/tmp/pyprojectTemplate.toml @@ -15,7 +15,6 @@ build-backend = "scikit_build_core.build" [tool.scikit-build] cmake.build-type = "Release" - ninja.make-fallback = true sdist.exclude = [ diff --git a/test/test-arrayConstructors.cpp b/test/test-arrayConstructors.cpp index 7face6df..cc101892 100644 --- a/test/test-arrayConstructors.cpp +++ b/test/test-arrayConstructors.cpp @@ -75,7 +75,7 @@ using CUDA = lrc::backend::CUDA; /* Due to the way the code works, if this passes for a 3D array, it *must* pass for all 
\ * other dimensions */ \ auto testI = \ - lrc::Array::fromData(InitList({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); \ + lrc::Array(InitList({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); \ REQUIRE(fmt::format("{}", testI) == \ fmt::format("[[[{} {}]\n [{} {}]]\n\n [[{} {}]\n [{} {}]]]", \ SCALAR(1), \ @@ -88,7 +88,7 @@ using CUDA = lrc::backend::CUDA; SCALAR(8))); \ \ auto testJ = \ - lrc::Array::fromData(Vec({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); \ + lrc::Array(Vec({{{1, 2}, {3, 4}}, {{5, 6}, {7, 8}}})); \ REQUIRE(fmt::format("{}", testJ) == \ fmt::format("[[[{} {}]\n [{} {}]]\n\n [[{} {}]\n [{} {}]]]", \ SCALAR(1), \