From f7b9c4531c2f1c29c613254c381f497288591959 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 6 Nov 2025 14:56:01 +0800 Subject: [PATCH 1/7] Expose the categories container to C. --- include/xgboost/c_api.h | 72 +++++++++++++++++++++++++++++++++++++++-- src/c_api/c_api.cc | 35 ++------------------ 2 files changed, 73 insertions(+), 34 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 6237af7b5196..ca03d1baa86d 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -45,10 +45,12 @@ typedef uint64_t bst_ulong; // NOLINT(*) * @{ */ -/** @brief handle to DMatrix */ +/** @brief Handle to the DMatrix */ typedef void *DMatrixHandle; // NOLINT(*) -/** @brief handle to Booster */ +/** @brief Handle to the Booster */ typedef void *BoosterHandle; // NOLINT(*) +/** @brief Handle to the categories container. */ +typedef void * CategoriesHandle; // NOLINT(*) /** * @brief Return the version of the XGBoost library. @@ -801,6 +803,53 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, bst_ulong *size, const char ***out_features); +/** + * @brief Create an opaque handle to the internal category container. + * + * @since 3.2 + * + * @note Experimental API, subject to change in the future. + * + * The container should be freed by @ref XGBCategoriesFree + * + * @param handle An instance of the data matrix. + * @param out Created handle to the category container. Set to NULL if there's no category. + * + * @return 0 when success, -1 when failure happens. + */ +XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const *config, CategoriesHandle *out); + +/** + * @brief Create an opaque handle to the internal container and export it to arrow. + * + * @since 3.2 + * + * @note Experimental API, subject to change in the future. + * + * The container should be freed by @ref XGBCategoriesFree + * + * @param handle An instance of the data matrix. 
+ * @param out Created handle to the category container + * @param export_out JSON encoded array of categories, with length equal to the number of features. + * + * @return 0 when success, -1 when failure happens. + */ +XGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const *config, + CategoriesHandle *out, char const **export_out); + +/** + * @brief Free the opaque handle. + * + * @since 3.2 + * + * @note Experimental API, subject to change in the future. + * + * @param handle An instance of the category container. + * + * @return 0 when success, -1 when failure happens. + */ +XGB_DLL int XGBCategoriesFree(CategoriesHandle handle); + /** * @deprecated since 2.1.0 * @@ -1503,6 +1552,25 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, bst_ulong *out_len, const char ***out_models); +/** + * See @ref XGDMatrixGetCategories + * + * @since 3.2 + * + * @note Experimental API, subject to change in the future. + */ +XGB_DLL int XGBoosterGetCategories(DMatrixHandle handle, char const *config, CategoriesHandle *out); + +/** + * See @ref XGDMatrixGetCategoriesExportToArrow + * + * @since 3.2 + * + * @note Experimental API, subject to change in the future. + */ +XGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const * config, + CategoriesHandle *out, char const **export_out); + /** * @brief Get string attribute from Booster. * @param handle handle diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 074d1ab76e4f..f35c033bda58 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -755,20 +755,10 @@ CatContainer *CopyCatContainer(Context const *ctx, CatContainer const *cats, typedef void * CategoriesHandle; // NOLINT /** - * Fetching categories is experimental (3.1), C functions are hidden at the moment. - * * No actual container method is exposed through the C API. It's just an opaque handle at * the moment. This way we get to reuse the methods and the context from the DMatrix and * Booster. 
*/ -/** - * @brief Create an opaque handle to the internal container. - * - * @param handle An instance of the data matrix. - * @param out Created handle to the category container. Set to NULL if there's no category. - * - * @return 0 when success, -1 when failure happens. - */ XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const * /*config*/, CategoriesHandle *out) { API_BEGIN() @@ -786,15 +776,7 @@ XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const * /*config*/ API_END() } -/** - * @brief Create an opaque handle to the internal container and export it to arrow. - * - * @param handle An instance of the data matrix. - * @param out Created handle to the category container - * @param export_out JSON encoded array of categories, with length equal to the number of features. - * - * @return 0 when success, -1 when failure happens. - */ + XGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const * /*config*/, CategoriesHandle *out, char const **export_out) { API_BEGIN(); @@ -821,13 +803,7 @@ XGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const API_END(); } -/** - * @brief Free the opaque handle. - * - * @param handle An instance of the category container. - * - * @return 0 when success, -1 when failure happens. - */ + XGB_DLL int XGBCategoriesFree(CategoriesHandle handle) { API_BEGIN(); xgboost_CHECK_C_ARG_PTR(handle); @@ -1763,12 +1739,6 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, API_END(); } -/** - * Experimental (3.1), hidden. 
- */ -/** - * See @ref XGDMatrixGetCategories - */ XGB_DLL int XGBoosterGetCategories(DMatrixHandle handle, char const * /*config*/, CategoriesHandle *out) { API_BEGIN() @@ -1786,6 +1756,7 @@ XGB_DLL int XGBoosterGetCategories(DMatrixHandle handle, char const * /*config*/ API_END() } + /** * See @ref XGDMatrixGetCategoriesExportToArrow */ From a4ac75a37da624507335a83547433c3a49a4b277 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 7 Nov 2025 05:42:36 +0800 Subject: [PATCH 2/7] Update include/xgboost/c_api.h Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- include/xgboost/c_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index ca03d1baa86d..5585085ff5b8 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -1559,7 +1559,7 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, * * @note Experimental API, subject to change in the future. */ -XGB_DLL int XGBoosterGetCategories(DMatrixHandle handle, char const *config, CategoriesHandle *out); +XGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const *config, CategoriesHandle *out); /** * See @ref XGDMatrixGetCategoriesExportToArrow From 2d29a549b5d7cae3874af4af7769def0a2c0a325 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 7 Nov 2025 05:47:48 +0800 Subject: [PATCH 3/7] Apply suggestions from code review Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- include/xgboost/c_api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 5585085ff5b8..efc411a20308 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -50,7 +50,7 @@ typedef void *DMatrixHandle; // NOLINT(*) /** @brief Handle to the Booster */ typedef void *BoosterHandle; // NOLINT(*) /** @brief Handle to the categories container. 
*/ -typedef void * CategoriesHandle; // NOLINT(*) +typedef void * CategoriesHandle; // NOLINT(*) /** * @brief Return the version of the XGBoost library. @@ -1568,7 +1568,7 @@ XGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const *config, Cat * * @note Experimental API, subject to change in the future. */ -XGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const * config, +XGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const *config, CategoriesHandle *out, char const **export_out); /** From 41ff2540f7d957288f1985c5f04a85d4b4a90a15 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 7 Nov 2025 05:49:13 +0800 Subject: [PATCH 4/7] Update src/c_api/c_api.cc --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f35c033bda58..8c161be35aae 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1739,7 +1739,7 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, API_END(); } -XGB_DLL int XGBoosterGetCategories(DMatrixHandle handle, char const * /*config*/, +XGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const * /*config*/, CategoriesHandle *out) { API_BEGIN() CHECK_HANDLE() From 7b041c16cce408b3c25505bc5f22d701fa9edf58 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 7 Nov 2025 17:01:57 +0800 Subject: [PATCH 5/7] Document config. --- include/xgboost/c_api.h | 2 ++ src/c_api/c_api.cc | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index efc411a20308..e9d434e3d618 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -813,6 +813,7 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, * The container should be freed by @ref XGBCategoriesFree * * @param handle An instance of the data matrix. + * @param config Unused, reserved for the future. 
* @param out Created handle to the category container. Set to NULL if there's no category. * * @return 0 when success, -1 when failure happens. @@ -829,6 +830,7 @@ XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const *config, Cat * The container should be freed by @ref XGBCategoriesFree * * @param handle An instance of the data matrix. + * @param config Unused, reserved for the future. * @param out Created handle to the category container * @param export_out JSON encoded array of categories, with length equal to the number of features. * diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 8c161be35aae..3a8dd9158ab6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1757,9 +1757,6 @@ XGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const * /*config*/ API_END() } -/** - * See @ref XGDMatrixGetCategoriesExportToArrow - */ XGB_DLL int XGBoosterGetCategoriesExportToArrow(BoosterHandle handle, char const * /*config*/, CategoriesHandle *out, char const **export_out) { API_BEGIN() From dfaaf1cb03861cb4050690f4d19a8b10289abd85 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 7 Nov 2025 17:05:51 +0800 Subject: [PATCH 6/7] doxygen. --- include/xgboost/c_api.h | 18 +++++++++++------- src/c_api/c_api.cc | 2 -- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index e9d434e3d618..03513468b175 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -49,8 +49,12 @@ typedef uint64_t bst_ulong; // NOLINT(*) typedef void *DMatrixHandle; // NOLINT(*) /** @brief Handle to the Booster */ typedef void *BoosterHandle; // NOLINT(*) -/** @brief Handle to the categories container. */ -typedef void * CategoriesHandle; // NOLINT(*) +/** + * @brief Handle to the categories container. + * + * @since 3.2.0 + */ +typedef void *CategoriesHandle; // NOLINT(*) /** * @brief Return the version of the XGBoost library. 
@@ -806,7 +810,7 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, /** * @brief Create an opaque handle to the internal category container. * - * @since 3.2 + * @since 3.2.0 * * @note Experimental API, subject to change in the future. * @@ -823,7 +827,7 @@ XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const *config, Cat /** * @brief Create an opaque handle to the internal container and export it to arrow. * - * @since 3.2 + * @since 3.2.0 * * @note Experimental API, subject to change in the future. * @@ -842,7 +846,7 @@ XGB_DLL int XGDMatrixGetCategoriesExportToArrow(DMatrixHandle handle, char const /** * @brief Free the opaque handle. * - * @since 3.2 + * @since 3.2.0 * * @note Experimental API, subject to change in the future. * @@ -1557,7 +1561,7 @@ XGB_DLL int XGBoosterDumpModelExWithFeatures(BoosterHandle handle, /** * See @ref XGDMatrixGetCategories * - * @since 3.2 + * @since 3.2.0 * * @note Experimental API, subject to change in the future. */ @@ -1566,7 +1570,7 @@ XGB_DLL int XGBoosterGetCategories(BoosterHandle handle, char const *config, Cat /** * See @ref XGDMatrixGetCategoriesExportToArrow * - * @since 3.2 + * @since 3.2.0 * * @note Experimental API, subject to change in the future. */ diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 3a8dd9158ab6..fd53f3e2ecd2 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -752,8 +752,6 @@ CatContainer *CopyCatContainer(Context const *ctx, CatContainer const *cats, } } // anonymous namespace -typedef void * CategoriesHandle; // NOLINT - /** * No actual container method is exposed through the C API. It's just an opaque handle at * the moment. This way we get to reuse the methods and the context from the DMatrix and From 6b56f0614624a72279e15d573b4116c089098b0f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 7 Nov 2025 17:14:45 +0800 Subject: [PATCH 7/7] Documents. 
--- include/xgboost/c_api.h | 15 +++++++++++++++ python-package/xgboost/_data_utils.py | 8 ++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 03513468b175..6b7a879350c9 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -821,6 +821,21 @@ XGB_DLL int XGDMatrixGetStrFeatureInfo(DMatrixHandle handle, const char *field, * @param out Created handle to the category container. Set to NULL if there's no category. * * @return 0 when success, -1 when failure happens. + * + * @code{c} + * DMatrixHandle fmat; + * // Create a DMatrix from categorical data + * // ... + * CategoriesHandle cats; + * int err = XGDMatrixGetCategories(fmat, NULL, &cats); + * if (err != 0) { + * exit(-1); + * } + * err = XGBCategoriesFree(cats); + * if (err != 0) { + * exit(-1); + * } + * @endcode */ XGB_DLL int XGDMatrixGetCategories(DMatrixHandle handle, char const *config, CategoriesHandle *out); diff --git a/python-package/xgboost/_data_utils.py b/python-package/xgboost/_data_utils.py index 4531848aa782..e16d7bd41af0 100644 --- a/python-package/xgboost/_data_utils.py +++ b/python-package/xgboost/_data_utils.py @@ -643,10 +643,10 @@ def __init__( arrow_arrays: Optional[ArrowCatList], ) -> None: # The handle type is a bundle of the handle and the free call. Otherwise, we - # will have to import the lib and checkcall inside the __del__ method from the - # core module to avoid cyclic model dependency. Importing modules in __del__ can - # result in Python abort if __del__ is called during exception handling - # (interpreter is shutting down). + # will have to import the `_lib` and the `_check_call` from the core module + # inside the __del__ method to avoid cyclic module dependency. + # Importing modules in __del__ can result in Python abort if __del__ is called + # during exception handling (interpreter is shutting down). self._handle, self._free = handle self._arrow_arrays = arrow_arrays