From 25cf2c4b747bfa75031476e242891325a289f923 Mon Sep 17 00:00:00 2001 From: Michael Rapp Date: Wed, 27 Mar 2024 23:58:00 +0100 Subject: [PATCH] Remove code from repository. --- CHANGELOG.md | 181 - CODE_OF_CONDUCT.md | 128 - CONTRIBUTORS.md | 22 - LICENSE.md | 9 - Makefile | 242 - VERSION | 1 - assets/project_structure.svg | 230 - cpp/meson.build | 2 - .../boosting/binning/feature_binning_auto.hpp | 35 - .../boosting/binning/label_binning.hpp | 222 - .../boosting/binning/label_binning_auto.hpp | 52 - .../binning/label_binning_equal_width.hpp | 134 - .../boosting/binning/label_binning_no.hpp | 52 - .../boosting/include/boosting/data/arrays.hpp | 122 - .../data/histogram_view_label_wise_sparse.hpp | 142 - .../data/matrix_c_contiguous_numeric.hpp | 95 - .../data/matrix_sparse_set_numeric.hpp | 89 - .../statistic_vector_example_wise_dense.hpp | 313 - .../statistic_vector_label_wise_dense.hpp | 227 - .../statistic_vector_label_wise_sparse.hpp | 377 - .../statistic_view_example_wise_dense.hpp | 221 - .../data/statistic_view_label_wise_dense.hpp | 135 - .../data/statistic_view_label_wise_sparse.hpp | 123 - .../boosting/iterator/diagonal_iterator.hpp | 127 - .../boosting/include/boosting/learner.hpp | 1083 -- .../include/boosting/learner_boomer.hpp | 177 - .../boosting/include/boosting/losses/loss.hpp | 116 - .../boosting/losses/loss_example_wise.hpp | 103 - .../losses/loss_example_wise_logistic.hpp | 41 - .../loss_example_wise_squared_error.hpp | 41 - .../loss_example_wise_squared_hinge.hpp | 41 - .../boosting/losses/loss_label_wise.hpp | 167 - .../losses/loss_label_wise_logistic.hpp | 41 - .../losses/loss_label_wise_sparse.hpp | 172 - .../losses/loss_label_wise_squared_error.hpp | 41 - .../losses/loss_label_wise_squared_hinge.hpp | 41 - .../boosting/include/boosting/macros.hpp | 14 - .../boosting/include/boosting/math/blas.hpp | 71 - .../boosting/include/boosting/math/lapack.hpp | 90 - .../boosting/include/boosting/math/math.hpp | 136 - 
.../boosting/model/rule_list_builder.hpp | 23 - .../parallel_rule_refinement_auto.hpp | 46 - .../parallel_statistic_update_auto.hpp | 33 - .../post_processing/shrinkage_constant.hpp | 60 - .../prediction/discretization_function.hpp | 51 - .../discretization_function_probability.hpp | 35 - .../discretization_function_score.hpp | 30 - .../prediction/predictor_binary_auto.hpp | 53 - .../prediction/predictor_binary_common.hpp | 388 - .../predictor_binary_example_wise.hpp | 120 - .../prediction/predictor_binary_gfm.hpp | 93 - .../predictor_binary_label_wise.hpp | 118 - .../prediction/predictor_probability_auto.hpp | 47 - .../predictor_probability_common.hpp | 164 - .../predictor_probability_label_wise.hpp | 90 - .../predictor_probability_marginalized.hpp | 88 - .../prediction/predictor_score_common.hpp | 247 - .../prediction/predictor_score_label_wise.hpp | 41 - .../probability_calibration_isotonic.hpp | 145 - .../probability_function_chain_rule.hpp | 37 - .../prediction/probability_function_joint.hpp | 141 - .../probability_function_logistic.hpp | 21 - .../probability_function_marginal.hpp | 50 - .../prediction/transformation_binary.hpp | 50 - .../transformation_binary_example_wise.hpp | 44 - .../prediction/transformation_binary_gfm.hpp | 46 - .../transformation_binary_label_wise.hpp | 37 - .../prediction/transformation_probability.hpp | 35 - .../transformation_probability_label_wise.hpp | 37 - ...ransformation_probability_marginalized.hpp | 41 - .../boosting/rule_evaluation/head_type.hpp | 93 - .../rule_evaluation/head_type_auto.hpp | 67 - .../rule_evaluation/head_type_complete.hpp | 62 - .../head_type_partial_dynamic.hpp | 118 - .../head_type_partial_fixed.hpp | 136 - .../rule_evaluation/head_type_single.hpp | 62 - .../rule_evaluation/regularization.hpp | 27 - .../rule_evaluation/regularization_manual.hpp | 58 - .../rule_evaluation/regularization_no.hpp | 19 - .../rule_evaluation/rule_compare_function.hpp | 28 - .../rule_evaluation/rule_evaluation.hpp | 36 - 
.../rule_evaluation_example_wise.hpp | 56 - .../rule_evaluation_example_wise_complete.hpp | 51 - ...valuation_example_wise_complete_binned.hpp | 58 - ...valuation_example_wise_partial_dynamic.hpp | 68 - ...on_example_wise_partial_dynamic_binned.hpp | 73 - ..._evaluation_example_wise_partial_fixed.hpp | 67 - ...tion_example_wise_partial_fixed_binned.hpp | 74 - .../rule_evaluation_label_wise.hpp | 56 - .../rule_evaluation_label_wise_complete.hpp | 40 - ..._evaluation_label_wise_complete_binned.hpp | 46 - ..._evaluation_label_wise_partial_dynamic.hpp | 65 - ...tion_label_wise_partial_dynamic_binned.hpp | 71 - ...le_evaluation_label_wise_partial_fixed.hpp | 64 - ...uation_label_wise_partial_fixed_binned.hpp | 71 - .../rule_evaluation_label_wise_single.hpp | 48 - .../rule_evaluation_label_wise_sparse.hpp | 55 - .../default_rule_auto.hpp | 46 - .../sampling/partition_sampling_auto.hpp | 52 - .../boosting/statistics/statistic_format.hpp | 71 - .../statistics/statistic_format_auto.hpp | 49 - .../statistics/statistic_format_dense.hpp | 35 - .../statistics/statistic_format_sparse.hpp | 35 - .../boosting/statistics/statistics.hpp | 40 - .../statistics/statistics_example_wise.hpp | 51 - .../statistics/statistics_label_wise.hpp | 33 - ...statistics_provider_example_wise_dense.hpp | 140 - .../statistics_provider_label_wise_dense.hpp | 76 - .../statistics_provider_label_wise_sparse.hpp | 71 - cpp/subprojects/boosting/meson.build | 109 - .../boosting/binning/feature_binning_auto.cpp | 22 - .../boosting/binning/label_binning_auto.cpp | 59 - .../binning/label_binning_equal_width.cpp | 272 - .../src/boosting/binning/label_binning_no.cpp | 72 - .../data/histogram_view_label_wise_sparse.cpp | 53 - .../data/matrix_c_contiguous_numeric.cpp | 74 - .../data/matrix_sparse_set_numeric.cpp | 76 - .../statistic_vector_example_wise_dense.cpp | 189 - .../statistic_vector_label_wise_dense.cpp | 107 - .../statistic_vector_label_wise_sparse.cpp | 246 - ...tistic_vector_label_wise_sparse_common.hpp 
| 113 - .../statistic_view_example_wise_dense.cpp | 86 - .../data/statistic_view_label_wise_dense.cpp | 48 - .../data/statistic_view_label_wise_sparse.cpp | 41 - .../boosting/iterator/diagonal_iterator.cpp | 65 - .../boosting/src/boosting/learner.cpp | 60 - .../boosting/src/boosting/learner_boomer.cpp | 55 - .../losses/loss_example_wise_logistic.cpp | 374 - .../loss_example_wise_squared_error.cpp | 276 - .../loss_example_wise_squared_hinge.cpp | 369 - .../losses/loss_label_wise_common.hpp | 198 - .../losses/loss_label_wise_logistic.cpp | 106 - .../losses/loss_label_wise_sparse_common.hpp | 275 - .../losses/loss_label_wise_squared_error.cpp | 59 - .../losses/loss_label_wise_squared_hinge.cpp | 87 - .../boosting/src/boosting/math/blas.cpp | 28 - .../boosting/src/boosting/math/lapack.cpp | 50 - .../src/boosting/model/rule_list_builder.cpp | 53 - .../parallel_rule_refinement_auto.cpp | 24 - .../parallel_statistic_update_auto.cpp | 20 - .../post_processing/shrinkage_constant.cpp | 76 - .../discretization_function_probability.cpp | 43 - .../discretization_function_score.cpp | 33 - .../prediction/predictor_binary_auto.cpp | 43 - .../predictor_binary_example_wise.cpp | 287 - .../prediction/predictor_binary_gfm.cpp | 274 - .../predictor_binary_label_wise.cpp | 231 - .../prediction/predictor_probability_auto.cpp | 34 - .../predictor_probability_label_wise.cpp | 114 - .../predictor_probability_marginalized.cpp | 151 - .../prediction/predictor_score_label_wise.cpp | 59 - .../probability_calibration_isotonic.cpp | 691 -- .../probability_function_chain_rule.cpp | 96 - .../probability_function_logistic.cpp | 37 - .../transformation_binary_example_wise.cpp | 42 - .../prediction/transformation_binary_gfm.cpp | 168 - .../transformation_binary_label_wise.cpp | 36 - .../transformation_probability_label_wise.cpp | 22 - ...ransformation_probability_marginalized.cpp | 36 - .../rule_evaluation/head_type_auto.cpp | 61 - .../rule_evaluation/head_type_complete.cpp | 71 - 
.../head_type_partial_dynamic.cpp | 99 - .../head_type_partial_fixed.cpp | 122 - .../rule_evaluation/head_type_single.cpp | 86 - .../rule_evaluation/regularization_manual.cpp | 25 - .../rule_evaluation/regularization_no.cpp | 9 - ..._evaluation_example_wise_binned_common.hpp | 405 - .../rule_evaluation_example_wise_common.hpp | 71 - .../rule_evaluation_example_wise_complete.cpp | 26 - ...valuation_example_wise_complete_binned.cpp | 33 - ...valuation_example_wise_complete_common.hpp | 212 - ...evaluation_example_wise_partial_common.hpp | 35 - ...valuation_example_wise_partial_dynamic.cpp | 145 - ...on_example_wise_partial_dynamic_binned.cpp | 127 - ...on_example_wise_partial_dynamic_common.hpp | 50 - ..._evaluation_example_wise_partial_fixed.cpp | 137 - ...tion_example_wise_partial_fixed_binned.cpp | 116 - ...tion_example_wise_partial_fixed_common.hpp | 40 - ...le_evaluation_label_wise_binned_common.hpp | 215 - .../rule_evaluation_label_wise_common.hpp | 64 - .../rule_evaluation_label_wise_complete.cpp | 23 - ..._evaluation_label_wise_complete_binned.cpp | 31 - ..._evaluation_label_wise_complete_common.hpp | 64 - ..._evaluation_label_wise_partial_dynamic.cpp | 124 - ...tion_label_wise_partial_dynamic_binned.cpp | 138 - ...tion_label_wise_partial_dynamic_common.hpp | 75 - ...le_evaluation_label_wise_partial_fixed.cpp | 115 - ...uation_label_wise_partial_fixed_binned.cpp | 123 - ...uation_label_wise_partial_fixed_common.hpp | 65 - .../rule_evaluation_label_wise_single.cpp | 104 - .../default_rule_auto.cpp | 21 - .../sampling/partition_sampling_auto.cpp | 27 - .../statistics/statistic_format_auto.cpp | 27 - .../statistics/statistic_format_dense.cpp | 22 - .../statistics/statistic_format_sparse.cpp | 22 - .../statistics_example_wise_common.hpp | 1027 -- .../statistics_label_wise_common.hpp | 989 -- .../statistics_label_wise_dense.hpp | 87 - .../statistics_provider_example_wise.hpp | 166 - ...statistics_provider_example_wise_dense.cpp | 228 - 
.../statistics_provider_label_wise.hpp | 66 - .../statistics_provider_label_wise_dense.cpp | 75 - .../statistics_provider_label_wise_sparse.cpp | 170 - .../common/binning/bin_index_vector.hpp | 53 - .../common/binning/bin_index_vector_dense.hpp | 30 - .../common/binning/bin_index_vector_dok.hpp | 47 - .../common/binning/bin_weight_vector.hpp | 49 - .../common/binning/feature_binning.hpp | 92 - .../feature_binning_equal_frequency.hpp | 111 - .../binning/feature_binning_equal_width.hpp | 111 - .../common/binning/feature_binning_no.hpp | 28 - .../common/binning/threshold_vector.hpp | 103 - .../common/include/common/data/arrays.hpp | 164 - .../include/common/data/indexed_value.hpp | 75 - .../include/common/data/list_of_lists.hpp | 111 - .../common/data/matrix_c_contiguous.hpp | 31 - .../common/include/common/data/matrix_lil.hpp | 15 - .../include/common/data/matrix_lil_binary.hpp | 12 - .../include/common/data/matrix_sparse_set.hpp | 274 - .../include/common/data/ring_buffer.hpp | 86 - .../common/include/common/data/triple.hpp | 185 - .../common/include/common/data/tuple.hpp | 172 - .../common/include/common/data/types.hpp | 26 - .../common/data/vector_binned_dense.hpp | 264 - .../common/include/common/data/vector_bit.hpp | 60 - .../include/common/data/vector_dense.hpp | 41 - .../common/include/common/data/vector_dok.hpp | 88 - .../include/common/data/vector_dok_binary.hpp | 59 - .../common/data/vector_sparse_array.hpp | 532 - .../data/vector_sparse_array_binary.hpp | 11 - .../include/common/data/view_c_contiguous.hpp | 115 - .../common/include/common/data/view_csc.hpp | 187 - .../include/common/data/view_csc_binary.hpp | 124 - .../common/include/common/data/view_csr.hpp | 187 - .../include/common/data/view_csr_binary.hpp | 124 - .../common/data/view_fortran_contiguous.hpp | 116 - .../common/data/view_one_dimensional.hpp | 23 - .../common/data/view_two_dimensional.hpp | 30 - .../include/common/data/view_vector.hpp | 123 - .../include/common/indices/index_vector.hpp | 
56 - .../common/indices/index_vector_complete.hpp | 59 - .../common/indices/index_vector_partial.hpp | 84 - .../include/common/input/feature_info.hpp | 27 - .../common/input/feature_info_equal.hpp | 37 - .../common/input/feature_info_mixed.hpp | 45 - .../include/common/input/feature_matrix.hpp | 22 - .../input/feature_matrix_c_contiguous.hpp | 79 - .../input/feature_matrix_column_wise.hpp | 29 - .../common/input/feature_matrix_csc.hpp | 34 - .../common/input/feature_matrix_csr.hpp | 90 - .../feature_matrix_fortran_contiguous.hpp | 27 - .../common/input/feature_matrix_row_wise.hpp | 127 - .../include/common/input/feature_type.hpp | 20 - .../common/input/feature_type_nominal.hpp | 15 - .../common/input/feature_type_numerical.hpp | 15 - .../common/input/feature_type_ordinal.hpp | 15 - .../include/common/input/feature_vector.hpp | 82 - .../include/common/input/label_matrix.hpp | 22 - .../input/label_matrix_c_contiguous.hpp | 164 - .../include/common/input/label_matrix_csc.hpp | 65 - .../include/common/input/label_matrix_csr.hpp | 161 - .../common/input/label_matrix_row_wise.hpp | 168 - .../include/common/input/label_vector.hpp | 11 - .../common/input/missing_feature_vector.hpp | 67 - .../iterator/binary_forward_iterator.hpp | 145 - .../common/iterator/index_iterator.hpp | 118 - .../non_zero_index_forward_iterator.hpp | 155 - .../common/include/common/learner.hpp | 2015 --- .../common/include/common/macros.hpp | 14 - .../common/include/common/math/math.hpp | 81 - .../common/measures/measure_distance.hpp | 93 - .../common/measures/measure_evaluation.hpp | 65 - .../measures/measure_evaluation_sparse.hpp | 66 - .../common/include/common/model/body.hpp | 76 - .../include/common/model/body_conjunctive.hpp | 374 - .../include/common/model/body_empty.hpp | 24 - .../common/include/common/model/condition.hpp | 88 - .../include/common/model/condition_list.hpp | 76 - .../common/include/common/model/head.hpp | 40 - .../include/common/model/head_complete.hpp | 74 - 
.../include/common/model/head_partial.hpp | 120 - .../include/common/model/model_builder.hpp | 66 - .../common/include/common/model/rule_list.hpp | 381 - .../include/common/model/rule_model.hpp | 252 - .../multi_threading/multi_threading.hpp | 26 - .../multi_threading_manual.hpp | 55 - .../multi_threading/multi_threading_no.hpp | 15 - .../model_builder_intermediate.hpp | 88 - .../post_optimization/post_optimization.hpp | 121 - .../post_optimization_phase_list.hpp | 31 - .../post_optimization_sequential.hpp | 101 - .../post_optimization_unused_rule_removal.hpp | 15 - .../common/post_processing/post_processor.hpp | 55 - .../post_processing/post_processor_no.hpp | 16 - .../common/prediction/label_space_info.hpp | 236 - .../common/prediction/label_space_info_no.hpp | 22 - .../common/prediction/label_vector_set.hpp | 163 - .../prediction/prediction_matrix_dense.hpp | 50 - .../prediction_matrix_sparse_binary.hpp | 85 - .../include/common/prediction/predictor.hpp | 120 - .../common/prediction/predictor_binary.hpp | 173 - .../common/prediction/predictor_common.hpp | 228 - .../prediction/predictor_probability.hpp | 90 - .../common/prediction/predictor_score.hpp | 70 - .../prediction/probability_calibration.hpp | 77 - .../probability_calibration_isotonic.hpp | 114 - .../probability_calibration_joint.hpp | 95 - .../probability_calibration_marginal.hpp | 83 - .../prediction/probability_calibration_no.hpp | 73 - .../rule_evaluation/rule_compare_function.hpp | 37 - .../common/rule_evaluation/score_vector.hpp | 35 - .../score_vector_binned_dense.hpp | 198 - .../rule_evaluation/score_vector_dense.hpp | 119 - .../common/rule_induction/rule_induction.hpp | 98 - .../rule_induction_top_down_beam_search.hpp | 215 - .../rule_induction_top_down_greedy.hpp | 168 - .../rule_model_assemblage/default_rule.hpp | 42 - .../rule_model_assemblage.hpp | 96 - .../rule_model_assemblage_sequential.hpp | 28 - .../common/rule_pruning/rule_pruning.hpp | 74 - .../common/rule_pruning/rule_pruning_irep.hpp 
| 26 - .../common/rule_pruning/rule_pruning_no.hpp | 15 - .../common/rule_refinement/prediction.hpp | 195 - .../rule_refinement/prediction_complete.hpp | 76 - .../rule_refinement/prediction_evaluated.hpp | 21 - .../rule_refinement/prediction_partial.hpp | 116 - .../common/rule_refinement/refinement.hpp | 39 - .../refinement_comparator_fixed.hpp | 111 - .../refinement_comparator_single.hpp | 93 - .../rule_refinement/rule_refinement.hpp | 34 - .../rule_refinement_approximate.hpp | 54 - .../rule_refinement_callback.hpp | 54 - .../rule_refinement/rule_refinement_exact.hpp | 56 - .../rule_refinement/score_processor.hpp | 71 - .../common/sampling/feature_sampling.hpp | 80 - .../common/sampling/feature_sampling_no.hpp | 18 - .../sampling/feature_sampling_predefined.hpp | 27 - .../feature_sampling_without_replacement.hpp | 80 - .../common/sampling/instance_sampling.hpp | 119 - .../common/sampling/instance_sampling_no.hpp | 16 - ...tance_sampling_stratified_example_wise.hpp | 56 - ...nstance_sampling_stratified_label_wise.hpp | 57 - .../instance_sampling_with_replacement.hpp | 54 - .../instance_sampling_without_replacement.hpp | 54 - .../common/sampling/label_sampling.hpp | 63 - .../common/sampling/label_sampling_no.hpp | 16 - .../sampling/label_sampling_round_robin.hpp | 17 - .../label_sampling_without_replacement.hpp | 53 - .../include/common/sampling/partition.hpp | 119 - .../include/common/sampling/partition_bi.hpp | 150 - .../common/sampling/partition_sampling.hpp | 75 - .../sampling/partition_sampling_bi_random.hpp | 57 - ...on_sampling_bi_stratified_example_wise.hpp | 56 - ...tion_sampling_bi_stratified_label_wise.hpp | 56 - .../common/sampling/partition_sampling_no.hpp | 16 - .../common/sampling/partition_single.hpp | 70 - .../common/include/common/sampling/random.hpp | 32 - .../stratified_sampling_example_wise.hpp | 65 - .../stratified_sampling_label_wise.hpp | 63 - .../common/sampling/weight_sampling.hpp | 109 - .../include/common/sampling/weight_vector.hpp | 38 - 
.../common/sampling/weight_vector_bit.hpp | 77 - .../common/sampling/weight_vector_dense.hpp | 113 - .../common/sampling/weight_vector_equal.hpp | 48 - .../sampling/weight_vector_out_of_sample.hpp | 42 - .../include/common/statistics/histogram.hpp | 37 - .../include/common/statistics/statistics.hpp | 278 - .../common/statistics/statistics_provider.hpp | 66 - .../common/statistics/statistics_subset.hpp | 52 - .../statistics/statistics_subset_weighted.hpp | 79 - .../common/statistics/statistics_weighted.hpp | 91 - .../statistics_weighted_immutable.hpp | 56 - .../common/stopping/aggregation_function.hpp | 27 - .../common/stopping/global_pruning.hpp | 30 - .../common/stopping/global_pruning_post.hpp | 141 - .../common/stopping/global_pruning_pre.hpp | 269 - .../common/stopping/stopping_criterion.hpp | 97 - .../stopping/stopping_criterion_list.hpp | 32 - .../stopping/stopping_criterion_size.hpp | 54 - .../stopping/stopping_criterion_time.hpp | 53 - .../common/thresholds/coverage_mask.hpp | 121 - .../common/thresholds/coverage_set.hpp | 112 - .../common/thresholds/coverage_state.hpp | 86 - .../include/common/thresholds/thresholds.hpp | 81 - .../thresholds/thresholds_approximate.hpp | 41 - .../common/thresholds/thresholds_exact.hpp | 26 - .../common/thresholds/thresholds_subset.hpp | 237 - .../common/include/common/util/quality.hpp | 41 - .../common/include/common/util/threads.hpp | 20 - .../common/include/common/util/validation.hpp | 87 - cpp/subprojects/common/meson.build | 146 - .../common/binning/bin_index_vector_dense.cpp | 18 - .../common/binning/bin_index_vector_dok.cpp | 26 - .../src/common/binning/bin_weight_vector.cpp | 21 - .../feature_binning_equal_frequency.cpp | 231 - .../binning/feature_binning_equal_width.cpp | 251 - .../src/common/binning/feature_binning_no.cpp | 12 - .../binning/feature_binning_nominal.hpp | 80 - .../src/common/binning/threshold_vector.cpp | 50 - .../common/src/common/data/list_of_lists.cpp | 82 - 
.../src/common/data/matrix_c_contiguous.cpp | 22 - .../src/common/data/matrix_sparse_set.cpp | 196 - .../common/src/common/data/ring_buffer.cpp | 55 - .../src/common/data/vector_binned_dense.cpp | 134 - .../common/src/common/data/vector_bit.cpp | 48 - .../common/src/common/data/vector_dense.cpp | 47 - .../common/src/common/data/vector_dok.cpp | 49 - .../src/common/data/vector_dok_binary.cpp | 25 - .../src/common/data/vector_sparse_array.cpp | 279 - .../src/common/data/view_c_contiguous.cpp | 53 - .../common/src/common/data/view_csc.cpp | 78 - .../src/common/data/view_csc_binary.cpp | 35 - .../common/src/common/data/view_csr.cpp | 78 - .../src/common/data/view_csr_binary.cpp | 35 - .../common/data/view_fortran_contiguous.cpp | 55 - .../common/src/common/data/view_vector.cpp | 80 - .../src/common/indices/index_iterator.cpp | 45 - .../common/indices/index_vector_complete.cpp | 36 - .../common/indices/index_vector_partial.cpp | 45 - .../src/common/input/feature_info_equal.cpp | 31 - .../src/common/input/feature_info_mixed.cpp | 55 - .../input/feature_matrix_c_contiguous.cpp | 48 - .../src/common/input/feature_matrix_csc.cpp | 69 - .../src/common/input/feature_matrix_csr.cpp | 49 - .../feature_matrix_fortran_contiguous.cpp | 61 - .../src/common/input/feature_type_nominal.cpp | 5 - .../common/input/feature_type_numerical.cpp | 5 - .../src/common/input/feature_type_ordinal.cpp | 5 - .../src/common/input/feature_vector.cpp | 33 - .../input/label_matrix_c_contiguous.cpp | 109 - .../src/common/input/label_matrix_csc.cpp | 131 - .../src/common/input/label_matrix_csr.cpp | 98 - .../common/input/missing_feature_vector.cpp | 26 - cpp/subprojects/common/src/common/learner.cpp | 591 - .../src/common/model/body_conjunctive.cpp | 274 - .../common/src/common/model/body_empty.cpp | 18 - .../src/common/model/condition_list.cpp | 80 - .../common/src/common/model/head_complete.cpp | 31 - .../common/src/common/model/head_partial.cpp | 49 - .../common/src/common/model/rule_list.cpp | 221 - 
.../multi_threading_manual.cpp | 20 - .../multi_threading/multi_threading_no.cpp | 5 - .../model_builder_intermediate.cpp | 59 - .../post_optimization_phase_list.cpp | 96 - .../post_optimization_sequential.cpp | 190 - .../post_optimization_unused_rule_removal.cpp | 48 - .../post_processing/post_processor_no.cpp | 29 - .../common/prediction/label_space_info_no.cpp | 86 - .../common/prediction/label_vector_set.cpp | 173 - .../prediction/prediction_matrix_dense.cpp | 35 - .../prediction_matrix_sparse_binary.cpp | 53 - .../probability_calibration_isotonic.cpp | 189 - .../prediction/probability_calibration_no.cpp | 114 - .../score_vector_binned_dense.cpp | 122 - .../rule_evaluation/score_vector_dense.cpp | 69 - .../rule_induction/rule_induction_common.hpp | 123 - .../rule_induction_top_down_beam_search.cpp | 512 - .../rule_induction_top_down_common.hpp | 83 - .../rule_induction_top_down_greedy.cpp | 228 - .../rule_model_assemblage/default_rule.cpp | 7 - .../rule_model_assemblage_sequential.cpp | 105 - .../common/rule_pruning/rule_pruning_irep.cpp | 104 - .../common/rule_pruning/rule_pruning_no.cpp | 29 - .../src/common/rule_refinement/prediction.cpp | 35 - .../rule_refinement/prediction_complete.cpp | 77 - .../rule_refinement/prediction_evaluated.cpp | 3 - .../rule_refinement/prediction_partial.cpp | 123 - .../refinement_comparator_fixed.cpp | 125 - .../refinement_comparator_single.cpp | 43 - .../rule_refinement_approximate.cpp | 376 - .../rule_refinement/rule_refinement_exact.cpp | 551 - .../rule_refinement/score_processor.cpp | 65 - .../common/sampling/feature_sampling_no.cpp | 57 - .../sampling/feature_sampling_predefined.cpp | 11 - .../feature_sampling_without_replacement.cpp | 122 - .../src/common/sampling/index_sampling.hpp | 166 - .../common/sampling/instance_sampling_no.cpp | 87 - ...tance_sampling_stratified_example_wise.cpp | 113 - ...nstance_sampling_stratified_label_wise.cpp | 115 - .../instance_sampling_with_replacement.cpp | 148 - 
.../instance_sampling_without_replacement.cpp | 114 - .../src/common/sampling/label_sampling_no.cpp | 48 - .../sampling/label_sampling_round_robin.cpp | 60 - .../label_sampling_without_replacement.cpp | 74 - .../src/common/sampling/partition_bi.cpp | 103 - .../sampling/partition_sampling_bi_random.cpp | 90 - ...on_sampling_bi_stratified_example_wise.cpp | 94 - ...tion_sampling_bi_stratified_label_wise.cpp | 94 - .../common/sampling/partition_sampling_no.cpp | 44 - .../src/common/sampling/partition_single.cpp | 53 - .../common/src/common/sampling/random.cpp | 20 - .../sampling/stratified_sampling_common.hpp | 16 - .../stratified_sampling_example_wise.cpp | 126 - .../stratified_sampling_label_wise.cpp | 293 - .../src/common/sampling/weight_vector_bit.cpp | 41 - .../common/sampling/weight_vector_dense.cpp | 68 - .../common/sampling/weight_vector_equal.cpp | 26 - .../sampling/weight_vector_out_of_sample.cpp | 22 - .../stopping/aggregation_function_common.hpp | 167 - .../common/stopping/global_pruning_common.hpp | 70 - .../common/stopping/global_pruning_post.cpp | 147 - .../common/stopping/global_pruning_pre.cpp | 301 - .../stopping/stopping_criterion_list.cpp | 85 - .../stopping/stopping_criterion_size.cpp | 71 - .../stopping/stopping_criterion_time.cpp | 89 - .../src/common/thresholds/coverage_mask.cpp | 79 - .../src/common/thresholds/coverage_set.cpp | 77 - .../thresholds/thresholds_approximate.cpp | 416 - .../common/thresholds/thresholds_common.hpp | 186 - .../common/thresholds/thresholds_exact.cpp | 536 - doc/Doxyfile_boosting | 2770 ----- doc/Doxyfile_common | 2770 ----- doc/_static/logo.png | Bin 33823 -> 0 bytes doc/_static/structure.png | Bin 13347 -> 0 bytes doc/api/codestyle.inc.rst | 24 - doc/api/compilation.inc.rst | 108 - doc/api/cpp.inc.rst | 9 - doc/api/documentation.inc.rst | 18 - doc/api/index.rst | 14 - doc/api/python.inc.rst | 10 - doc/api/structure.inc.rst | 16 - doc/api/testing.inc.rst | 12 - doc/conf.py | 57 - doc/index.rst | 29 - 
doc/python/boosting/conf.py | 57 - doc/python/common/conf.py | 57 - doc/python/testbed/conf.py | 57 - doc/quickstart/index.rst | 11 - doc/quickstart/installation.inc.rst | 23 - doc/quickstart/parameters.inc.rst | 308 - doc/quickstart/rules.inc.rst | 24 - doc/quickstart/usage.inc.rst | 54 - doc/references/firstparty.inc.rst | 73 - doc/references/index.rst | 9 - doc/references/thirdparty.inc.rst | 27 - doc/requirements.txt | 3 - doc/source/CHANGELOG.md | 1 - doc/source/CONTRIBUTORS.md | 1 - doc/source/LICENSE.md | 1 - doc/testbed/arguments.inc.rst | 351 - doc/testbed/experiments.inc.rst | 24 - doc/testbed/index.rst | 13 - python/meson.build | 2 - python/requirements.txt | 12 - python/subprojects/boosting/meson.build | 55 - python/subprojects/boosting/mlrl/__init__.py | 0 .../boosting/mlrl/boosting/__init__.py | 1 - .../mlrl/boosting/boosting_learners.py | 179 - .../boosting/mlrl/boosting/config.py | 478 - .../boosting/mlrl/boosting/cython/__init__.py | 0 .../mlrl/boosting/cython/head_type.pxd | 49 - .../mlrl/boosting/cython/head_type.pyx | 129 - .../mlrl/boosting/cython/label_binning.pxd | 27 - .../mlrl/boosting/cython/label_binning.pyx | 76 - .../boosting/mlrl/boosting/cython/learner.pxd | 289 - .../boosting/mlrl/boosting/cython/learner.pyx | 587 - .../mlrl/boosting/cython/learner_boomer.pxd | 139 - .../mlrl/boosting/cython/learner_boomer.pyx | 546 - .../mlrl/boosting/cython/post_processor.pxd | 19 - .../mlrl/boosting/cython/post_processor.pyx | 30 - .../mlrl/boosting/cython/prediction.pxd | 99 - .../mlrl/boosting/cython/prediction.pyx | 195 - .../cython/probability_calibration.pxd | 35 - .../cython/probability_calibration.pyx | 59 - .../mlrl/boosting/cython/regularization.pxd | 19 - .../mlrl/boosting/cython/regularization.pyx | 31 - python/subprojects/boosting/pyproject.toml | 3 - python/subprojects/boosting/setup.py | 96 - python/subprojects/common/README.md | 74 - python/subprojects/common/meson.build | 75 - python/subprojects/common/mlrl/__init__.py | 0 
.../common/mlrl/common/__init__.py | 0 .../subprojects/common/mlrl/common/arrays.py | 37 - .../subprojects/common/mlrl/common/config.py | 790 -- .../common/mlrl/common/cython/__init__.py | 0 .../common/mlrl/common/cython/_arrays.pxd | 94 - .../common/mlrl/common/cython/_types.pxd | 10 - .../mlrl/common/cython/feature_binning.pxd | 53 - .../mlrl/common/cython/feature_binning.pyx | 148 - .../mlrl/common/cython/feature_info.pxd | 59 - .../mlrl/common/cython/feature_info.pyx | 72 - .../mlrl/common/cython/feature_matrix.pxd | 134 - .../mlrl/common/cython/feature_matrix.pyx | 164 - .../mlrl/common/cython/feature_sampling.pxd | 23 - .../mlrl/common/cython/feature_sampling.pyx | 54 - .../mlrl/common/cython/instance_sampling.pxd | 73 - .../mlrl/common/cython/instance_sampling.pyx | 118 - .../mlrl/common/cython/label_matrix.pxd | 79 - .../mlrl/common/cython/label_matrix.pyx | 98 - .../mlrl/common/cython/label_sampling.pxd | 19 - .../mlrl/common/cython/label_sampling.pyx | 30 - .../mlrl/common/cython/label_space_info.pxd | 138 - .../mlrl/common/cython/label_space_info.pyx | 129 - .../common/mlrl/common/cython/learner.pxd | 398 - .../common/mlrl/common/cython/learner.pyx | 893 -- .../mlrl/common/cython/multi_threading.pxd | 19 - .../mlrl/common/cython/multi_threading.pyx | 33 - .../mlrl/common/cython/partition_sampling.pxd | 55 - .../mlrl/common/cython/partition_sampling.pyx | 92 - .../mlrl/common/cython/post_optimization.pxd | 29 - .../mlrl/common/cython/post_optimization.pyx | 72 - .../common/mlrl/common/cython/prediction.pxd | 143 - .../common/mlrl/common/cython/prediction.pyx | 359 - .../common/cython/probability_calibration.pxd | 197 - .../common/cython/probability_calibration.pyx | 212 - .../mlrl/common/cython/rule_induction.pxd | 79 - .../mlrl/common/cython/rule_induction.pyx | 285 - .../common/mlrl/common/cython/rule_model.pxd | 333 - .../common/mlrl/common/cython/rule_model.pyx | 427 - .../mlrl/common/cython/stopping_criterion.pxd | 130 - 
.../mlrl/common/cython/stopping_criterion.pyx | 360 - .../common/mlrl/common/cython/validation.py | 79 - .../common/mlrl/common/data_types.py | 16 - .../subprojects/common/mlrl/common/format.py | 37 - .../common/mlrl/common/learners.py | 227 - .../subprojects/common/mlrl/common/options.py | 187 - .../common/mlrl/common/rule_learners.py | 485 - python/subprojects/common/pyproject.toml | 3 - python/subprojects/common/setup.py | 114 - python/subprojects/testbed/README.md | 31 - python/subprojects/testbed/mlrl/__init__.py | 0 .../testbed/mlrl/testbed/__init__.py | 0 .../testbed/mlrl/testbed/characteristics.py | 179 - .../subprojects/testbed/mlrl/testbed/data.py | 493 - .../mlrl/testbed/data_characteristics.py | 157 - .../testbed/mlrl/testbed/data_splitting.py | 494 - .../testbed/mlrl/testbed/evaluation.py | 459 - .../testbed/mlrl/testbed/experiments.py | 409 - .../testbed/mlrl/testbed/format.py | 150 - python/subprojects/testbed/mlrl/testbed/io.py | 157 - .../testbed/mlrl/testbed/label_vectors.py | 166 - .../testbed/mlrl/testbed/main_boomer.py | 20 - .../mlrl/testbed/model_characteristics.py | 316 - .../testbed/mlrl/testbed/models.py | 243 - .../testbed/mlrl/testbed/output_writer.py | 256 - .../testbed/mlrl/testbed/parameters.py | 111 - .../testbed/mlrl/testbed/persistence.py | 66 - .../testbed/prediction_characteristics.py | 47 - .../testbed/mlrl/testbed/prediction_scope.py | 86 - .../testbed/mlrl/testbed/predictions.py | 93 - .../mlrl/testbed/probability_calibration.py | 211 - .../testbed/mlrl/testbed/runnables.py | 1000 -- python/subprojects/testbed/pyproject.toml | 3 - python/subprojects/testbed/setup.py | 72 - .../testbed/tests/res/data/breast-cancer.arff | 394 - .../testbed/tests/res/data/breast-cancer.xml | 4 - .../tests/res/data/emotions-predefined.xml | 9 - .../res/data/emotions-predefined_fold-1.arff | 141 - .../res/data/emotions-predefined_fold-10.arff | 142 - .../res/data/emotions-predefined_fold-2.arff | 141 - .../res/data/emotions-predefined_fold-3.arff | 
142 - .../res/data/emotions-predefined_fold-4.arff | 141 - .../res/data/emotions-predefined_fold-5.arff | 141 - .../res/data/emotions-predefined_fold-6.arff | 141 - .../res/data/emotions-predefined_fold-7.arff | 142 - .../res/data/emotions-predefined_fold-8.arff | 141 - .../res/data/emotions-predefined_fold-9.arff | 141 - .../res/data/emotions-predefined_test.arff | 284 - .../data/emotions-predefined_training.arff | 473 - .../testbed/tests/res/data/emotions.arff | 675 -- .../testbed/tests/res/data/emotions.xml | 9 - .../testbed/tests/res/data/enron.arff | 2760 ----- .../testbed/tests/res/data/enron.xml | 56 - .../testbed/tests/res/data/langlog.arff | 2543 ---- .../testbed/tests/res/data/langlog.xml | 81 - .../testbed/tests/res/data/meka.arff | 26 - .../testbed/tests/res/data/weather.arff | 26 - .../testbed/tests/res/data/weather.xml | 6 - .../tests/res/in/parameters_fold-1.csv | 2 - .../tests/res/in/parameters_fold-10.csv | 2 - .../tests/res/in/parameters_fold-2.csv | 2 - .../tests/res/in/parameters_fold-3.csv | 2 - .../tests/res/in/parameters_fold-4.csv | 2 - .../tests/res/in/parameters_fold-5.csv | 2 - .../tests/res/in/parameters_fold-6.csv | 2 - .../tests/res/in/parameters_fold-7.csv | 2 - .../tests/res/in/parameters_fold-8.csv | 2 - .../tests/res/in/parameters_fold-9.csv | 2 - .../tests/res/in/parameters_overall.csv | 2 - .../res/out/boomer/binary-features-dense.txt | 32 - .../res/out/boomer/binary-features-sparse.txt | 32 - .../data-characteristics_cross-validation.txt | 205 - .../data-characteristics_single-fold.txt | 25 - .../data-characteristics_train-test.txt | 24 - ...evaluation_cross-validation-predefined.txt | 313 - .../boomer/evaluation_cross-validation.txt | 304 - .../res/out/boomer/evaluation_incremental.txt | 431 - .../out/boomer/evaluation_no-data-split.txt | 32 - .../res/out/boomer/evaluation_single-fold.txt | 33 - .../evaluation_train-test-predefined.txt | 33 - .../res/out/boomer/evaluation_train-test.txt | 32 - 
.../out/boomer/evaluation_training-data.txt | 55 - .../boomer/example-wise-complete-heads.txt | 55 - ...mplete-heads_equal-width-label-binning.txt | 55 - .../example-wise-partial-dynamic-heads.txt | 55 - ...ynamic-heads_equal-width-label-binning.txt | 55 - .../example-wise-partial-fixed-heads.txt | 55 - ...-fixed-heads_equal-width-label-binning.txt | 55 - .../example-wise-single-label-heads.txt | 55 - ...-equal-frequency_binary-features-dense.txt | 32 - ...equal-frequency_binary-features-sparse.txt | 32 - ...equal-frequency_nominal-features-dense.txt | 32 - ...qual-frequency_nominal-features-sparse.txt | 32 - ...ual-frequency_numerical-features-dense.txt | 32 - ...al-frequency_numerical-features-sparse.txt | 32 - ...ning-equal-width_binary-features-dense.txt | 32 - ...ing-equal-width_binary-features-sparse.txt | 32 - ...ing-equal-width_nominal-features-dense.txt | 32 - ...ng-equal-width_nominal-features-sparse.txt | 32 - ...g-equal-width_numerical-features-dense.txt | 32 - ...-equal-width_numerical-features-sparse.txt | 32 - .../res/out/boomer/feature-sampling-no.txt | 32 - .../feature-sampling-without-replacement.txt | 32 - .../res/out/boomer/instance-sampling-no.txt | 32 - ...tance-sampling-stratified-example-wise.txt | 32 - ...nstance-sampling-stratified-label-wise.txt | 32 - .../instance-sampling-with-replacement.txt | 32 - .../instance-sampling-without-replacement.txt | 32 - .../res/out/boomer/label-format-dense.txt | 32 - .../res/out/boomer/label-format-sparse.txt | 32 - .../res/out/boomer/label-sampling-no.txt | 32 - .../out/boomer/label-sampling-round-robin.txt | 32 - .../label-sampling-without-replacement.txt | 32 - .../boomer/label-vectors_cross-validation.txt | 391 - .../out/boomer/label-vectors_single-fold.txt | 43 - .../out/boomer/label-vectors_train-test.txt | 39 - .../out/boomer/label-wise-complete-heads.txt | 55 - ...mplete-heads_equal-width-label-binning.txt | 55 - .../label-wise-partial-dynamic-heads.txt | 55 - 
...ynamic-heads_equal-width-label-binning.txt | 55 - .../boomer/label-wise-partial-fixed-heads.txt | 55 - ...-fixed-heads_equal-width-label-binning.txt | 55 - .../boomer/label-wise-single-label-heads.txt | 55 - .../out/boomer/loss-logistic-example-wise.txt | 32 - .../out/boomer/loss-logistic-label-wise.txt | 32 - .../loss-squared-error-example-wise.txt | 32 - .../boomer/loss-squared-error-label-wise.txt | 32 - .../loss-squared-hinge-example-wise.txt | 32 - .../boomer/loss-squared-hinge-label-wise.txt | 32 - .../tests/res/out/boomer/meka-format.txt | 9 - ...model-characteristics_cross-validation.txt | 285 - .../model-characteristics_single-fold.txt | 33 - .../model-characteristics_train-test.txt | 32 - .../model-persistence_cross-validation.txt | 284 - .../boomer/model-persistence_single-fold.txt | 31 - .../boomer/model-persistence_train-test.txt | 30 - .../tests/res/out/boomer/no-default-rule.txt | 53 - .../res/out/boomer/nominal-features-dense.txt | 32 - .../out/boomer/nominal-features-sparse.txt | 32 - .../res/out/boomer/numeric-features-dense.txt | 32 - .../out/boomer/numeric-features-sparse.txt | 32 - .../boomer/parameters_cross-validation.txt | 385 - .../res/out/boomer/parameters_single-fold.txt | 43 - .../res/out/boomer/parameters_train-test.txt | 42 - .../out/boomer/post-pruning_no-holdout.txt | 55 - .../boomer/post-pruning_random-holdout.txt | 55 - ...runing_stratified-example-wise-holdout.txt | 55 - ...-pruning_stratified-label-wise-holdout.txt | 55 - .../res/out/boomer/pre-pruning_no-holdout.txt | 55 - .../out/boomer/pre-pruning_random-holdout.txt | 55 - ...runing_stratified-example-wise-holdout.txt | 55 - ...-pruning_stratified-label-wise-holdout.txt | 55 - ...ction-characteristics_cross-validation.txt | 185 - ...prediction-characteristics_single-fold.txt | 23 - .../prediction-characteristics_train-test.txt | 22 - ...ediction-characteristics_training-data.txt | 35 - .../out/boomer/prediction-format-dense.txt | 432 - 
.../out/boomer/prediction-format-sparse.txt | 432 - .../boomer/predictions_cross-validation.txt | 1381 --- .../out/boomer/predictions_single-fold.txt | 144 - .../res/out/boomer/predictions_train-test.txt | 415 - .../out/boomer/predictions_training-data.txt | 1223 -- .../boomer/predictor-binary-example-wise.txt | 462 - ...ry-example-wise_based-on-probabilities.txt | 723 -- ...dictor-binary-example-wise_incremental.txt | 431 - ...ise_incremental_based-on-probabilities.txt | 692 -- .../predictor-binary-example-wise_sparse.txt | 462 - ...binary-example-wise_sparse_incremental.txt | 431 - .../res/out/boomer/predictor-binary-gfm.txt | 723 -- ...ctor-binary-gfm_based-on-probabilities.txt | 432 - .../predictor-binary-gfm_incremental.txt | 692 -- ...gfm_incremental_based-on-probabilities.txt | 431 - .../boomer/predictor-binary-gfm_sparse.txt | 723 -- ...redictor-binary-gfm_sparse_incremental.txt | 692 -- .../boomer/predictor-binary-label-wise.txt | 432 - ...nary-label-wise_based-on-probabilities.txt | 504 - ...redictor-binary-label-wise_incremental.txt | 431 - ...ise_incremental_based-on-probabilities.txt | 503 - .../predictor-binary-label-wise_sparse.txt | 432 - ...r-binary-label-wise_sparse_incremental.txt | 431 - .../predictor-probability-label-wise.txt | 497 - ...tor-probability-label-wise_incremental.txt | 363 - .../predictor-probability-marginalized.txt | 716 -- ...r-probability-marginalized_incremental.txt | 552 - .../out/boomer/predictor-score-label-wise.txt | 425 - ...predictor-score-label-wise_incremental.txt | 291 - .../tests/res/out/boomer/pruning-irep.txt | 32 - .../tests/res/out/boomer/pruning-no.txt | 32 - .../rule-induction-top-down-beam-search.txt | 32 - .../res/out/boomer/rules_cross-validation.txt | 10095 ---------------- .../res/out/boomer/rules_single-fold.txt | 1014 -- .../tests/res/out/boomer/rules_train-test.txt | 1013 -- .../boomer/sequential-post-optimization.txt | 32 - .../boomer/single-label-classification.txt | 22 - 
.../out/boomer/single-label-probabilities.txt | 20 - .../out/boomer/single-label-regression.txt | 20 - .../statistics-sparse_label-format-dense.txt | 32 - .../statistics-sparse_label-format-sparse.txt | 32 - .../testbed/tests/test_boosting.py | 1034 -- .../subprojects/testbed/tests/test_common.py | 1596 --- 810 files changed, 122344 deletions(-) delete mode 100644 CHANGELOG.md delete mode 100644 CODE_OF_CONDUCT.md delete mode 100644 CONTRIBUTORS.md delete mode 100644 LICENSE.md delete mode 100644 Makefile delete mode 100644 VERSION delete mode 100644 assets/project_structure.svg delete mode 100644 cpp/meson.build delete mode 100644 cpp/subprojects/boosting/include/boosting/binning/feature_binning_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/binning/label_binning.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/binning/label_binning_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/binning/label_binning_equal_width.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/binning/label_binning_no.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/arrays.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/histogram_view_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/matrix_c_contiguous_numeric.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/matrix_sparse_set_numeric.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/statistic_vector_example_wise_dense.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_dense.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/statistic_view_example_wise_dense.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_dense.hpp delete mode 100644 
cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/iterator/diagonal_iterator.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/learner.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/learner_boomer.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_example_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_logistic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_error.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_hinge.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_logistic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_error.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_hinge.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/macros.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/math/blas.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/math/lapack.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/math/math.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/model/rule_list_builder.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/multi_threading/parallel_rule_refinement_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/multi_threading/parallel_statistic_update_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/post_processing/shrinkage_constant.hpp 
delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/discretization_function.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/discretization_function_probability.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/discretization_function_score.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_common.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_example_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_gfm.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_common.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_marginalized.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_score_common.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/predictor_score_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/probability_calibration_isotonic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/probability_function_chain_rule.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/probability_function_joint.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/probability_function_logistic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/probability_function_marginal.hpp delete mode 
100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_binary.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_example_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_gfm.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_probability.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_marginalized.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_complete.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_dynamic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_fixed.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_single.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_manual.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_no.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_compare_function.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise.hpp delete mode 100644 
cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_single.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/rule_model_assemblage/default_rule_auto.hpp delete mode 100644 
cpp/subprojects/boosting/include/boosting/sampling/partition_sampling_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistic_format.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistic_format_auto.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistic_format_dense.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistic_format_sparse.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistics.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistics_example_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistics_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_example_wise_dense.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_dense.hpp delete mode 100644 cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_sparse.hpp delete mode 100644 cpp/subprojects/boosting/meson.build delete mode 100644 cpp/subprojects/boosting/src/boosting/binning/feature_binning_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/binning/label_binning_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/binning/label_binning_equal_width.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/binning/label_binning_no.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/histogram_view_label_wise_sparse.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/matrix_c_contiguous_numeric.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/matrix_sparse_set_numeric.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_vector_example_wise_dense.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_dense.cpp 
delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_view_example_wise_dense.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_dense.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_sparse.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/iterator/diagonal_iterator.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/learner.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/learner_boomer.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_logistic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_error.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_hinge.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_logistic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_sparse_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_error.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_hinge.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/math/blas.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/math/lapack.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/model/rule_list_builder.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/multi_threading/parallel_rule_refinement_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/multi_threading/parallel_statistic_update_auto.cpp delete mode 100644 
cpp/subprojects/boosting/src/boosting/post_processing/shrinkage_constant.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/discretization_function_probability.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/discretization_function_score.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_example_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_gfm.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_label_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_label_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_marginalized.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/predictor_score_label_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/probability_calibration_isotonic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/probability_function_chain_rule.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/probability_function_logistic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_example_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_gfm.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_label_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_label_wise.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_marginalized.cpp delete mode 100644 
cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_complete.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_dynamic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_fixed.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_single.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_manual.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_no.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_binned_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.cpp delete mode 100644 
cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_binned_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_single.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/rule_model_assemblage/default_rule_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/sampling/partition_sampling_auto.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistic_format_auto.cpp delete mode 100644 
cpp/subprojects/boosting/src/boosting/statistics/statistic_format_dense.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistic_format_sparse.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_example_wise_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_common.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_dense.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise_dense.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise.hpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_dense.cpp delete mode 100644 cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_sparse.cpp delete mode 100644 cpp/subprojects/common/include/common/binning/bin_index_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/bin_index_vector_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/bin_index_vector_dok.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/bin_weight_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/feature_binning.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/feature_binning_equal_frequency.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/feature_binning_equal_width.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/feature_binning_no.hpp delete mode 100644 cpp/subprojects/common/include/common/binning/threshold_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/data/arrays.hpp delete mode 100644 cpp/subprojects/common/include/common/data/indexed_value.hpp delete mode 100644 
cpp/subprojects/common/include/common/data/list_of_lists.hpp delete mode 100644 cpp/subprojects/common/include/common/data/matrix_c_contiguous.hpp delete mode 100644 cpp/subprojects/common/include/common/data/matrix_lil.hpp delete mode 100644 cpp/subprojects/common/include/common/data/matrix_lil_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/data/matrix_sparse_set.hpp delete mode 100644 cpp/subprojects/common/include/common/data/ring_buffer.hpp delete mode 100644 cpp/subprojects/common/include/common/data/triple.hpp delete mode 100644 cpp/subprojects/common/include/common/data/tuple.hpp delete mode 100644 cpp/subprojects/common/include/common/data/types.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_binned_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_bit.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_dok.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_dok_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_sparse_array.hpp delete mode 100644 cpp/subprojects/common/include/common/data/vector_sparse_array_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_c_contiguous.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_csc.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_csc_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_csr.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_csr_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_fortran_contiguous.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_one_dimensional.hpp delete mode 100644 cpp/subprojects/common/include/common/data/view_two_dimensional.hpp delete mode 100644 
cpp/subprojects/common/include/common/data/view_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/indices/index_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/indices/index_vector_complete.hpp delete mode 100644 cpp/subprojects/common/include/common/indices/index_vector_partial.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_info.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_info_equal.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_info_mixed.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix_c_contiguous.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix_column_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix_csc.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix_csr.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix_fortran_contiguous.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_matrix_row_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_type.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_type_nominal.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_type_numerical.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_type_ordinal.hpp delete mode 100644 cpp/subprojects/common/include/common/input/feature_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/input/label_matrix.hpp delete mode 100644 cpp/subprojects/common/include/common/input/label_matrix_c_contiguous.hpp delete mode 100644 cpp/subprojects/common/include/common/input/label_matrix_csc.hpp delete mode 100644 cpp/subprojects/common/include/common/input/label_matrix_csr.hpp delete mode 100644 
cpp/subprojects/common/include/common/input/label_matrix_row_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/input/label_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/input/missing_feature_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/iterator/binary_forward_iterator.hpp delete mode 100644 cpp/subprojects/common/include/common/iterator/index_iterator.hpp delete mode 100644 cpp/subprojects/common/include/common/iterator/non_zero_index_forward_iterator.hpp delete mode 100644 cpp/subprojects/common/include/common/learner.hpp delete mode 100644 cpp/subprojects/common/include/common/macros.hpp delete mode 100644 cpp/subprojects/common/include/common/math/math.hpp delete mode 100644 cpp/subprojects/common/include/common/measures/measure_distance.hpp delete mode 100644 cpp/subprojects/common/include/common/measures/measure_evaluation.hpp delete mode 100644 cpp/subprojects/common/include/common/measures/measure_evaluation_sparse.hpp delete mode 100644 cpp/subprojects/common/include/common/model/body.hpp delete mode 100644 cpp/subprojects/common/include/common/model/body_conjunctive.hpp delete mode 100644 cpp/subprojects/common/include/common/model/body_empty.hpp delete mode 100644 cpp/subprojects/common/include/common/model/condition.hpp delete mode 100644 cpp/subprojects/common/include/common/model/condition_list.hpp delete mode 100644 cpp/subprojects/common/include/common/model/head.hpp delete mode 100644 cpp/subprojects/common/include/common/model/head_complete.hpp delete mode 100644 cpp/subprojects/common/include/common/model/head_partial.hpp delete mode 100644 cpp/subprojects/common/include/common/model/model_builder.hpp delete mode 100644 cpp/subprojects/common/include/common/model/rule_list.hpp delete mode 100644 cpp/subprojects/common/include/common/model/rule_model.hpp delete mode 100644 cpp/subprojects/common/include/common/multi_threading/multi_threading.hpp delete mode 100644 
cpp/subprojects/common/include/common/multi_threading/multi_threading_manual.hpp delete mode 100644 cpp/subprojects/common/include/common/multi_threading/multi_threading_no.hpp delete mode 100644 cpp/subprojects/common/include/common/post_optimization/model_builder_intermediate.hpp delete mode 100644 cpp/subprojects/common/include/common/post_optimization/post_optimization.hpp delete mode 100644 cpp/subprojects/common/include/common/post_optimization/post_optimization_phase_list.hpp delete mode 100644 cpp/subprojects/common/include/common/post_optimization/post_optimization_sequential.hpp delete mode 100644 cpp/subprojects/common/include/common/post_optimization/post_optimization_unused_rule_removal.hpp delete mode 100644 cpp/subprojects/common/include/common/post_processing/post_processor.hpp delete mode 100644 cpp/subprojects/common/include/common/post_processing/post_processor_no.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/label_space_info.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/label_space_info_no.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/label_vector_set.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/prediction_matrix_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/prediction_matrix_sparse_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/predictor.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/predictor_binary.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/predictor_common.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/predictor_probability.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/predictor_score.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/probability_calibration.hpp delete mode 100644 
cpp/subprojects/common/include/common/prediction/probability_calibration_isotonic.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/probability_calibration_joint.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/probability_calibration_marginal.hpp delete mode 100644 cpp/subprojects/common/include/common/prediction/probability_calibration_no.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_evaluation/rule_compare_function.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_evaluation/score_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_evaluation/score_vector_binned_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_evaluation/score_vector_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_induction/rule_induction.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_beam_search.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_greedy.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_model_assemblage/default_rule.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage_sequential.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_pruning/rule_pruning.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_pruning/rule_pruning_irep.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_pruning/rule_pruning_no.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/prediction.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/prediction_complete.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/prediction_evaluated.hpp delete mode 100644 
cpp/subprojects/common/include/common/rule_refinement/prediction_partial.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/refinement.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_fixed.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_single.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/rule_refinement.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/rule_refinement_approximate.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/rule_refinement_callback.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/rule_refinement_exact.hpp delete mode 100644 cpp/subprojects/common/include/common/rule_refinement/score_processor.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/feature_sampling.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/feature_sampling_no.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/feature_sampling_predefined.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/feature_sampling_without_replacement.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/instance_sampling.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/instance_sampling_no.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_example_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_label_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/instance_sampling_with_replacement.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/instance_sampling_without_replacement.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/label_sampling.hpp delete mode 100644 
cpp/subprojects/common/include/common/sampling/label_sampling_no.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/label_sampling_round_robin.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/label_sampling_without_replacement.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_bi.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_sampling.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_sampling_bi_random.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_example_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_label_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_sampling_no.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/partition_single.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/random.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/stratified_sampling_example_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/stratified_sampling_label_wise.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/weight_sampling.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/weight_vector.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/weight_vector_bit.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/weight_vector_dense.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/weight_vector_equal.hpp delete mode 100644 cpp/subprojects/common/include/common/sampling/weight_vector_out_of_sample.hpp delete mode 100644 cpp/subprojects/common/include/common/statistics/histogram.hpp delete mode 100644 
cpp/subprojects/common/include/common/statistics/statistics.hpp delete mode 100644 cpp/subprojects/common/include/common/statistics/statistics_provider.hpp delete mode 100644 cpp/subprojects/common/include/common/statistics/statistics_subset.hpp delete mode 100644 cpp/subprojects/common/include/common/statistics/statistics_subset_weighted.hpp delete mode 100644 cpp/subprojects/common/include/common/statistics/statistics_weighted.hpp delete mode 100644 cpp/subprojects/common/include/common/statistics/statistics_weighted_immutable.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/aggregation_function.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/global_pruning.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/global_pruning_post.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/global_pruning_pre.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/stopping_criterion.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/stopping_criterion_list.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/stopping_criterion_size.hpp delete mode 100644 cpp/subprojects/common/include/common/stopping/stopping_criterion_time.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/coverage_mask.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/coverage_set.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/coverage_state.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/thresholds.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/thresholds_approximate.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/thresholds_exact.hpp delete mode 100644 cpp/subprojects/common/include/common/thresholds/thresholds_subset.hpp delete mode 100644 cpp/subprojects/common/include/common/util/quality.hpp delete mode 100644 
cpp/subprojects/common/include/common/util/threads.hpp delete mode 100644 cpp/subprojects/common/include/common/util/validation.hpp delete mode 100644 cpp/subprojects/common/meson.build delete mode 100644 cpp/subprojects/common/src/common/binning/bin_index_vector_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/binning/bin_index_vector_dok.cpp delete mode 100644 cpp/subprojects/common/src/common/binning/bin_weight_vector.cpp delete mode 100644 cpp/subprojects/common/src/common/binning/feature_binning_equal_frequency.cpp delete mode 100644 cpp/subprojects/common/src/common/binning/feature_binning_equal_width.cpp delete mode 100644 cpp/subprojects/common/src/common/binning/feature_binning_no.cpp delete mode 100644 cpp/subprojects/common/src/common/binning/feature_binning_nominal.hpp delete mode 100644 cpp/subprojects/common/src/common/binning/threshold_vector.cpp delete mode 100644 cpp/subprojects/common/src/common/data/list_of_lists.cpp delete mode 100644 cpp/subprojects/common/src/common/data/matrix_c_contiguous.cpp delete mode 100644 cpp/subprojects/common/src/common/data/matrix_sparse_set.cpp delete mode 100644 cpp/subprojects/common/src/common/data/ring_buffer.cpp delete mode 100644 cpp/subprojects/common/src/common/data/vector_binned_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/data/vector_bit.cpp delete mode 100644 cpp/subprojects/common/src/common/data/vector_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/data/vector_dok.cpp delete mode 100644 cpp/subprojects/common/src/common/data/vector_dok_binary.cpp delete mode 100644 cpp/subprojects/common/src/common/data/vector_sparse_array.cpp delete mode 100644 cpp/subprojects/common/src/common/data/view_c_contiguous.cpp delete mode 100644 cpp/subprojects/common/src/common/data/view_csc.cpp delete mode 100644 cpp/subprojects/common/src/common/data/view_csc_binary.cpp delete mode 100644 cpp/subprojects/common/src/common/data/view_csr.cpp delete mode 100644 
cpp/subprojects/common/src/common/data/view_csr_binary.cpp delete mode 100644 cpp/subprojects/common/src/common/data/view_fortran_contiguous.cpp delete mode 100644 cpp/subprojects/common/src/common/data/view_vector.cpp delete mode 100644 cpp/subprojects/common/src/common/indices/index_iterator.cpp delete mode 100644 cpp/subprojects/common/src/common/indices/index_vector_complete.cpp delete mode 100644 cpp/subprojects/common/src/common/indices/index_vector_partial.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_info_equal.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_info_mixed.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_matrix_c_contiguous.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_matrix_csc.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_matrix_csr.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_matrix_fortran_contiguous.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_type_nominal.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_type_numerical.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_type_ordinal.cpp delete mode 100644 cpp/subprojects/common/src/common/input/feature_vector.cpp delete mode 100644 cpp/subprojects/common/src/common/input/label_matrix_c_contiguous.cpp delete mode 100644 cpp/subprojects/common/src/common/input/label_matrix_csc.cpp delete mode 100644 cpp/subprojects/common/src/common/input/label_matrix_csr.cpp delete mode 100644 cpp/subprojects/common/src/common/input/missing_feature_vector.cpp delete mode 100644 cpp/subprojects/common/src/common/learner.cpp delete mode 100644 cpp/subprojects/common/src/common/model/body_conjunctive.cpp delete mode 100644 cpp/subprojects/common/src/common/model/body_empty.cpp delete mode 100644 cpp/subprojects/common/src/common/model/condition_list.cpp delete mode 100644 
cpp/subprojects/common/src/common/model/head_complete.cpp delete mode 100644 cpp/subprojects/common/src/common/model/head_partial.cpp delete mode 100644 cpp/subprojects/common/src/common/model/rule_list.cpp delete mode 100644 cpp/subprojects/common/src/common/multi_threading/multi_threading_manual.cpp delete mode 100644 cpp/subprojects/common/src/common/multi_threading/multi_threading_no.cpp delete mode 100644 cpp/subprojects/common/src/common/post_optimization/model_builder_intermediate.cpp delete mode 100644 cpp/subprojects/common/src/common/post_optimization/post_optimization_phase_list.cpp delete mode 100644 cpp/subprojects/common/src/common/post_optimization/post_optimization_sequential.cpp delete mode 100644 cpp/subprojects/common/src/common/post_optimization/post_optimization_unused_rule_removal.cpp delete mode 100644 cpp/subprojects/common/src/common/post_processing/post_processor_no.cpp delete mode 100644 cpp/subprojects/common/src/common/prediction/label_space_info_no.cpp delete mode 100644 cpp/subprojects/common/src/common/prediction/label_vector_set.cpp delete mode 100644 cpp/subprojects/common/src/common/prediction/prediction_matrix_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/prediction/prediction_matrix_sparse_binary.cpp delete mode 100644 cpp/subprojects/common/src/common/prediction/probability_calibration_isotonic.cpp delete mode 100644 cpp/subprojects/common/src/common/prediction/probability_calibration_no.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_evaluation/score_vector_binned_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_evaluation/score_vector_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_induction/rule_induction_common.hpp delete mode 100644 cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_beam_search.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_common.hpp delete mode 100644 
cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_greedy.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_model_assemblage/default_rule.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_pruning/rule_pruning_irep.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_pruning/rule_pruning_no.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/prediction.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/prediction_complete.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/prediction_evaluated.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/prediction_partial.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_fixed.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_single.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/rule_refinement_approximate.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/rule_refinement_exact.cpp delete mode 100644 cpp/subprojects/common/src/common/rule_refinement/score_processor.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/feature_sampling_no.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/feature_sampling_predefined.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/feature_sampling_without_replacement.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/index_sampling.hpp delete mode 100644 cpp/subprojects/common/src/common/sampling/instance_sampling_no.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_example_wise.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_label_wise.cpp delete mode 100644 
cpp/subprojects/common/src/common/sampling/instance_sampling_with_replacement.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/instance_sampling_without_replacement.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/label_sampling_no.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/label_sampling_round_robin.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/label_sampling_without_replacement.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/partition_bi.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/partition_sampling_bi_random.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_example_wise.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_label_wise.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/partition_sampling_no.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/partition_single.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/random.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/stratified_sampling_common.hpp delete mode 100644 cpp/subprojects/common/src/common/sampling/stratified_sampling_example_wise.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/stratified_sampling_label_wise.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/weight_vector_bit.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/weight_vector_dense.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/weight_vector_equal.cpp delete mode 100644 cpp/subprojects/common/src/common/sampling/weight_vector_out_of_sample.cpp delete mode 100644 cpp/subprojects/common/src/common/stopping/aggregation_function_common.hpp delete mode 100644 cpp/subprojects/common/src/common/stopping/global_pruning_common.hpp delete mode 100644 
cpp/subprojects/common/src/common/stopping/global_pruning_post.cpp delete mode 100644 cpp/subprojects/common/src/common/stopping/global_pruning_pre.cpp delete mode 100644 cpp/subprojects/common/src/common/stopping/stopping_criterion_list.cpp delete mode 100644 cpp/subprojects/common/src/common/stopping/stopping_criterion_size.cpp delete mode 100644 cpp/subprojects/common/src/common/stopping/stopping_criterion_time.cpp delete mode 100644 cpp/subprojects/common/src/common/thresholds/coverage_mask.cpp delete mode 100644 cpp/subprojects/common/src/common/thresholds/coverage_set.cpp delete mode 100644 cpp/subprojects/common/src/common/thresholds/thresholds_approximate.cpp delete mode 100644 cpp/subprojects/common/src/common/thresholds/thresholds_common.hpp delete mode 100644 cpp/subprojects/common/src/common/thresholds/thresholds_exact.cpp delete mode 100644 doc/Doxyfile_boosting delete mode 100644 doc/Doxyfile_common delete mode 100644 doc/_static/logo.png delete mode 100644 doc/_static/structure.png delete mode 100644 doc/api/codestyle.inc.rst delete mode 100644 doc/api/compilation.inc.rst delete mode 100644 doc/api/cpp.inc.rst delete mode 100644 doc/api/documentation.inc.rst delete mode 100644 doc/api/index.rst delete mode 100644 doc/api/python.inc.rst delete mode 100644 doc/api/structure.inc.rst delete mode 100644 doc/api/testing.inc.rst delete mode 100644 doc/conf.py delete mode 100644 doc/index.rst delete mode 100644 doc/python/boosting/conf.py delete mode 100644 doc/python/common/conf.py delete mode 100644 doc/python/testbed/conf.py delete mode 100644 doc/quickstart/index.rst delete mode 100644 doc/quickstart/installation.inc.rst delete mode 100644 doc/quickstart/parameters.inc.rst delete mode 100644 doc/quickstart/rules.inc.rst delete mode 100644 doc/quickstart/usage.inc.rst delete mode 100644 doc/references/firstparty.inc.rst delete mode 100644 doc/references/index.rst delete mode 100644 doc/references/thirdparty.inc.rst delete mode 100644 doc/requirements.txt 
delete mode 120000 doc/source/CHANGELOG.md delete mode 120000 doc/source/CONTRIBUTORS.md delete mode 120000 doc/source/LICENSE.md delete mode 100644 doc/testbed/arguments.inc.rst delete mode 100644 doc/testbed/experiments.inc.rst delete mode 100644 doc/testbed/index.rst delete mode 100644 python/meson.build delete mode 100644 python/requirements.txt delete mode 100644 python/subprojects/boosting/meson.build delete mode 100644 python/subprojects/boosting/mlrl/__init__.py delete mode 100644 python/subprojects/boosting/mlrl/boosting/__init__.py delete mode 100644 python/subprojects/boosting/mlrl/boosting/boosting_learners.py delete mode 100644 python/subprojects/boosting/mlrl/boosting/config.py delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/__init__.py delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/head_type.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/head_type.pyx delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/label_binning.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/label_binning.pyx delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/learner.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/learner.pyx delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/learner_boomer.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/learner_boomer.pyx delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/post_processor.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/post_processor.pyx delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/prediction.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/prediction.pyx delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/probability_calibration.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/probability_calibration.pyx delete mode 
100644 python/subprojects/boosting/mlrl/boosting/cython/regularization.pxd delete mode 100644 python/subprojects/boosting/mlrl/boosting/cython/regularization.pyx delete mode 100644 python/subprojects/boosting/pyproject.toml delete mode 100644 python/subprojects/boosting/setup.py delete mode 100644 python/subprojects/common/README.md delete mode 100644 python/subprojects/common/meson.build delete mode 100644 python/subprojects/common/mlrl/__init__.py delete mode 100644 python/subprojects/common/mlrl/common/__init__.py delete mode 100644 python/subprojects/common/mlrl/common/arrays.py delete mode 100644 python/subprojects/common/mlrl/common/config.py delete mode 100644 python/subprojects/common/mlrl/common/cython/__init__.py delete mode 100644 python/subprojects/common/mlrl/common/cython/_arrays.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/_types.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_binning.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_binning.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_info.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_info.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_matrix.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_matrix.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_sampling.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/feature_sampling.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/instance_sampling.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/instance_sampling.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/label_matrix.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/label_matrix.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/label_sampling.pxd delete mode 100644 
python/subprojects/common/mlrl/common/cython/label_sampling.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/label_space_info.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/label_space_info.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/learner.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/learner.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/multi_threading.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/multi_threading.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/partition_sampling.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/partition_sampling.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/post_optimization.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/post_optimization.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/prediction.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/prediction.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/probability_calibration.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/probability_calibration.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/rule_induction.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/rule_induction.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/rule_model.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/rule_model.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/stopping_criterion.pxd delete mode 100644 python/subprojects/common/mlrl/common/cython/stopping_criterion.pyx delete mode 100644 python/subprojects/common/mlrl/common/cython/validation.py delete mode 100644 python/subprojects/common/mlrl/common/data_types.py delete mode 100644 python/subprojects/common/mlrl/common/format.py delete mode 100644 
python/subprojects/common/mlrl/common/learners.py delete mode 100644 python/subprojects/common/mlrl/common/options.py delete mode 100644 python/subprojects/common/mlrl/common/rule_learners.py delete mode 100644 python/subprojects/common/pyproject.toml delete mode 100644 python/subprojects/common/setup.py delete mode 100644 python/subprojects/testbed/README.md delete mode 100644 python/subprojects/testbed/mlrl/__init__.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/__init__.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/characteristics.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/data.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/data_characteristics.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/data_splitting.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/evaluation.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/experiments.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/format.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/io.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/label_vectors.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/main_boomer.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/model_characteristics.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/models.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/output_writer.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/parameters.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/persistence.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/prediction_characteristics.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/prediction_scope.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/predictions.py delete mode 100644 python/subprojects/testbed/mlrl/testbed/probability_calibration.py delete mode 100644 
python/subprojects/testbed/mlrl/testbed/runnables.py delete mode 100644 python/subprojects/testbed/pyproject.toml delete mode 100644 python/subprojects/testbed/setup.py delete mode 100644 python/subprojects/testbed/tests/res/data/breast-cancer.arff delete mode 100644 python/subprojects/testbed/tests/res/data/breast-cancer.xml delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined.xml delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-1.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-10.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-2.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-3.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-4.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-5.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-6.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-7.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-8.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_fold-9.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_test.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions-predefined_training.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions.arff delete mode 100644 python/subprojects/testbed/tests/res/data/emotions.xml delete mode 100644 python/subprojects/testbed/tests/res/data/enron.arff delete mode 100644 python/subprojects/testbed/tests/res/data/enron.xml delete mode 100644 python/subprojects/testbed/tests/res/data/langlog.arff delete mode 100644 python/subprojects/testbed/tests/res/data/langlog.xml delete mode 100644 
python/subprojects/testbed/tests/res/data/meka.arff delete mode 100644 python/subprojects/testbed/tests/res/data/weather.arff delete mode 100644 python/subprojects/testbed/tests/res/data/weather.xml delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-1.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-10.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-2.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-3.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-4.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-5.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-6.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-7.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-8.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_fold-9.csv delete mode 100644 python/subprojects/testbed/tests/res/in/parameters_overall.csv delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/binary-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/binary-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/data-characteristics_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/data-characteristics_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/data-characteristics_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_cross-validation-predefined.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_no-data-split.txt 
delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_train-test-predefined.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/evaluation_training-data.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-complete-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-complete-heads_equal-width-label-binning.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-partial-dynamic-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-partial-dynamic-heads_equal-width-label-binning.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-partial-fixed-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-partial-fixed-heads_equal-width-label-binning.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/example-wise-single-label-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_binary-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_nominal-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-frequency_numerical-features-sparse.txt delete mode 100644 
python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_binary-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_nominal-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-binning-equal-width_numerical-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-sampling-no.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/feature-sampling-without-replacement.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/instance-sampling-no.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/instance-sampling-stratified-example-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/instance-sampling-stratified-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/instance-sampling-with-replacement.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/instance-sampling-without-replacement.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-format-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-format-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-sampling-no.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-sampling-round-robin.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-sampling-without-replacement.txt delete mode 100644 
python/subprojects/testbed/tests/res/out/boomer/label-vectors_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-vectors_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-vectors_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-complete-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-complete-heads_equal-width-label-binning.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-partial-dynamic-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-partial-dynamic-heads_equal-width-label-binning.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-partial-fixed-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-partial-fixed-heads_equal-width-label-binning.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/label-wise-single-label-heads.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/loss-logistic-example-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/loss-logistic-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/loss-squared-error-example-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/loss-squared-error-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/loss-squared-hinge-example-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/loss-squared-hinge-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/meka-format.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/model-characteristics_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/model-characteristics_single-fold.txt delete mode 100644 
python/subprojects/testbed/tests/res/out/boomer/model-characteristics_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/model-persistence_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/model-persistence_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/model-persistence_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/no-default-rule.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/nominal-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/nominal-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/numeric-features-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/numeric-features-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/parameters_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/parameters_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/parameters_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/post-pruning_no-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/post-pruning_random-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/post-pruning_stratified-example-wise-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/post-pruning_stratified-label-wise-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/pre-pruning_no-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/pre-pruning_random-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/pre-pruning_stratified-example-wise-holdout.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/pre-pruning_stratified-label-wise-holdout.txt delete mode 
100644 python/subprojects/testbed/tests/res/out/boomer/prediction-characteristics_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/prediction-characteristics_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/prediction-characteristics_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/prediction-characteristics_training-data.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/prediction-format-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/prediction-format-sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictions_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictions_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictions_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictions_training-data.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-example-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-example-wise_based-on-probabilities.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-example-wise_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-example-wise_incremental_based-on-probabilities.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-example-wise_sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-example-wise_sparse_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-gfm.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-gfm_based-on-probabilities.txt delete mode 100644 
python/subprojects/testbed/tests/res/out/boomer/predictor-binary-gfm_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-gfm_incremental_based-on-probabilities.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-gfm_sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-gfm_sparse_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-label-wise_based-on-probabilities.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-label-wise_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-label-wise_incremental_based-on-probabilities.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-label-wise_sparse.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-binary-label-wise_sparse_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-probability-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-probability-label-wise_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-probability-marginalized.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-probability-marginalized_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-score-label-wise.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/predictor-score-label-wise_incremental.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/pruning-irep.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/pruning-no.txt delete mode 100644 
python/subprojects/testbed/tests/res/out/boomer/rule-induction-top-down-beam-search.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/rules_cross-validation.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/rules_single-fold.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/rules_train-test.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/sequential-post-optimization.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/single-label-classification.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/single-label-probabilities.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/single-label-regression.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/statistics-sparse_label-format-dense.txt delete mode 100644 python/subprojects/testbed/tests/res/out/boomer/statistics-sparse_label-format-sparse.txt delete mode 100644 python/subprojects/testbed/tests/test_boosting.py delete mode 100644 python/subprojects/testbed/tests/test_common.py diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index ad215397..00000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,181 +0,0 @@ -# Release Notes - -## Version 0.9.0 (Jul. 2nd, 2023) - -A major update to the BOOMER algorithm that introduces the following changes. - -***This release comes with several API changes. For an updated overview of the available parameters and command line arguments, please refer to the [documentation](https://mlrl-boomer.readthedocs.io/en/0.9.0/).*** - -### Algorithmic Enhancements - -* **Sparse matrices can now be used to store gradients and Hessians** if supported by the loss function. The desired behavior can be specified via a new parameter `--statistic-format`. 
-* **Rules with partial heads can now be learned** by setting the parameter `--head-type` to the value `partial-fixed`, if the number of predicted labels should be predefined, or `partial-dynamic`, if the subset of predicted labels should be determined dynamically. -* **A beam search can now be used** for the induction of individual rules by setting the parameter `--rule-induction` to the value `top-down-beam-search`. -* **Variants of the squared error loss and squared hinge loss**, which take all labels of an example into account at the same time, can now be used by setting the parameter `--loss` to the value `squared-error-example-wise` or `squared-hinge-example-wise`. -* **Probability estimates can be obtained for each label independently or via marginalization** over the label vectors encountered in the training data by setting the new parameter `--probability-predictor` to the value `label-wise` or `marginalized`. -* **Predictions that maximize the example-wise F1-measure can now be obtained** by setting the parameter `--classification-predictor` to the value `gfm`. -* **Binary predictions can now be derived from probability estimates** by specifying the new option `based_on_probabilities`. -* **Isotonic regression models can now be used** to calibrate marginal and joint probabilities predicted by a model via the new parameters `--marginal-probability-calibration` and `--joint-probability-calibration`. -* **The rules in a previously learned model can now be post-optimized** by reconstructing each one of them in the context of the other rules via the new parameter `--sequential-post-optimization`. -* **Early stopping or post-pruning can now be used** by setting the new parameter `--global-pruning` to the value `pre-pruning` or `post-pruning`. -* **Single labels can now be sampled in a round-robin fashion** by setting the parameter `--label-sampling` to the new value `round-robin`.
-* **A fixed number of trailing features can now be retained** when the parameter `--feature-sampling` is set to the value `without-replacement` by specifying the option `num_retained`. - -### Additions to the Command Line API - -* **Data sets in the MEKA format are now supported.** -* **Certain characteristics of binary predictions can be printed or written to output files** via the new arguments `--print-prediction-characteristics` and `--store-prediction-characteristics`. -* **Unique label vectors contained in the training data can be printed or written to output files** via the new arguments `--print-label-vectors` and `--store-label-vectors`. -* **Models for the calibration of marginal or joint probabilities can be printed or written to output files** via the new arguments `--print-marginal-probability-calibration-model`, `--store-marginal-probability-calibration-model`, `--print-joint-probability-calibration-model` and `--store-joint-probability-calibration-model`. -* **Models can now be evaluated repeatedly, using a subset of their rules with increasing size,** by specifying the argument `--incremental-prediction`. -* **More control of how data is split into training and test sets** is now provided by the argument `--data-split` that replaces the arguments `--folds` and `--current-fold`. -* **Binary labels, regression scores, or probabilities can now be predicted,** depending on the value of the new argument `--prediction-type`, which can be set to the values `binary`, `scores`, or `probabilities`. -* **Individual evaluation measures can now be enabled or disabled** via additional options that have been added to the arguments `--print-evaluation` and `--store-evaluation`. -* **The presentation of values printed on the console has vastly been improved.** In addition, options for controlling the presentation of values to be printed or written to output files have been added to various command line arguments. 
- -### Bugfixes - -* The behavior of the parameter `--label-format` has been fixed when set to the value `auto`. -* The behavior of the parameters `--holdout` and `--instance-sampling` has been fixed when set to the value `stratified-label-wise`. -* The behavior of the parameter `--binary-predictor` has been fixed when set to the value `example-wise` and using a model that has been loaded from disk. -* Rules are now guaranteed to not cover fewer examples than specified via the option `min_coverage`. The option is now also taken into account when using feature binning. Alternatively, the minimum coverage of rules can now also be specified as a fraction via the option `min_support`. - -### API Changes - -* The parameter `--early-stopping` has been replaced with a new parameter `--global-pruning`. -* The parameter `--pruning` has been renamed to `--rule-pruning`. -* The parameter `--classification-predictor` has been renamed to `--binary-predictor`. -* The command line argument `--predict-probabilities` has been replaced with a new argument `--prediction-type`. -* The command line argument `--predicted-label-format` has been renamed to `--prediction-format`. - -### Quality-of-Life Improvements - -* Continuous integration is now used to test the most common functionalities of the BOOMER algorithm and the corresponding command line API. -* Successful generation of the documentation is now tested via continuous integration. -* Style definitions for Python and C++ code are now enforced by applying the tools `clang-format`, `yapf`, and `isort` via continuous integration. - -## Version 0.8.2 (Apr. 11th, 2022) - -A bugfix release that solves the following issues: - -* Fixed prebuilt packages available at [PyPI](https://pypi.org/project/mlrl-boomer/). -* Fixed output of nominal values when using the option `--print-rules true`. - -## Version 0.8.1 (Mar.
4th, 2022) - -A bugfix release that solves the following issues: - -* Missing feature values are now dealt with correctly when using feature binning. -* A rare issue that may cause segmentation faults when using instance sampling has been fixed. - -## Version 0.8.0 (Jan. 31, 2022) - -A major update to the BOOMER algorithm that introduces the following changes. - -***This release comes with changes to the command line API. For an updated overview of the available parameters, please refer to the [documentation](https://mlrl-boomer.readthedocs.io/en/0.8.0/).*** - -* The programmatic C++ API was redesigned for a more convenient configuration of algorithms. This does also drastically reduce the amount of wrapper code that is necessary to access the API from other programming languages and therefore facilitates the support of additional languages in the future. -* An issue that may cause segmentation faults when using stratified sampling methods for the creation of holdout sets has been fixed. -* Pre-built packages for Windows systems are now available at [PyPI](https://pypi.org/project/mlrl-boomer/). -* Pre-built packages for Linux aarch64 systems are now provided. - -## Version 0.7.1 (Dec. 15, 2021) - -A bugfix release that solves the following issues: - -* Fixes an issue preventing the use of dense representations of ground truth label matrices that was introduced in version 0.7.0. -* Pre-built packages for MacOS systems are now available at [PyPI](https://pypi.org/project/mlrl-boomer/). -* Linux and MacOS packages for Python 3.10 are now provided. - -## Version 0.7.0 (Dec. 5, 2021) - -A major update to the BOOMER algorithm that introduces the following changes: - -* L1 regularization can now be used. -* A more space-efficient data structure is now used for the sparse representation of binary predictions. -* The Python API does now allow to access the rules in a model in a programmatic way. 
-* It is now possible to output certain characteristics of training datasets and rule models. -* Pre-built packages for the Linux platform are now available at [PyPI](https://pypi.org/project/mlrl-boomer/). -* The [documentation](https://mlrl-boomer.readthedocs.io) has vastly been improved. - -## Version 0.6.2 (Oct 4, 2021) - -A bugfix release that solves the following issues: - -* Fixes a segmentation fault when a sparse feature matrix should be used for prediction that was introduced in version 0.6.0. - -## Version 0.6.1 (Sep 30, 2021) - -A bugfix release that solves the following issues: - -* Fixes a mathematical problem when calculating the quality of potential single-label rules that was introduced in version 0.6.0. - -## Version 0.6.0 (Sep 6, 2021) - -A major update to the BOOMER algorithm that introduces the following changes. - -***This release comes with changes to the command line API. For brevity and consistency, some parameters and/or their values have been renamed. Moreover, some parameters have been updated to use more reasonable default values. For an updated overview of the available parameters, please refer to the [documentation](https://mlrl-boomer.readthedocs.io/en/0.6.0/).*** - -* The parameter `--instance-sampling` does now allow to use stratified sampling (`stratified-label-wise` and `stratified-example-wise`). -* The parameter `--holdout` does now allow to use stratified sampling (`stratified-label-wise` and `stratified-example-wise`). -* The parameter `--recalculate-predictions` does now allow to specify whether the predictions of rules should be recalculated on the entire training data, if instance sampling is used. -* An additional parameter (`--prediction-format`) that allows to specify whether predictions should be stored using dense or sparse matrices has been added. -* The code for the construction of rule heads has been reworked, resulting in minor performance improvements. 
-* The unnecessary calculation of Hessians is now avoided when using single-label rules for the minimization of a non-decomposable loss function, resulting in a significant performance improvement. -* A programmatic C++ API for configuring algorithms, including the validation of parameters, is now provided. -* Documentation is now available [online](https://mlrl-boomer.readthedocs.io). - -## Version 0.5.0 (Jun 27, 2021) - -A major update to the BOOMER algorithm that introduces the following changes: - -* Gradient-based label binning (GBLB) can be used to assign labels to a predefined number of bins. - -## Version 0.4.0 (Mar 31, 2021) - -A major update to the BOOMER algorithm that introduces the following changes: - -* Large parts of the code have been refactored, and the core algorithm has been migrated to C++ entirely. It is now built and compiled using Meson and Ninja, which results in drastically reduced compile times. -* The (label- and example-wise) logistic loss functions have been rewritten to better prevent numerical problems. -* Approximate methods for evaluating potential conditions of rules, based on unsupervised binning methods (currently equal-width- and equal-frequency-binning), have been added. -* The parameter `--predictor` does now allow using different algorithms for prediction (`label-wise` or `example-wise`). -* An early stopping mechanism has been added, which allows to stop the induction of rules as soon as the quality of the model does not improve on a holdout set. -* Multi-threading can be used to parallelize the prediction for different examples across multiple CPU cores. -* Multi-threading can be used to parallelize the calculation of gradients and Hessians for different examples across multiple CPU cores. -* Probability estimates can be predicted when using the loss function `label-wise-logistic-loss`. -* The algorithm does now support data sets with missing feature values.
-* The loss function `label-wise-squared-hinge-loss` has been added. -* Experiments using single-label data sets are now supported out of the box. - -## Version 0.3.0 (Sep 14, 2020) - -A major update to the BOOMER algorithm that features the following changes: - -* Large parts of the code (loss functions, calculation of gradients/Hessians, calculation of predictions/quality scores) have been refactored and rewritten in C++. This comes with a constant speed-up of training times. -* Multi-threading can be used to parallelize the evaluation of a rule's potential refinements across multiple CPU cores. -* Sparse ground truth label matrices can now be used for training, which may reduce the memory footprint in case of large data sets. -* Additional parameters (`feature-format` and `label-format`) that allow to specify the preferred format of the feature and label matrix have been added. - -## Version 0.2.0 (Jun 28, 2020) - -A major update to the BOOMER algorithm that features the following changes: - -* Includes many refactorings and quality-of-life improvements. Code that is not directly related to the algorithm, such as the implementation of baselines, has been removed. -* The algorithm is now able to natively handle nominal features without the need for pre-processing techniques such as one-hot encoding. -* Sparse feature matrices can now be used for training and prediction, which reduces the memory footprint and results in a significant speed-up of training times on some data sets. -* Additional hyper-parameters (`min_coverage`, `max_conditions` and `max_head_refinements`) that provide fine-grained control over the specificity/generality of rules have been added. - -## Version 0.1.0 (Jun 22, 2020) - -The first version of the BOOMER algorithm used in the following publication: - -*Michael Rapp, Eneldo Loza Mencía, Johannes Fürnkranz and Eyke Hüllermeier. Gradient-based Label Binning in Multi-label Classification.
In: Proceedings of the European Conference on Machine Learning and Knowledge Discovery in Databases (ECML-PKDD), 2021, Springer.* - -This version supports the following features to learn an ensemble of boosted classification rules: - -* Different label-wise or example-wise loss functions can be minimized during training (optionally using L2 regularization). -* The rules may predict for a single label, or for all labels (which enables to model local label dependencies). -* When learning a new rule, random samples of the training examples, features or labels may be used, including different techniques such as sampling with or without replacement. -* The impact of individual rules on the ensemble can be controlled using shrinkage. -* The conditions of a recently induced rule can be pruned based on a hold-out set. -* The algorithm currently only supports numerical or ordinal features. Nominal features can be handled by using one-hot encoding. diff --git a/CODE_OF_CONDUCT.md b/CODE_OF_CONDUCT.md deleted file mode 100644 index 569c6368..00000000 --- a/CODE_OF_CONDUCT.md +++ /dev/null @@ -1,128 +0,0 @@ -# Contributor Covenant Code of Conduct - -## Our Pledge - -We as members, contributors, and leaders pledge to make participation in our -community a harassment-free experience for everyone, regardless of age, body -size, visible or invisible disability, ethnicity, sex characteristics, gender -identity and expression, level of experience, education, socio-economic status, -nationality, personal appearance, race, religion, or sexual identity -and orientation. - -We pledge to act and interact in ways that contribute to an open, welcoming, -diverse, inclusive, and healthy community. 
- -## Our Standards - -Examples of behavior that contributes to a positive environment for our -community include: - -* Demonstrating empathy and kindness toward other people -* Being respectful of differing opinions, viewpoints, and experiences -* Giving and gracefully accepting constructive feedback -* Accepting responsibility and apologizing to those affected by our mistakes, - and learning from the experience -* Focusing on what is best not just for us as individuals, but for the - overall community - -Examples of unacceptable behavior include: - -* The use of sexualized language or imagery, and sexual attention or - advances of any kind -* Trolling, insulting or derogatory comments, and personal or political attacks -* Public or private harassment -* Publishing others' private information, such as a physical or email - address, without their explicit permission -* Other conduct which could reasonably be considered inappropriate in a - professional setting - -## Enforcement Responsibilities - -Community leaders are responsible for clarifying and enforcing our standards of -acceptable behavior and will take appropriate and fair corrective action in -response to any behavior that they deem inappropriate, threatening, offensive, -or harmful. - -Community leaders have the right and responsibility to remove, edit, or reject -comments, commits, code, wiki edits, issues, and other contributions that are -not aligned to this Code of Conduct, and will communicate reasons for moderation -decisions when appropriate. - -## Scope - -This Code of Conduct applies within all community spaces, and also applies when -an individual is officially representing the community in public spaces. -Examples of representing our community include using an official e-mail address, -posting via an official social media account, or acting as an appointed -representative at an online or offline event. 
- -## Enforcement - -Instances of abusive, harassing, or otherwise unacceptable behavior may be -reported to the community leaders responsible for enforcement at -michael.rapp.ml@gmail.com. -All complaints will be reviewed and investigated promptly and fairly. - -All community leaders are obligated to respect the privacy and security of the -reporter of any incident. - -## Enforcement Guidelines - -Community leaders will follow these Community Impact Guidelines in determining -the consequences for any action they deem in violation of this Code of Conduct: - -### 1. Correction - -**Community Impact**: Use of inappropriate language or other behavior deemed -unprofessional or unwelcome in the community. - -**Consequence**: A private, written warning from community leaders, providing -clarity around the nature of the violation and an explanation of why the -behavior was inappropriate. A public apology may be requested. - -### 2. Warning - -**Community Impact**: A violation through a single incident or series -of actions. - -**Consequence**: A warning with consequences for continued behavior. No -interaction with the people involved, including unsolicited interaction with -those enforcing the Code of Conduct, for a specified period of time. This -includes avoiding interactions in community spaces as well as external channels -like social media. Violating these terms may lead to a temporary or -permanent ban. - -### 3. Temporary Ban - -**Community Impact**: A serious violation of community standards, including -sustained inappropriate behavior. - -**Consequence**: A temporary ban from any sort of interaction or public -communication with the community for a specified period of time. No public or -private interaction with the people involved, including unsolicited interaction -with those enforcing the Code of Conduct, is allowed during this period. -Violating these terms may lead to a permanent ban. - -### 4. 
Permanent Ban - -**Community Impact**: Demonstrating a pattern of violation of community -standards, including sustained inappropriate behavior, harassment of an -individual, or aggression toward or disparagement of classes of individuals. - -**Consequence**: A permanent ban from any sort of public interaction within -the community. - -## Attribution - -This Code of Conduct is adapted from the [Contributor Covenant][homepage], -version 2.0, available at -https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. - -Community Impact Guidelines were inspired by [Mozilla's code of conduct -enforcement ladder](https://github.com/mozilla/diversity). - -[homepage]: https://www.contributor-covenant.org - -For answers to common questions about this code of conduct, see the FAQ at -https://www.contributor-covenant.org/faq. Translations are available at -https://www.contributor-covenant.org/translations. diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md deleted file mode 100644 index 658c86c6..00000000 --- a/CONTRIBUTORS.md +++ /dev/null @@ -1,22 +0,0 @@ -# Contributors - -BOOMER is open source software. Everyone is welcomed to contribute to the project to enhance its functionality and make it more accessible to a broader audience. 
- -## List of Contributors - -We highly appreciate the efforts of the following persons (listed in alphabetical order), who have actively contributed code to the project: - -* [Andreas Seidl Fernandez](https://github.com/AndreasSeidl) -* [Anna Kulischkin](https://github.com/Anna-inf) -* [Carsten Ostlender](https://github.com/CarstenOstlender) -* [Dennis Drössler](https://github.com/ddroessler) -* [Eneldo Loza Mencía](https://github.com/keelm) -* [Jakob Steeg](https://github.com/JayJayJay1) -* [Kevin Kampa](https://github.com/bapfelbaum) -* [Lukas Johannes Eberle](https://github.com/LukasEberle) -* [Michael Rapp](https://github.com/michael-rapp) -* [Paul Trojahn](https://github.com/ptrojahn) - -## Special Thanks - -We would also like to thank Johannes Fürnkranz and Eyke Hüllermeier for making the project possible in the first place and sharing their extensive knowledge. diff --git a/LICENSE.md b/LICENSE.md deleted file mode 100644 index f2571bd7..00000000 --- a/LICENSE.md +++ /dev/null @@ -1,9 +0,0 @@ -# MIT License - -**Copyright (c) 2020 - 2023 Michael Rapp** - -Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/Makefile b/Makefile deleted file mode 100644 index 7ce0bdf8..00000000 --- a/Makefile +++ /dev/null @@ -1,242 +0,0 @@ -default_target: install -.PHONY: clean_venv clean_cpp clean_cython clean_compile clean_cpp_install clean_cython_install clean_wheel \ - clean_install clean_doc clean test_format_cpp test_format_python test_format format_cpp format_python format \ - compile_cpp compile_cython compile install_cpp install_cython wheel install apidoc_cpp apidoc_python doc - -UNAME = $(if $(filter Windows_NT,${OS}),Windows,$(shell uname)) -IS_WIN = $(filter Windows,${UNAME}) - -VENV_DIR = venv -CPP_SRC_DIR = cpp -CPP_BUILD_DIR = ${CPP_SRC_DIR}/build -CPP_PACKAGE_DIR = ${CPP_SRC_DIR}/subprojects -PYTHON_SRC_DIR = python -PYTHON_BUILD_DIR = ${PYTHON_SRC_DIR}/build -PYTHON_PACKAGE_DIR = ${PYTHON_SRC_DIR}/subprojects -DIST_DIR = dist -DOC_DIR = doc -DOC_API_DIR = ${DOC_DIR}/apidoc -DOC_TMP_DIR = ${DOC_DIR}/python -DOC_BUILD_DIR = ${DOC_DIR}/_build - -PS = powershell -Command -PYTHON = $(if ${IS_WIN},python,python3) -VENV_CREATE = ${PYTHON} -m venv ${VENV_DIR} -VENV_ACTIVATE = $(if ${IS_WIN},${PS} "${VENV_DIR}/Scripts/activate.bat;",. ${VENV_DIR}/bin/activate) -VENV_DEACTIVATE = $(if ${IS_WIN},${PS} "${VENV_DIR}/Scripts/deactivate.bat;",deactivate) -PIP_INSTALL = python -m pip install --prefer-binary -ISORT = isort --settings-path . 
--virtual-env ${VENV_DIR} --skip-gitignore -ISORT_DRYRUN = ${ISORT} --check -ISORT_INPLACE = ${ISORT} --overwrite-in-place -YAPF = yapf -r -p --style=.style.yapf --verbose -YAPF_DRYRUN = ${YAPF} --diff -YAPF_INPLACE = ${YAPF} -i -CLANG_FORMAT = clang-format --style=file --verbose -CLANG_FORMAT_DRYRUN = ${CLANG_FORMAT} -n --Werror -CLANG_FORMAT_INPLACE = ${CLANG_FORMAT} -i -MESON_SETUP = meson setup -MESON_COMPILE = meson compile -MESON_INSTALL = meson install -WHEEL_BUILD = python -m build --wheel -WHEEL_INSTALL = python -m pip install --force-reinstall --no-deps -PYTHON_UNITTEST = python -m unittest discover -v -f -s -DOXYGEN = $(if ${IS_WIN},for /f %%i in (./../VERSION) do set PROJECT_NUMBER=%%i && doxygen,PROJECT_NUMBER=${file < VERSION} doxygen) -SPHINX_APIDOC = sphinx-apidoc --tocfile index -f -SPHINX_BUILD = sphinx-build -M html - -define delete_dir - $(if ${IS_WIN},\ - ${PS} "if (Test-Path ${1}) {rm ${1} -Recurse -Force}",\ - rm -rf ${1}) -endef - -define delete_files_recursively - $(if ${IS_WIN},\ - ${PS} "rm ${1} -Recurse -Force -Include ${2}",\ - rm -f ${1}/**/${2}) -endef - -define delete_dirs_recursively - $(if ${IS_WIN},\ - ${PS} "rm ${1} -Recurse -Force -Include ${2}",\ - rm -rf ${1}/**/${2}) -endef - -define install_wheels - $(if ${IS_WIN},\ - ${PS} "${WHEEL_INSTALL} (Get-ChildItem -Path ${1} | Where Name -Match '\.whl' | Select-Object -ExpandProperty FullName);",\ - ${WHEEL_INSTALL} ${1}/*.whl) -endef - -define create_dir - $(if ${IS_WIN},\ - ${PS} "New-Item -Path ${1} -ItemType "directory" -Force",\ - mkdir -p ${1}) -endef - -define clang_format_dryrun_recursively - $(if ${IS_WIN},\ - (${PS} "Get-ChildItem -Path ${1} -Recurse | Where Name -Match '\.hpp|\.cpp' | Select-Object -ExpandProperty FullName | Out-File .cpp_files.tmp -Encoding utf8";\ - ${CLANG_FORMAT_DRYRUN} --files=.cpp_files.tmp;\ - ${PS} "rm .cpp_files.tmp -Force"),\ - find ${1} -type f \( -iname "*.hpp" -o -iname "*.cpp" \) -exec ${CLANG_FORMAT_DRYRUN} {} +) -endef - -define 
clang_format_inplace_recursively - $(if ${IS_WIN},\ - (${PS} "Get-ChildItem -Path ${1} -Recurse | Where Name -Match '\.hpp|\.cpp' | Select-Object -ExpandProperty FullName | Out-File .cpp_files.tmp -Encoding utf8";\ - ${CLANG_FORMAT_INPLACE} --files=.cpp_files.tmp;\ - ${PS} "rm .cpp_files.tmp -Force"),\ - find ${1} -type f \( -iname "*.hpp" -o -iname "*.cpp" \) -exec ${CLANG_FORMAT_INPLACE} {} +) -endef - -clean_venv: - @echo Removing virtual Python environment... - $(call delete_dir,${VENV_DIR}) - -clean_cpp: - @echo Removing C++ compilation files... - $(call delete_dir,${CPP_BUILD_DIR}) - -clean_cython: - @echo Removing Cython compilation files... - $(call delete_dir,${PYTHON_BUILD_DIR}) - -clean_compile: clean_cpp clean_cython - -clean_install: - @echo Removing shared libraries and extension modules from source tree... - $(call delete_files_recursively,${PYTHON_PACKAGE_DIR},*.so*) - $(call delete_files_recursively,${PYTHON_PACKAGE_DIR},*.dylib) - $(call delete_files_recursively,${PYTHON_PACKAGE_DIR},*.dll) - $(call delete_files_recursively,${PYTHON_PACKAGE_DIR},*.lib) - $(call delete_files_recursively,${PYTHON_PACKAGE_DIR},*.pyd) - -clean_wheel: - @echo Removing Python build files... - $(call delete_dirs_recursively,${PYTHON_PACKAGE_DIR},build) - $(call delete_dirs_recursively,${PYTHON_PACKAGE_DIR},${DIST_DIR}) - $(call delete_dirs_recursively,${PYTHON_PACKAGE_DIR},*.egg-info) - -clean_doc: - @echo Removing documentation... - $(call delete_dir,${DOC_BUILD_DIR}) - $(call delete_dir,${DOC_API_DIR}) - $(call delete_files_recursively,${DOC_TMP_DIR},*.rst) - -clean: clean_doc clean_wheel clean_compile clean_install clean_venv - -venv: - @echo Creating virtual Python environment... - ${VENV_CREATE} - ${VENV_ACTIVATE} \ - && ${PIP_INSTALL} -r ${PYTHON_SRC_DIR}/requirements.txt \ - && ${VENV_DEACTIVATE} - -test_format_python: venv - @echo Checking Python code style... 
- ${VENV_ACTIVATE} \ - && ${ISORT_DRYRUN} ${PYTHON_PACKAGE_DIR} \ - && ${YAPF_DRYRUN} ${PYTHON_PACKAGE_DIR} \ - && ${VENV_DEACTIVATE} - -test_format_cpp: venv - @echo Checking C++ code style... - ${VENV_ACTIVATE} \ - && $(call clang_format_dryrun_recursively,${CPP_PACKAGE_DIR}) \ - && ${VENV_DEACTIVATE} - -test_format: test_format_python test_format_cpp - -format_python: venv - @echo Formatting Python code... - ${VENV_ACTIVATE} \ - && ${ISORT_INPLACE} ${PYTHON_PACKAGE_DIR} \ - && ${YAPF_INPLACE} ${PYTHON_PACKAGE_DIR} \ - && ${VENV_DEACTIVATE} - -format_cpp: venv - @echo Formatting C++ code... - ${VENV_ACTIVATE} \ - && $(call clang_format_inplace_recursively,${CPP_PACKAGE_DIR}) \ - && ${VENV_DEACTIVATE} - -format: format_python format_cpp - -compile_cpp: venv - @echo Compiling C++ code... - ${VENV_ACTIVATE} \ - && ${MESON_SETUP} ${CPP_BUILD_DIR} ${CPP_SRC_DIR} \ - && ${MESON_COMPILE} -C ${CPP_BUILD_DIR} \ - && ${VENV_DEACTIVATE} - -compile_cython: venv - @echo Compiling Cython code... - ${VENV_ACTIVATE} \ - && ${MESON_SETUP} ${PYTHON_BUILD_DIR} ${PYTHON_SRC_DIR} \ - && ${MESON_COMPILE} -C ${PYTHON_BUILD_DIR} \ - && ${VENV_DEACTIVATE} - -compile: compile_cpp compile_cython - -install_cpp: compile_cpp - @echo Installing shared libraries into source tree... - ${VENV_ACTIVATE} \ - && ${MESON_INSTALL} -C ${CPP_BUILD_DIR} \ - && ${VENV_DEACTIVATE} - -install_cython: compile_cython - @echo Installing extension modules into source tree... - ${VENV_ACTIVATE} \ - && ${MESON_INSTALL} -C ${PYTHON_BUILD_DIR} \ - && ${VENV_DEACTIVATE} - -wheel: install_cpp install_cython - @echo Building wheel packages... - ${VENV_ACTIVATE} \ - && ${WHEEL_BUILD} ${PYTHON_PACKAGE_DIR}/common \ - && ${WHEEL_BUILD} ${PYTHON_PACKAGE_DIR}/boosting \ - && ${WHEEL_BUILD} ${PYTHON_PACKAGE_DIR}/testbed \ - && ${VENV_DEACTIVATE} - -install: wheel - @echo Installing wheel packages into virtual environment... 
- ${VENV_ACTIVATE} \ - && $(call install_wheels,${PYTHON_PACKAGE_DIR}/common/${DIST_DIR}) \ - && $(call install_wheels,${PYTHON_PACKAGE_DIR}/boosting/${DIST_DIR}) \ - && $(call install_wheels,${PYTHON_PACKAGE_DIR}/testbed/${DIST_DIR}) \ - && ${VENV_DEACTIVATE} - -tests: install - @echo Running integration tests... - ${VENV_ACTIVATE} \ - && ${PYTHON_UNITTEST} ${PYTHON_PACKAGE_DIR}/testbed/tests \ - && ${VENV_DEACTIVATE} - -apidoc_cpp: - @echo Generating C++ API documentation via Doxygen... - $(call create_dir,${DOC_API_DIR}/api/cpp/common) - cd ${DOC_DIR} && ${DOXYGEN} Doxyfile_common - $(call create_dir,${DOC_API_DIR}/api/cpp/boosting) - cd ${DOC_DIR} && ${DOXYGEN} Doxyfile_boosting - -apidoc_python: install - @echo Installing documentation dependencies into virtual environment... - ${VENV_ACTIVATE} \ - && ${PIP_INSTALL} -r ${DOC_DIR}/requirements.txt \ - && ${VENV_DEACTIVATE} - @echo Generating Python API documentation via Sphinx-Apidoc... - ${VENV_ACTIVATE} \ - && ${SPHINX_APIDOC} -o ${DOC_TMP_DIR}/common ${PYTHON_PACKAGE_DIR}/common/mlrl **/cython \ - && ${SPHINX_BUILD} ${DOC_TMP_DIR}/common ${DOC_API_DIR}/api/python/common \ - && ${SPHINX_APIDOC} -o ${DOC_TMP_DIR}/boosting ${PYTHON_PACKAGE_DIR}/boosting/mlrl **/cython \ - && ${SPHINX_BUILD} ${DOC_TMP_DIR}/boosting ${DOC_API_DIR}/api/python/boosting \ - && ${SPHINX_APIDOC} -o ${DOC_TMP_DIR}/testbed ${PYTHON_PACKAGE_DIR}/testbed/mlrl \ - && ${SPHINX_BUILD} ${DOC_TMP_DIR}/testbed ${DOC_API_DIR}/api/python/testbed \ - && ${VENV_DEACTIVATE} - -doc: apidoc_cpp apidoc_python - @echo Generating Sphinx documentation... 
- ${VENV_ACTIVATE} \ - && ${SPHINX_BUILD} ${DOC_DIR} ${DOC_BUILD_DIR} \ - && ${VENV_DEACTIVATE} diff --git a/VERSION b/VERSION deleted file mode 100644 index 899f24fc..00000000 --- a/VERSION +++ /dev/null @@ -1 +0,0 @@ -0.9.0 \ No newline at end of file diff --git a/assets/project_structure.svg b/assets/project_structure.svg deleted file mode 100644 index da6812e8..00000000 --- a/assets/project_structure.svg +++ /dev/null @@ -1,230 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - libmlrlboosting - - ... - C++ - - - mlrl-common - - mlrl-boomer - mlrl-testbed - Python - - Cython - ... - - diff --git a/cpp/meson.build b/cpp/meson.build deleted file mode 100644 index 89788128..00000000 --- a/cpp/meson.build +++ /dev/null @@ -1,2 +0,0 @@ -project('mlrl', default_options : ['cpp_std=c++14', 'buildtype=release', 'werror=true']) -subproject('boosting') diff --git a/cpp/subprojects/boosting/include/boosting/binning/feature_binning_auto.hpp b/cpp/subprojects/boosting/include/boosting/binning/feature_binning_auto.hpp deleted file mode 100644 index 00135c31..00000000 --- a/cpp/subprojects/boosting/include/boosting/binning/feature_binning_auto.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/feature_binning.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Allows to configure a method that automatically decides whether feature binning should be used or not. 
- */ - class AutomaticFeatureBinningConfig final : public IFeatureBinningConfig { - private: - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - AutomaticFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); - - /** - * @see `IFeatureBinningConfig::createThresholdsFactory` - */ - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/binning/label_binning.hpp b/cpp/subprojects/boosting/include/boosting/binning/label_binning.hpp deleted file mode 100644 index 354f1e5d..00000000 --- a/cpp/subprojects/boosting/include/boosting/binning/label_binning.hpp +++ /dev/null @@ -1,222 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" - -#include -#include - -namespace boosting { - - /** - * Stores information about a vector that provides access to the statistics for individual labels. This includes the - * number of positive and negative bins, the labels should be assigned to, as well as the minimum and maximum - * statistic in the vector. - */ - struct LabelInfo final { - public: - - /** - * The number of positive bins. - */ - uint32 numPositiveBins; - - /** - * The minimum among all statistics that belong to the positive bins. - */ - float64 minPositive; - - /** - * The maximum among all statistics that belong to the positive bins. - */ - float64 maxPositive; - - /** - * The number of negative bins. 
- */ - uint32 numNegativeBins; - - /** - * The minimum among all statistics that belong to the negative bins. - */ - float64 minNegative; - - /** - * The maximum among all statistics that belong to the negative bins. - */ - float64 maxNegative; - }; - - /** - * Defines an interface for methods that assign labels to bins, based on the corresponding gradients and Hessians. - */ - class ILabelBinning { - public: - - virtual ~ILabelBinning() {}; - - /** - * A callback function that is invoked when a label is assigned to a bin. It takes the index of the bin and - * the index of the label as arguments. - */ - typedef std::function Callback; - - /** - * A callback function that is invoked when a label with zero statistics is encountered. It takes the index - * of the label as an argument. - */ - typedef std::function ZeroCallback; - - /** - * Returns an upper bound for the number of bins used by the binning method, given a specific number of - * labels for which rules may predict. - * - * @param numLabels The number of labels for which rules may predict - * @return The maximum number of bins used by the binning method - */ - virtual uint32 getMaxBins(uint32 numLabels) const = 0; - - /** - * Retrieves and returns information that is required to apply the binning method. - * - * This function must be called prior to the function `createBins` to obtain information, e.g. the number of - * bins to be used, that is required to apply the binning method. This function may also be used to prepare, - * e.g. sort, the given vector. The `LabelInfo` returned by this function must be passed to the function - * `createBins` later on. 
- * - * @param criteria An array of type `float64` that stores the label-wise criteria that - * should be used to assign individual labels to bins - * @param numElements The number of elements in the array `criteria` - * @return A struct of type `LabelInfo` that stores the information - */ - virtual LabelInfo getLabelInfo(const float64* criteria, uint32 numElements) const = 0; - - /** - * Assigns the labels to bins based on label-wise criteria. - * - * @param labelInfo A struct of type `LabelInfo` that stores information that is required to - * apply the binning method - * @param criteria An array of type `float64` that stores the label-wise criteria that - * should be used to assign individual labels to bins - * @param numElements The number of elements in the array `criteria` - * @param callback A callback that is invoked when a label is assigned to a bin - * @param zeroCallback A callback that is invoked when a label for which the criterion is zero - * is encountered - */ - virtual void createBins(LabelInfo labelInfo, const float64* criteria, uint32 numElements, Callback callback, - ZeroCallback zeroCallback) const = 0; - }; - - /** - * Defines an interface for all factories that allows to create instances of the type `ILabelBinning`. - */ - class ILabelBinningFactory { - public: - - virtual ~ILabelBinningFactory() {}; - - /** - * Creates and returns a new object of type `ILabelBinning`. - * - * @return An unique pointer to an object of type `ILabelBinning` that has been created - */ - virtual std::unique_ptr create() const = 0; - }; - - /** - * Defines an interface for all classes that allow to configure a method that assigns labels to bins. - */ - class ILabelBinningConfig { - public: - - virtual ~ILabelBinningConfig() {}; - - /** - * Creates and returns a new object of type `ILabelWiseRuleEvaluationFactory` that allows to calculate the - * predictions of complete rules according to the specified configuration. 
- * - * @return An unique pointer to an object of type `ILabelWiseRuleEvaluationFactory` that has been created - */ - virtual std::unique_ptr createLabelWiseCompleteRuleEvaluationFactory() - const = 0; - - /** - * Creates and returns a new object of type `ISparseLabelWiseRuleEvaluationFactory` that allows to calculate - * the prediction of partial rules, which predict for a predefined number of labels, according to the - * specified configuration. - * - * @param labelRatio A percentage that specifies for how many labels the rule heads should predict - * @param minLabels The minimum number of labels for which the rule heads should predict - * @param maxLabels The maximum number of labels for which the rule heads should predict - * @return An unique pointer to an object of type `ISparseLabelWiseRuleEvaluationFactory` that - * has been created - */ - virtual std::unique_ptr - createLabelWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, uint32 minLabels, - uint32 maxLabels) const = 0; - - /** - * Creates and returns a new object of type `ISparseLabelWiseRuleEvaluationFactory` that allows to calculate - * the prediction of partial rules, which predict for a subset of the available labels that is determined - * dynamically, according to the specified configuration. - * - * @param threshold A threshold that affects for how many labels the rule heads should predict - * @param exponent An exponent that is used to weigh the estimated predictive quality for individual labels - * @return An unique pointer to an object of type `ISparseLabelWiseRuleEvaluationFactory` that has - * been created - */ - virtual std::unique_ptr - createLabelWiseDynamicPartialRuleEvaluationFactory(float32 threshold, float32 exponent) const = 0; - - /** - * Creates and returns a new object of type `IExampleWiseRuleEvaluationFactory` that allows to calculate the - * predictions of complete rules according to the specified configuration. 
- * - * @param blas A reference to an object of type `Blas` that allows to execute BLAS routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK routines - * @return An unique pointer to an object of type `IExampleWiseRuleEvaluationFactory` that has been - * created - */ - virtual std::unique_ptr createExampleWiseCompleteRuleEvaluationFactory( - const Blas& blas, const Lapack& lapack) const = 0; - - /** - * Creates and returns a new object of type `IExampleWiseRuleEvaluationFactory` that allows to calculate the - * predictions of partial rules, which predict for a predefined number of labels, according to the specified - * configuration. - * - * @param labelRatio A percentage that specifies for how many labels the rule heads should predict - * @param minLabels The minimum number of labels for which the rule heads should predict - * @param maxLabels The maximum number of labels for which the rule heads should predict - * @param blas A reference to an object of type `Blas` that allows to execute BLAS routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK routines - * @return An unique pointer to an object of type `IExampleWiseRuleEvaluationFactory` that has - * been created - */ - virtual std::unique_ptr - createExampleWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, uint32 minLabels, uint32 maxLabels, - const Blas& blas, const Lapack& lapack) const = 0; - - /** - * Creates and returns a new object of type `IExampleWiseRuleEvaluationFactory` that allows to calculate the - * predictions of partial rules, which predict for a subset of the available labels that is determined - * dynamically, according to the specified configuration. 
- * - * @param threshold A threshold that affects for how many labels the rule heads should predict - * @param exponent An exponent that is used to weigh the estimated predictive quality for individual labels - * @param blas A reference to an object of type `Blas` that allows to execute BLAS routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK routines - * @return An unique pointer to an object of type `IExampleWiseRuleEvaluationFactory` that has been - * created - */ - virtual std::unique_ptr - createExampleWiseDynamicPartialRuleEvaluationFactory(float32 threshold, float32 exponent, - const Blas& blas, const Lapack& lapack) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/binning/label_binning_auto.hpp b/cpp/subprojects/boosting/include/boosting/binning/label_binning_auto.hpp deleted file mode 100644 index 9369b319..00000000 --- a/cpp/subprojects/boosting/include/boosting/binning/label_binning_auto.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/regularization.hpp" - -namespace boosting { - - /** - * Allows to configure a method that automatically decides whether label binning should be used or not. 
- */ - class AutomaticLabelBinningConfig final : public ILabelBinningConfig { - private: - - const std::unique_ptr& l1RegularizationConfigPtr_; - - const std::unique_ptr& l2RegularizationConfigPtr_; - - public: - - /** - * @param l1RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L1 - * regularization - * @param l2RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L2 - * regularization - */ - AutomaticLabelBinningConfig(const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr); - - std::unique_ptr createLabelWiseCompleteRuleEvaluationFactory() - const override; - - std::unique_ptr createLabelWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels) const override; - - std::unique_ptr createLabelWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent) const override; - - std::unique_ptr createExampleWiseCompleteRuleEvaluationFactory( - const Blas& blas, const Lapack& lapack) const override; - - std::unique_ptr createExampleWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, const Blas& blas, - const Lapack& lapack) const override; - - std::unique_ptr createExampleWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent, const Blas& blas, const Lapack& lapack) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/binning/label_binning_equal_width.hpp b/cpp/subprojects/boosting/include/boosting/binning/label_binning_equal_width.hpp deleted file mode 100644 index 5e31cb95..00000000 --- a/cpp/subprojects/boosting/include/boosting/binning/label_binning_equal_width.hpp +++ /dev/null @@ -1,134 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/macros.hpp" -#include 
"boosting/rule_evaluation/regularization.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a method that assigns labels to bins in a way such - * that each bin contains labels for which the predicted score is expected to belong to the same value range. - */ - class MLRLBOOSTING_API IEqualWidthLabelBinningConfig { - public: - - virtual ~IEqualWidthLabelBinningConfig() {}; - - /** - * Returns the percentage that specifies how many bins are used. - * - * @return The percentage that specifies how many bins are used - */ - virtual float32 getBinRatio() const = 0; - - /** - * Sets the percentage that specifies how many should be used. - * - * @param binRatio A percentage that specifies how many bins should be used, e.g., if 100 labels are - * available, a percentage of 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. - * Must be in (0, 1) - * @return A reference to an object of type `EqualWidthLabelBinningConfig` that allows further - * configuration of the method that assigns labels to bins - */ - virtual IEqualWidthLabelBinningConfig& setBinRatio(float32 binRatio) = 0; - - /** - * Returns the minimum number of bins that is used. - * - * @return The minimum number of bins that is used - */ - virtual uint32 getMinBins() const = 0; - - /** - * Sets the minimum number of bins that should be used. - * - * @param minBins The minimum number of bins that should be used. Must be at least 1 - * @return A reference to an object of type `EqualWidthLabelBinningConfig` that allows further - * configuration of the method that assigns labels to bins - */ - virtual IEqualWidthLabelBinningConfig& setMinBins(uint32 minBins) = 0; - - /** - * Returns the maximum number of bins that is used. - * - * @return The maximum number of bins that is used - */ - virtual uint32 getMaxBins() const = 0; - - /** - * Sets the maximum number of bins that should be used. - * - * @param maxBins The maximum number of bins that should be used. 
Must be at least the minimum number of - * bins or 0, if the maximum number of bins should not be restricted - * @return A reference to an object of type `EqualWidthLabelBinningConfig` that allows further - * configuration of the method that assigns labels to bins - */ - virtual IEqualWidthLabelBinningConfig& setMaxBins(uint32 maxBins) = 0; - }; - - /** - * Allows to configure a method that assigns labels to bins in a way such that each bin contains labels for which - * the predicted score is expected to belong to the same value range. - */ - class EqualWidthLabelBinningConfig final : public ILabelBinningConfig, - public IEqualWidthLabelBinningConfig { - private: - - float32 binRatio_; - - uint32 minBins_; - - uint32 maxBins_; - - const std::unique_ptr& l1RegularizationConfigPtr_; - - const std::unique_ptr& l2RegularizationConfigPtr_; - - public: - - /** - * @param l1RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L1 - * regularization - * @param l2RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L2 - * regularization - */ - EqualWidthLabelBinningConfig(const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr); - - float32 getBinRatio() const override; - - IEqualWidthLabelBinningConfig& setBinRatio(float32 binRatio) override; - - uint32 getMinBins() const override; - - IEqualWidthLabelBinningConfig& setMinBins(uint32 minBins) override; - - uint32 getMaxBins() const override; - - IEqualWidthLabelBinningConfig& setMaxBins(uint32 maxBins) override; - - std::unique_ptr createLabelWiseCompleteRuleEvaluationFactory() - const override; - - std::unique_ptr createLabelWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels) const override; - - std::unique_ptr createLabelWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent) const override; - - std::unique_ptr 
createExampleWiseCompleteRuleEvaluationFactory( - const Blas& blas, const Lapack& lapack) const override; - - std::unique_ptr createExampleWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, const Blas& blas, - const Lapack& lapack) const override; - - std::unique_ptr createExampleWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent, const Blas& blas, const Lapack& lapack) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/binning/label_binning_no.hpp b/cpp/subprojects/boosting/include/boosting/binning/label_binning_no.hpp deleted file mode 100644 index f514a5f4..00000000 --- a/cpp/subprojects/boosting/include/boosting/binning/label_binning_no.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/regularization.hpp" - -namespace boosting { - - /** - * Allows to configure a method that does not assign labels to bins. 
- */ - class NoLabelBinningConfig final : public ILabelBinningConfig { - private: - - const std::unique_ptr& l1RegularizationConfigPtr_; - - const std::unique_ptr& l2RegularizationConfigPtr_; - - public: - - /** - * @param l1RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L1 - * regularization - * @param l2RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L2 - * regularization - */ - NoLabelBinningConfig(const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr); - - std::unique_ptr createLabelWiseCompleteRuleEvaluationFactory() - const override; - - std::unique_ptr createLabelWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels) const override; - - std::unique_ptr createLabelWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent) const override; - - std::unique_ptr createExampleWiseCompleteRuleEvaluationFactory( - const Blas& blas, const Lapack& lapack) const override; - - std::unique_ptr createExampleWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, const Blas& blas, - const Lapack& lapack) const override; - - std::unique_ptr createExampleWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent, const Blas& blas, const Lapack& lapack) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/arrays.hpp b/cpp/subprojects/boosting/include/boosting/data/arrays.hpp deleted file mode 100644 index e200729c..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/arrays.hpp +++ /dev/null @@ -1,122 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include - -namespace boosting { - - /** - * Adds the elements in an array `b` to the elements in another array `a`, such that `a = a + b`. 
- * - * @tparam T The type of the arrays `a` and `b` - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a` and `b` - * - */ - template - static inline void addToArray(T* a, const T* b, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - a[i] += b[i]; - } - } - - /** - * Adds the elements in an array `b` to the elements in another array `a`. The elements in the array `b` are - * multiplied by a given weight, such that `a = a + (b * weight)`. - * - * @tparam T The type of the arrays `a` and `b` - * @tparam W The type of the weight - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a` and `b` - * @param weight The weight, the elements in the array `b` should be multiplied by - * - */ - template - static inline void addToArray(T* a, const T* b, uint32 numElements, W weight) { - for (uint32 i = 0; i < numElements; i++) { - a[i] += (b[i] * weight); - } - } - - /** - * Adds the elements in an array `b` to the elements in another array `a`, such that `a = a + b`. The indices of - * elements in the array `b` that correspond to the elements in array `a` are given as an additional array. 
- * - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a` and `b` - * @param indices A pointer to an array of type `uint32` that stores the indices of the elements in the array - * `b` that correspond to the elements in array `a` - * - */ - template - static inline void addToArray(T* a, const T* b, const uint32* indices, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indices[i]; - a[i] += b[index]; - } - } - - /** - * Adds the elements in an array `b` to the elements in another array `a`. The elements in the array `b` are - * multiplied by a given weight, such that `a = a + (b * weight)`. The indices of elements in the array `b` that - * correspond to the elements in array `a` are given as an additional array. - * - * @tparam T The type of the arrays `a` and `b` - * @tparam W The type of the weight - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a` and `b` - * @param weight The weight, the elements in the array `b` should be multiplied by - * @param indices A pointer to an array of type `uint32` that stores the indices of the elements in the array - * `b` that correspond to the elements in array `a` - * - */ - template - static inline void addToArray(T* a, const T* b, const uint32* indices, uint32 numElements, W weight) { - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indices[i]; - a[i] += (b[index] * weight); - } - } - - /** - * Removes the elements in an array `b` from the elements in another array `a`, such that `a = a - b`. 
- * - * @tparam T The type of the arrays `a` and `b` - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a` and `b` - * - */ - template - static inline void removeFromArray(T* a, const T* b, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - a[i] -= b[i]; - } - } - - /** - * Removes the elements in an array `b` from the elements in another array `a`. The elements in the array `b` are - * multiplied by a given weight, such that `a = a - (b * weight)`. - * - * @tparam T The type of the arrays `a` and `b` - * @tparam W The type of the weight - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a` and `b` - * @param weight The weight, the elements in the array `b` should be multiplied by - * - */ - template - static inline void removeFromArray(T* a, const T* b, uint32 numElements, W weight) { - for (uint32 i = 0; i < numElements; i++) { - a[i] -= (b[i] * weight); - } - } - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/histogram_view_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/boosting/data/histogram_view_label_wise_sparse.hpp deleted file mode 100644 index df125b35..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/histogram_view_label_wise_sparse.hpp +++ /dev/null @@ -1,142 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_view_label_wise_sparse.hpp" -#include "common/data/triple.hpp" - -namespace boosting { - - /** - * Implements row-wise read-only access to the gradients and Hessians that have been calculated using a label-wise - * decomposable loss function and are stored in a pre-allocated histogram in the list of lists (LIL) format. 
- */ - class SparseLabelWiseHistogramConstView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores the gradients and Hessians of each bin. - */ - Triple* statistics_; - - /** - * A pointer to an array that stores the weight of each bin. - */ - float64* weights_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param statistics A pointer to an array that stores the gradients and Hessians of each bin - * @param weights A pointer to an array that stores the weight of each bin - */ - SparseLabelWiseHistogramConstView(uint32 numRows, uint32 numCols, Triple* statistics, - float64* weights); - - virtual ~SparseLabelWiseHistogramConstView() {}; - - /** - * An iterator that provides read-only access to the gradients and Hessians. - */ - typedef const Triple* const_iterator; - - /** - * An iterator that provides read-only access to the weights that correspond to individual bins. - */ - typedef const float64* weight_const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the gradients and Hessians at a specific row. - * - * @param row The row - * @return A `const_iterator` to the beginning of the row - */ - const_iterator cbegin(uint32 row) const; - - /** - * Returns a `const_iterator` to the end of the gradients and Hessians at a specific row. - * - * @param row The row - * @return A `const_iterator` to the end of the row - */ - const_iterator cend(uint32 row) const; - - /** - * Returns a `weight_const_iterator` to the beginning of the weights that correspond to individual bins. - * - * @return A `weight_const_iterator` to the beginning - */ - weight_const_iterator weights_cbegin() const; - - /** - * Returns a `weight_const_iterator` to the end of the weights that correspond to individual bins. 
- * - * @return A `weight_const_iterator` to the end - */ - weight_const_iterator weights_cend() const; - - /** - * Returns the number of rows in the view. - * - * @return The number of rows - */ - uint32 getNumRows() const; - - /** - * Returns the number of columns in the view. - * - * @return The number of columns - */ - uint32 getNumCols() const; - }; - - /** - * Implements row-wise read and write access to the gradients and Hessians that have been calculated using a - * label-wise decomposable loss function and are stored in a pre-allocated histogram in the list of lists (LIL) - * format. - */ - class SparseLabelWiseHistogramView : public SparseLabelWiseHistogramConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param statistics A pointer to an array that stores the gradients and Hessians of each bin - * @param weights A pointer to an array that stores the weight of each bin - */ - SparseLabelWiseHistogramView(uint32 numRows, uint32 numCols, Triple* statistics, float64* weights); - - virtual ~SparseLabelWiseHistogramView() override {}; - - /** - * Sets all gradients and Hessians in the matrix to zero. - */ - void clear(); - - /** - * Adds all gradients and Hessians in a vector to a specific row of this histogram. The gradients and - * Hessians to be added are multiplied by a specific weight. 
- * - * @param row The row - * @param begin A `SparseLabelWiseStatisticConstView::const_iterator` to the beginning of the vector - * @param end A `SparseLabelWiseStatisticConstView::const_iterator` to the end of the vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToRow(uint32 row, SparseLabelWiseStatisticConstView::const_iterator begin, - SparseLabelWiseStatisticConstView::const_iterator end, float64 weight); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/matrix_c_contiguous_numeric.hpp b/cpp/subprojects/boosting/include/boosting/data/matrix_c_contiguous_numeric.hpp deleted file mode 100644 index de69bc34..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/matrix_c_contiguous_numeric.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_c_contiguous.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -namespace boosting { - - /** - * A two-dimensional matrix that provides random access to a fixed number of values stored in a C-contiguous array. - * - * @tparam T The type of the values that are stored in the matrix - */ - template - class NumericCContiguousMatrix final : public CContiguousMatrix { - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - NumericCContiguousMatrix(uint32 numRows, uint32 numCols); - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - * @param init True, if all elements in the matrix should be value-initialized, false otherwise - */ - NumericCContiguousMatrix(uint32 numRows, uint32 numCols, bool init); - - /** - * Adds all values in another vector to certain elements, whose positions are given as a - * `CompleteIndexVector`, at a specific row of this matrix. 
- * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd); - - /** - * Adds all values in another vector to certain elements, whose positions are given as a - * `PartialIndexVector`, at a specific row of this matrix. - * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd); - - /** - * Subtracts all values in another vector from certain elements, whose positions are given as a - * `CompleteIndexVector`, at a specific row of this matrix. 
- * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void removeFromRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd); - - /** - * Subtracts all values in another vector from certain elements, whose positions are given as a - * `PartialIndexVector`, at a specific row of this matrix. - * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void removeFromRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/matrix_sparse_set_numeric.hpp b/cpp/subprojects/boosting/include/boosting/data/matrix_sparse_set_numeric.hpp deleted file mode 100644 index 0ea1453d..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/matrix_sparse_set_numeric.hpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_sparse_set.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -namespace boosting { - - /** - * A two-dimensional matrix that provides row-wise access to values that are stored in the list of lists (LIL) - * format. 
- * - * @tparam T The type of the values that are stored in the matrix - */ - template - class NumericSparseSetMatrix final : public SparseSetMatrix { - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - NumericSparseSetMatrix(uint32 numRows, uint32 numCols); - - /** - * Adds all values in another vector to certain elements, whose positions are given as a - * `CompleteIndexVector`, at a specific row of this matrix. - * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd); - - /** - * Adds all values in another vector to certain elements, whose positions are given as a - * `PartialIndexVector`, at a specific row of this matrix. - * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd); - - /** - * Subtracts all values in another vector from certain elements, whose positions are given as a - * `CompleteIndexVector`, at a specific row of this matrix. 
- * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void removeFromRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd); - - /** - * Subtracts all values in another vector from certain elements, whose positions are given as a - * `PartialIndexVector`, at a specific row of this matrix. - * - * @param row The row - * @param begin An iterator to the beginning of the vector - * @param end An iterator to the end of the vector - * @param indicesBegin An iterator to the beginning of the indices - * @param indicesEnd An iterator to the end of the indices - */ - void removeFromRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/statistic_vector_example_wise_dense.hpp b/cpp/subprojects/boosting/include/boosting/data/statistic_vector_example_wise_dense.hpp deleted file mode 100644 index adde2220..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/statistic_vector_example_wise_dense.hpp +++ /dev/null @@ -1,313 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/iterator/diagonal_iterator.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -namespace boosting { - - /** - * An one-dimensional vector that stores gradients and Hessians that have been calculated using a non-decomposable - * loss function in C-contiguous arrays. 
For each element in the vector a single gradient, but multiple Hessians are - * stored. In a vector that stores `n` gradients `(n * (n + 1)) / 2` Hessians are stored. The Hessians can be viewed - * as a symmetric Hessian matrix with `n` rows and columns. - */ - class DenseExampleWiseStatisticVector final { - private: - - const uint32 numGradients_; - - const uint32 numHessians_; - - float64* gradients_; - - float64* hessians_; - - public: - - /** - * @param numGradients The number of gradients in the vector - */ - DenseExampleWiseStatisticVector(uint32 numGradients); - - /** - * @param numGradients The number of gradients in the vector - * @param init True, if all gradients and Hessians in the vector should be initialized with zero, - * false otherwise - */ - DenseExampleWiseStatisticVector(uint32 numGradients, bool init); - - /** - * @param vector A reference to an object of type `DenseExampleWiseStatisticVector` to be copied - */ - DenseExampleWiseStatisticVector(const DenseExampleWiseStatisticVector& vector); - - ~DenseExampleWiseStatisticVector(); - - /** - * An iterator that provides access to the gradients in the vector and allows to modify them. - */ - typedef float64* gradient_iterator; - - /** - * An iterator that provides read-only access to the gradients in the vector. - */ - typedef const float64* gradient_const_iterator; - - /** - * An iterator that provides access to the Hessians in the vector and allows to modify them. - */ - typedef float64* hessian_iterator; - - /** - * An iterator that provides read-only access to the Hessians in the vector. - */ - typedef const float64* hessian_const_iterator; - - /** - * An iterator that provides read-only access to the Hessians that correspond to the diagonal of the Hessian - * matrix. - */ - typedef DiagonalConstIterator hessian_diagonal_const_iterator; - - /** - * Returns a `gradient_iterator` to the beginning of the gradients. 
- * - * @return A `gradient_iterator` to the beginning - */ - gradient_iterator gradients_begin(); - - /** - * Returns a `gradient_iterator` to the end of the gradients. - * - * @return A `gradient_iterator` to the end - */ - gradient_iterator gradients_end(); - - /** - * Returns a `gradient_const_iterator` to the beginning of the gradients. - * - * @return A `gradient_const_iterator` to the beginning - */ - gradient_const_iterator gradients_cbegin() const; - - /** - * Returns a `gradient_const_iterator` to the end of the gradients. - * - * @return A `gradient_const_iterator` to the end - */ - gradient_const_iterator gradients_cend() const; - - /** - * Returns a `hessian_iterator` to the beginning of the Hessians. - * - * @return A `hessian_iterator` to the beginning - */ - hessian_iterator hessians_begin(); - - /** - * Returns a `hessian_iterator` to the end of the Hessians. - * - * @return A `hessian_iterator` to the end - */ - hessian_iterator hessians_end(); - - /** - * Returns a `hessian_const_iterator` to the beginning of the Hessians. - * - * @return A `hessian_const_iterator` to the beginning - */ - hessian_const_iterator hessians_cbegin() const; - - /** - * Returns a `hessian_const_iterator` to the end of the Hessians. - * - * @return A `hessian_const_iterator` to the end - */ - hessian_const_iterator hessians_cend() const; - - /** - * Returns a `hessian_diagonal_const_iterator` to the beginning of the Hessians that correspond to the - * diagonal of the Hessian matrix. - * - * @return A `hessian_diagonal_const_iterator` to the beginning - */ - hessian_diagonal_const_iterator hessians_diagonal_cbegin() const; - - /** - * Returns a `hessian_diagonal_const_iterator` to the end of the Hessians that correspond to the diagonal of - * the Hessian matrix. - * - * @return A `hessian_diagonal_const_iterator` to the end - */ - hessian_diagonal_const_iterator hessians_diagonal_cend() const; - - /** - * Returns the number of gradients in the vector. 
- * - * @return The number of gradients - */ - uint32 getNumElements() const; - - /** - * Sets all gradients and Hessians in the vector to zero. - */ - void clear(); - - /** - * Adds all gradients and Hessians in another vector to this vector. - * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - */ - void add(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd); - - /** - * Adds all gradients and Hessians in another vector to this vector. The gradients and Hessians to be added - * are multiplied by a specific weight. - * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void add(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, float64 weight); - - /** - * Removes all gradients and Hessians in another vector from this vector. 
- * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - */ - void remove(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd); - - /** - * Removes all gradients and Hessians in another vector from this vector. The gradients and Hessians to be - * removed are multiplied by a specific weight. - * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void remove(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, float64 weight); - - /** - * Adds certain gradients and Hessians in another vector, whose positions are given as a - * `CompleteIndexVector`, to this vector. 
- * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - */ - void addToSubset(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, - const CompleteIndexVector& indices); - - /** - * Adds certain gradients and Hessians in another vector, whose positions are given as a - * `PartialIndexVector`, to this vector. - * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - */ - void addToSubset(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, - const PartialIndexVector& indices); - - /** - * Adds certain gradients and Hessians in another vector, whose positions are given as a - * `CompleteIndexVector`, to this vector. The gradients and Hessians to be added are multiplied by a - * specific weight. 
- * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, - const CompleteIndexVector& indices, float64 weight); - - /** - * Adds certain gradients and Hessians in another vector, whose positions are given as a - * `PartialIndexVector`, to this vector. The gradients and Hessians to be added are multiplied by a specific - * weight. - * - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, - const PartialIndexVector& indices, float64 weight); - - /** - * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients - * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that - * correspond to the positions provided by a `CompleteIndexVector`. 
- * - * @param firstGradientsBegin A `gradient_const_iterator` to the beginning of the first gradients - * @param firstGradientsEnd A `gradient_const_iterator` to the end of the first gradients - * @param firstHessiansBegin A `hessian_const_iterator` to the beginning of the first Hessians - * @param firstHessiansEnd A `hessian_const_iterator` to the end of the first Hessians - * @param firstIndices A reference to an object of type `CompleteIndexVector` that provides access - * to the indices - * @param secondGradientsBegin A `gradient_const_iterator` to the beginning of the second gradients - * @param secondGradientsEnd A `gradient_const_iterator` to the end of the second gradients - * @param secondHessiansBegin A `hessian_const_iterator` to the beginning of the second Hessians - * @param secondHessiansEnd A `hessian_const_iterator` to the end of the second Hessians - */ - void difference(gradient_const_iterator firstGradientsBegin, gradient_const_iterator firstGradientsEnd, - hessian_const_iterator firstHessiansBegin, hessian_const_iterator firstHessiansEnd, - const CompleteIndexVector& firstIndices, gradient_const_iterator secondGradientsBegin, - gradient_const_iterator secondGradientsEnd, hessian_const_iterator secondHessiansBegin, - hessian_const_iterator secondHessiansEnd); - - /** - * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients - * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that - * correspond to the positions provided by a `PartialIndexVector`. 
- * - * @param firstGradientsBegin A `gradient_const_iterator` to the beginning of the first gradients - * @param firstGradientsEnd A `gradient_const_iterator` to the end of the first gradients - * @param firstHessiansBegin A `hessian_const_iterator` to the beginning of the first Hessians - * @param firstHessiansEnd A `hessian_const_iterator` to the end of the first Hessians - * @param firstIndices A reference to an object of type `PartialIndexVector` that provides access - * to the indices - * @param secondGradientsBegin A `gradient_const_iterator` to the beginning of the second gradients - * @param secondGradientsEnd A `gradient_const_iterator` to the end of the second gradients - * @param secondHessiansBegin A `hessian_const_iterator` to the beginning of the second Hessians - * @param secondHessiansEnd A `hessian_const_iterator` to the end of the second Hessians - */ - void difference(gradient_const_iterator firstGradientsBegin, gradient_const_iterator firstGradientsEnd, - hessian_const_iterator firstHessiansBegin, hessian_const_iterator firstHessiansEnd, - const PartialIndexVector& firstIndices, gradient_const_iterator secondGradientsBegin, - gradient_const_iterator secondGradientsEnd, hessian_const_iterator secondHessiansBegin, - hessian_const_iterator secondHessiansEnd); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_dense.hpp b/cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_dense.hpp deleted file mode 100644 index 166faef4..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_dense.hpp +++ /dev/null @@ -1,227 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_view_label_wise_dense.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -namespace boosting { - - /** - * An one-dimensional vector that stores aggregated 
gradients and Hessians that have been calculated using a - * label-wise decomposable loss function in a C-contiguous array. For each element in the vector a single gradient - * and Hessian is stored. - */ - class DenseLabelWiseStatisticVector final { - private: - - const uint32 numElements_; - - Tuple* statistics_; - - public: - - /** - * @param numElements The number of gradients and Hessians in the vector - */ - DenseLabelWiseStatisticVector(uint32 numElements); - - /** - * @param numElements The number of gradients and Hessians in the vector - * @param init True, if all gradients and Hessians in the vector should be initialized with zero, - * false otherwise - */ - DenseLabelWiseStatisticVector(uint32 numElements, bool init); - - /** - * @param vector A reference to an object of type `DenseLabelWiseStatisticVector` to be copied - */ - DenseLabelWiseStatisticVector(const DenseLabelWiseStatisticVector& vector); - - ~DenseLabelWiseStatisticVector(); - - /** - * An iterator that provides access to the elements in the vector and allows to modify them. - */ - typedef Tuple* iterator; - - /** - * An iterator that provides read-only access to the elements in the vector. - */ - typedef const Tuple* const_iterator; - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of gradients and Hessians in the vector. 
- * - * @return The number of gradients and Hessians - */ - uint32 getNumElements() const; - - /** - * Sets all gradients and Hessians in the vector to zero. - */ - void clear(); - - /** - * Adds all gradients and Hessians in another vector to this vector. - * - * @param vector A reference to an object of type `DenseLabelWiseStatisticVector` that stores the gradients - * and Hessians to be added to this vector - */ - void add(const DenseLabelWiseStatisticVector& vector); - - /** - * Adds all gradients and Hessians in a single row of a `DenseLabelWiseStatisticConstView` to this vector. - * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - */ - void add(const DenseLabelWiseStatisticConstView& view, uint32 row); - - /** - * Adds all gradients and Hessians in a single row of a `DenseLabelWiseStatisticConstView` to this vector. - * The gradients and Hessians to be added are multiplied by a specific weight. - * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void add(const DenseLabelWiseStatisticConstView& view, uint32 row, float64 weight); - - /** - * Removes all gradients and Hessians in a single row of a `DenseLabelWiseStatisticConstView` from this - * vector. 
- * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be removed from this vector - * @param row The index of the row to be removed from this vector - */ - void remove(const DenseLabelWiseStatisticConstView& view, uint32 row); - - /** - * Removes all gradients and Hessians in a single row of a `DenseLabelWiseStatisticConstView` from this - * vector. The gradients and Hessians to be removed are multiplied by a specific weight. - * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be removed from this vector - * @param row The index of the row to be removed from this vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void remove(const DenseLabelWiseStatisticConstView& view, uint32 row, float64 weight); - - /** - * Adds certain gradients and Hessians in a single row of a `DenseLabelWiseStatisticConstView`, whose - * positions are given as a `CompleteIndexVector`, to this vector. - * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - */ - void addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices); - - /** - * Adds certain gradients and Hessians in single row of a `DenseLabelWiseStatisticConstView`, whose - * positions are given as a `PartialIndexVector`, to this vector. 
- * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - */ - void addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `DenseLabelWiseStatisticConstView`, whose - * positions are given as a `CompleteIndexVector`, to this vector. The gradients and Hessians to be added - * are multiplied by a specific weight. - * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight); - - /** - * Adds certain gradients and Hessians in single row of a `DenseLabelWiseStatisticConstView`, whose - * positions are given as a `PartialIndexVector`, to this vector. The gradients and Hessians to be added are - * multiplied by a specific weight. 
- * - * @param view A reference to an object of type `DenseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices, float64 weight); - - /** - * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients - * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that - * correspond to the positions provided by a `CompleteIndexVector`. - * - * @param first A reference to an object of type `DenseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the first vector - * @param firstIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices - * @param second A reference to an object of type `DenseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the second vector - */ - void difference(const DenseLabelWiseStatisticVector& first, const CompleteIndexVector& firstIndices, - const DenseLabelWiseStatisticVector& second); - - /** - * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients - * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that - * correspond to the positions provided by a `PartialIndexVector`. 
- * - * @param first A reference to an object of type `DenseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the first vector - * @param firstIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices - * @param second A reference to an object of type `DenseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the second vector - */ - void difference(const DenseLabelWiseStatisticVector& first, const PartialIndexVector& firstIndices, - const DenseLabelWiseStatisticVector& second); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_sparse.hpp deleted file mode 100644 index 785f804d..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/statistic_vector_label_wise_sparse.hpp +++ /dev/null @@ -1,377 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/histogram_view_label_wise_sparse.hpp" -#include "boosting/data/statistic_view_label_wise_sparse.hpp" -#include "common/data/triple.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -namespace boosting { - - /** - * An one-dimensional vector that stores aggregated gradients and Hessians that have been calculated using a - * label-wise decomposable loss function in a C-contiguous array. For each element in the vector a single gradient - * and Hessian, as well as the sums of the weights of the aggregated gradients and Hessians, is stored. - */ - class SparseLabelWiseStatisticVector final { - private: - - /** - * An iterator that provides random read-only access to the statistics in a - * `SparseLabelWiseStatisticVector`. 
- */ - class ConstIterator final { - private: - - const Triple* iterator_; - - const float64 sumOfWeights_; - - public: - - /** - * @param iterator An iterator that provides access to the elements in a - * `SparseLabelWiseStatisticVector` - * @param sumOfWeights The sum of the weights of all statistics that have been added to the vector - */ - ConstIterator(const Triple* iterator, float64 sumOfWeights); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef const Tuple value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const Tuple* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef const Tuple& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * - * @param index The index of the element to be returned - * @return The element at the given index - */ - value_type operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - value_type operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ConstIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ConstIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - ConstIterator& operator--(); - - /** - * Returns an iterator to the previous element. 
- * - * @return A reference to an iterator that refers to the previous element - */ - ConstIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const ConstIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const ConstIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. - * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const ConstIterator& rhs) const; - }; - - const uint32 numElements_; - - Triple* statistics_; - - float64 sumOfWeights_; - - public: - - /** - * @param numElements The number of gradients and Hessians in the vector - */ - SparseLabelWiseStatisticVector(uint32 numElements); - - /** - * @param numElements The number of gradients and Hessians in the vector - * @param init True, if all gradients and Hessians in the vector should be initialized with zero, - * false otherwise - */ - SparseLabelWiseStatisticVector(uint32 numElements, bool init); - - /** - * @param vector A reference to an object of type `SparseLabelWiseStatisticVector` to be copied - */ - SparseLabelWiseStatisticVector(const SparseLabelWiseStatisticVector& vector); - - ~SparseLabelWiseStatisticVector(); - - /** - * An iterator that provides read-only access to the elements in the vector. - */ - typedef ConstIterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the vector. 
- * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements in the vector - */ - uint32 getNumElements() const; - - /** - * Sets all gradients and Hessians in the vector to zero. - */ - void clear(); - - /** - * Adds all gradients and Hessians in another vector to this vector. - * - * @param vector A reference to an object of type `SparseLabelWiseStatisticVector` that stores the gradients - * and Hessians to be added to this vector - */ - void add(const SparseLabelWiseStatisticVector& vector); - - /** - * Adds all gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView` to this vector. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - */ - void add(const SparseLabelWiseStatisticConstView& view, uint32 row); - - /** - * Adds all gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView` to this vector. - * The gradients and Hessians to be added are multiplied by a specific weight. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void add(const SparseLabelWiseStatisticConstView& view, uint32 row, float64 weight); - - /** - * Removes all gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView` from this - * vector. 
- * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be removed from this vector - * @param row The index of the row to be removed from this vector - */ - void remove(const SparseLabelWiseStatisticConstView& view, uint32 row); - - /** - * Removes all gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView` from this - * vector. The gradients and Hessians to be added are multiplied by a specific weight. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be removed from this vector - * @param row The index of the row to be removed from this vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void remove(const SparseLabelWiseStatisticConstView& view, uint32 row, float64 weight); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView`, whose - * positions are given as a `CompleteIndexVector`, to this vector. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - */ - void addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView`, whose - * positions are given as a `PartialIndexVector`, to this vector. 
- * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - */ - void addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView`, whose - * positions are given as a `CompleteIndexVector`, to this vector. The gradients and Hessians to be added - * are multiplied by a specific weight. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseStatisticConstView`, whose - * positions are given as a `PartialIndexVector`, to this vector. The gradients and Hessians to be added are - * multiplied by a specific weight. 
- * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices, float64 weight); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramConstView`, whose - * positions are given as a `CompleteIndexVector`, to this vector. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - */ - void addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const CompleteIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramConstView`, whose - * positions are given as a `PartialIndexVector`, to this vector. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - */ - void addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const PartialIndexVector& indices); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramConstView`, whose - * positions are given as a `CompleteIndexVector`, to this vector. 
The gradients and Hessians to be added - * are multiplied by a specific weight. - * - * @param view A reference to an object of type `SparseLabelWiseStatisticConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `CompleteIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight); - - /** - * Adds certain gradients and Hessians in a single row of a `SparseLabelWiseHistogramConstView`, whose - * positions are given as a `PartialIndexVector`, to this vector. The gradients and Hessians to be added are - * multiplied by a specific weight. - * - * @param view A reference to an object of type `SparseLabelWiseHistogramConstView` that stores the - * gradients and Hessians to be added to this vector - * @param row The index of the row to be added to this vector - * @param indices A reference to a `PartialIndexVector' that provides access to the indices - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const PartialIndexVector& indices, float64 weight); - - /** - * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients - * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that - * correspond to the positions provided by a `CompleteIndexVector`. 
- * - * @param first A reference to an object of type `SparseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the first vector - * @param firstIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices - * @param second A reference to an object of type `SparseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the second vector - */ - void difference(const SparseLabelWiseStatisticVector& first, const CompleteIndexVector& firstIndices, - const SparseLabelWiseStatisticVector& second); - - /** - * Sets the gradients and Hessians in this vector to the difference `first - second` between the gradients - * and Hessians in two other vectors, considering only the gradients and Hessians in the first vector that - * correspond to the positions provided by a `PartialIndexVector`. - * - * @param first A reference to an object of type `SparseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the first vector - * @param firstIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices - * @param second A reference to an object of type `SparseLabelWiseStatisticVector` that stores the - * gradients and Hessians in the second vector - */ - void difference(const SparseLabelWiseStatisticVector& first, const PartialIndexVector& firstIndices, - const SparseLabelWiseStatisticVector& second); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/statistic_view_example_wise_dense.hpp b/cpp/subprojects/boosting/include/boosting/data/statistic_view_example_wise_dense.hpp deleted file mode 100644 index 149bc212..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/statistic_view_example_wise_dense.hpp +++ /dev/null @@ -1,221 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/iterator/diagonal_iterator.hpp" - -namespace boosting { - - /** - * Implements row-wise 
read-only access to the gradients and Hessians that have been calculated using a - * non-decomposable loss function and are stored in pre-allocated C-contiguous arrays. - */ - class DenseExampleWiseStatisticConstView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of gradients per row. - */ - const uint32 numGradients_; - - /** - * The number of Hessians per row. - */ - const uint32 numHessians_; - - /** - * A pointer to an array that stores the gradients. - */ - float64* gradients_; - - /** - * A pointer to an array that stores the Hessians. - */ - float64* hessians_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numGradients The number of gradients per row - * @param numHessians The number of Hessians per row - * @param gradients A pointer to an array of type `float64` that stores the gradients, the view provides - * access to - * @param hessians A pointer to an array of type `float64` that stores the Hessians, the view provides - * access to - */ - DenseExampleWiseStatisticConstView(uint32 numRows, uint32 numGradients, uint32 numHessians, - float64* gradients, float64* hessians); - - virtual ~DenseExampleWiseStatisticConstView() {}; - - /** - * An iterator that provides read-only access to the gradients. - */ - typedef const float64* gradient_const_iterator; - - /** - * An iterator that provides read-only access to the Hessians. - */ - typedef const float64* hessian_const_iterator; - - /** - * An iterator that provides read-only access to the Hessians that correspond to the diagonal of the matrix. - */ - typedef DiagonalConstIterator hessian_diagonal_const_iterator; - - /** - * Returns a `gradient_const_iterator` to the beginning of the gradients at a specific row. 
- * - * @param row The row - * @return A `gradient_const_iterator` to the beginning of the given row - */ - gradient_const_iterator gradients_cbegin(uint32 row) const; - - /** - * Returns a `gradient_const_iterator` to the end of the gradients at a specific row. - * - * @param row The row - * @return A `gradient_const_iterator` to the end of the given row - */ - gradient_const_iterator gradients_cend(uint32 row) const; - - /** - * Returns a `hessian_const_iterator` to the beginning of the Hessians at a specific row. - * - * @param row The row - * @return A `hessian_const_iterator` to the beginning of the given row - */ - hessian_const_iterator hessians_cbegin(uint32 row) const; - - /** - * Returns a `hessian_const_iterator` to the end of the Hessians at a specific row. - * - * @param row The row - * @return A `hessian_const_iterator` to the end of the given row - */ - hessian_const_iterator hessians_cend(uint32 row) const; - - /** - * Returns a `hessian_diagonal_const_iterator` to the beginning of the Hessians that correspond to the - * diagonal of the Hessian matrix at a specific row. - * - * @param row The row - * @return A `hessian_diagonal_const_iterator` to the beginning - */ - hessian_diagonal_const_iterator hessians_diagonal_cbegin(uint32 row) const; - - /** - * Returns a `hessian_diagonal_const_iterator` to the end of the Hessians that correspond to the diagonal of - * the Hessian matrix at a specific row. - * - * @param row The row - * @return A `hessian_diagonal_const_iterator` to the end - */ - hessian_diagonal_const_iterator hessians_diagonal_cend(uint32 row) const; - - /** - * Returns the number of rows in the view. - * - * @return The number of rows - */ - uint32 getNumRows() const; - - /** - * Returns the number of columns in the view. 
- * - * @return The number of columns - */ - uint32 getNumCols() const; - }; - - /** - * Implements row-wise read and write access to the gradients and Hessians that have been calculated using a - * non-decomposable loss function and are stored in pre-allocated C-contiguous arrays. - */ - class DenseExampleWiseStatisticView : public DenseExampleWiseStatisticConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numGradients The number of gradients per row - * @param numHessians The number of Hessians per row - * @param gradients A pointer to an array of type `float64` that stores the gradients, the view provides - * access to - * @param hessians A pointer to an array of type `float64` that stores the Hessians, the view provides - * access to - */ - DenseExampleWiseStatisticView(uint32 numRows, uint32 numGradients, uint32 numHessians, float64* gradients, - float64* hessians); - - virtual ~DenseExampleWiseStatisticView() override {}; - - /** - * An iterator that provides access to the gradients and allows to modify them. - */ - typedef float64* gradient_iterator; - - /** - * An iterator that provides access to the Hessians and allows to modify them. - */ - typedef float64* hessian_iterator; - - /** - * Returns a `gradient_iterator` to the beginning of the gradients at a specific row. - * - * @param row The row - * @return A `gradient_iterator` to the beginning of the given row - */ - gradient_iterator gradients_begin(uint32 row); - - /** - * Returns a `gradient_iterator` to the end of the gradients at a specific row. - * - * @param row The row - * @return A `gradient_iterator` to the end of the given row - */ - gradient_iterator gradients_end(uint32 row); - - /** - * Returns a `hessian_iterator` to the beginning of the Hessians at a specific row. 
- * - * @param row The row - * @return A `hessian_iterator` to the beginning of the given row - */ - hessian_iterator hessians_begin(uint32 row); - - /** - * Returns a `hessian_iterator` to the end of the Hessians at a specific row. - * - * @param row The row - * @return A `hessian_iterator` to the end of the given row - */ - hessian_iterator hessians_end(uint32 row); - - /** - * Sets all gradients and Hessians in the matrix to zero. - */ - void clear(); - - /** - * Adds all gradients and Hessians in a vector to a specific row of this matrix. The gradients and Hessians - * to be added are multiplied by a specific weight. - * - * @param row The row - * @param gradientsBegin A `gradient_const_iterator` to the beginning of the gradients in the vector - * @param gradientsEnd A `gradient_const_iterator` to the end of the gradients in the vector - * @param hessiansBegin A `hessian_const_iterator` to the beginning of the Hessians in the vector - * @param hessiansEnd A `hessian_const_iterator` to the end of the Hessians in the vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToRow(uint32 row, gradient_const_iterator gradientsBegin, gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, float64 weight); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_dense.hpp b/cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_dense.hpp deleted file mode 100644 index 24ce9411..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_dense.hpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/tuple.hpp" - -namespace boosting { - - /** - * Implements row-wise read-only access to the gradients and Hessians that have been calculated using a label-wise - * decomposable loss function and are stored in 
pre-allocated C-contiguous arrays. - */ - class DenseLabelWiseStatisticConstView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores the gradients and Hessians. - */ - Tuple* statistics_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param statistics A pointer to a C-contiguous array fo type `Tuple` that stores the gradients - * and Hessians, the view provides access to - */ - DenseLabelWiseStatisticConstView(uint32 numRows, uint32 numCols, Tuple* statistics); - - virtual ~DenseLabelWiseStatisticConstView() {}; - - /** - * An iterator that provides read-only access to the elements in the view. - */ - typedef const Tuple* const_iterator; - - /** - * Returns a `const_iterator` to the beginning of a specific row. - * - * @param row The row - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin(uint32 row) const; - - /** - * Returns a `const_iterator` to the end of a specific row. - * - * @param row The row - * @return A `const_iterator` to the end - */ - const_iterator cend(uint32 row) const; - - /** - * Returns the number of rows in the view. - * - * @return The number of rows - */ - uint32 getNumRows() const; - - /** - * Returns the number of columns in the view. - * - * @return The number of columns - */ - uint32 getNumCols() const; - }; - - /** - * Implements row-wise read and write access to the gradients and Hessians that have been calculated using a - * label-wise decomposable loss function and are stored in pre-allocated C-contiguous arrays. 
- */ - class DenseLabelWiseStatisticView : public DenseLabelWiseStatisticConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param statistics A pointer to a C-contiguous array fo type `Tuple` that stores the gradients - * and Hessians, the view provides access to - */ - DenseLabelWiseStatisticView(uint32 numRows, uint32 numCols, Tuple* statistics); - - virtual ~DenseLabelWiseStatisticView() override {}; - - /** - * An iterator that provides access to the elements in the view and allows to modify them. - */ - typedef Tuple* iterator; - - /** - * Returns an `iterator` to the beginning of a specific row. - * - * @param row The row - * @return An `iterator` to the beginning - */ - iterator begin(uint32 row); - - /** - * Returns an `iterator` to the end of a specific row. - * - * @param row The row - * @return An `iterator` to the end - */ - iterator end(uint32 row); - - /** - * Sets all gradients and Hessians in the matrix to zero. - */ - void clear(); - - /** - * Adds all gradients and Hessians in a vector to a specific row of this matrix. The gradients and Hessians - * to be added are multiplied by a specific weight. 
- * - * @param row The row - * @param begin A `const_iterator` to the beginning of the vector - * @param end A `const_iterator` to the end of the vector - * @param weight The weight, the gradients and Hessians should be multiplied by - */ - void addToRow(uint32 row, const_iterator begin, const_iterator end, float64 weight); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_sparse.hpp deleted file mode 100644 index 86e872a7..00000000 --- a/cpp/subprojects/boosting/include/boosting/data/statistic_view_label_wise_sparse.hpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_sparse_set.hpp" -#include "common/data/tuple.hpp" - -namespace boosting { - - /** - * Implements row-wise read-only access to the gradients and Hessians that have been calculated using a label-wise - * decomposable loss function and are stored in a pre-allocated matrix in the list of lists (LIL) format. - */ - class SparseLabelWiseStatisticConstView { - protected: - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an object of type `SparseSetMatrix` that stores the gradients and Hessians. - */ - SparseSetMatrix>* statistics_; - - public: - - /** - * @param numCols The number of columns in the view - * @param statistics A pointer to an object of type `SparseSetMatrix` that stores the gradients and - * Hessians - */ - SparseLabelWiseStatisticConstView(uint32 numCols, SparseSetMatrix>* statistics); - - virtual ~SparseLabelWiseStatisticConstView() {}; - - /** - * Provides read-only access to a row. - */ - typedef SparseSetMatrix>::const_row const_row; - - /** - * An iterator that provides read-only access to the elements in the view. 
- */ - typedef const_row::const_iterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of a specific row. - * - * @param row The row - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin(uint32 row) const; - - /** - * Returns a `const_iterator` to the end of a specific row. - * - * @param row The row - * @return A `const_iterator` to the end - */ - const_iterator cend(uint32 row) const; - - /** - * Provides read-only access to a specific row. - * - * @param row The index of the row - * @return A `const_row` - */ - const_row operator[](uint32 row) const; - - /** - * Returns the number of rows in the view. - * - * @return The number of rows - */ - uint32 getNumRows() const; - - /** - * Returns the number of columns in the view. - * - * @return The number of columns - */ - uint32 getNumCols() const; - }; - - /** - * Implements row-wise read and write access to the gradients and Hessians that have been calculated using a - * label-wise decomposable loss function and are stored in a pre-allocated matrix in the list of lists (LIL) format. - */ - class SparseLabelWiseStatisticView : public SparseLabelWiseStatisticConstView { - public: - - /** - * @param numCols The number of columns in the view - * @param statistics A pointer to an object of type `SparseSetMatrix` that stores the gradients and - * Hessians - */ - SparseLabelWiseStatisticView(uint32 numCols, SparseSetMatrix>* statistics); - - virtual ~SparseLabelWiseStatisticView() override {}; - - /** - * Provides access to a row and allows to modify its elements. - */ - typedef SparseSetMatrix>::row row; - - /** - * Provides access to a specific row and allows to modify its elements. - * - * @param row The index of the row - * @return A `row` - */ - row operator[](uint32 row); - - /** - * Sets all gradients and Hessians in the matrix to zero. 
- */ - void clear(); - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/iterator/diagonal_iterator.hpp b/cpp/subprojects/boosting/include/boosting/iterator/diagonal_iterator.hpp deleted file mode 100644 index e2172778..00000000 --- a/cpp/subprojects/boosting/include/boosting/iterator/diagonal_iterator.hpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -namespace boosting { - - /** - * An iterator that provides read-only access to the elements that correspond to the diagonal of a C-contiguous - * matrix. - * - * @tparam T The type of the elements that are stored in the matrix - */ - template - class DiagonalConstIterator final { - private: - - const T* ptr_; - - uint32 index_; - - public: - - /** - * @param ptr A pointer to a C-contiguous array of type `float64` that stores the elements of the matrix - * @param index The index to start at - */ - DiagonalConstIterator(const T* ptr, uint32 index); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef T value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const T* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef const T& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. 
- * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator to the next element - */ - DiagonalConstIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator to the next element - */ - DiagonalConstIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator to the previous element - */ - DiagonalConstIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator to the previous element - */ - DiagonalConstIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const DiagonalConstIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const DiagonalConstIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. 
- * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const DiagonalConstIterator& rhs) const; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/learner.hpp b/cpp/subprojects/boosting/include/boosting/learner.hpp deleted file mode 100644 index e1184b4d..00000000 --- a/cpp/subprojects/boosting/include/boosting/learner.hpp +++ /dev/null @@ -1,1083 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "boosting/binning/feature_binning_auto.hpp" -#include "boosting/binning/label_binning_auto.hpp" -#include "boosting/binning/label_binning_equal_width.hpp" -#include "boosting/binning/label_binning_no.hpp" -#include "boosting/losses/loss_example_wise_logistic.hpp" -#include "boosting/losses/loss_example_wise_squared_error.hpp" -#include "boosting/losses/loss_example_wise_squared_hinge.hpp" -#include "boosting/losses/loss_label_wise_logistic.hpp" -#include "boosting/losses/loss_label_wise_squared_error.hpp" -#include "boosting/losses/loss_label_wise_squared_hinge.hpp" -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/multi_threading/parallel_rule_refinement_auto.hpp" -#include "boosting/multi_threading/parallel_statistic_update_auto.hpp" -#include "boosting/post_processing/shrinkage_constant.hpp" -#include "boosting/prediction/predictor_binary_auto.hpp" -#include "boosting/prediction/predictor_binary_example_wise.hpp" -#include "boosting/prediction/predictor_binary_gfm.hpp" -#include "boosting/prediction/predictor_binary_label_wise.hpp" -#include "boosting/prediction/predictor_probability_auto.hpp" -#include "boosting/prediction/predictor_probability_label_wise.hpp" -#include "boosting/prediction/predictor_probability_marginalized.hpp" -#include "boosting/prediction/predictor_score_label_wise.hpp" -#include 
"boosting/prediction/probability_calibration_isotonic.hpp" -#include "boosting/rule_evaluation/head_type_auto.hpp" -#include "boosting/rule_evaluation/head_type_complete.hpp" -#include "boosting/rule_evaluation/head_type_partial_dynamic.hpp" -#include "boosting/rule_evaluation/head_type_partial_fixed.hpp" -#include "boosting/rule_evaluation/head_type_single.hpp" -#include "boosting/rule_evaluation/regularization_manual.hpp" -#include "boosting/rule_evaluation/regularization_no.hpp" -#include "boosting/rule_model_assemblage/default_rule_auto.hpp" -#include "boosting/sampling/partition_sampling_auto.hpp" -#include "boosting/statistics/statistic_format.hpp" -#include "boosting/statistics/statistic_format_auto.hpp" -#include "boosting/statistics/statistic_format_dense.hpp" -#include "boosting/statistics/statistic_format_sparse.hpp" -#include "common/learner.hpp" - -namespace boosting { - - /** - * Defines an interface for all rule learners that make use of gradient boosting. - */ - class MLRLBOOSTING_API IBoostingRuleLearner : virtual public IRuleLearner { - public: - - /** - * Defines an interface for all classes that allow to configure a rule learner that makes use of gradient - * boosting. - */ - class IConfig : virtual public IRuleLearner::IConfig { - friend class AbstractBoostingRuleLearner; - - protected: - - /** - * Returns an unique pointer to the configuration of the rule heads that should be induced by the - * rule learner. - * - * @return A reference to an unique pointer of type `IHeadConfig` that stores the configuration of - * the rule heads - */ - virtual std::unique_ptr& getHeadConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the statistics that should be used by the rule - * learner. 
- * - * @return A reference to an unique pointer of type `IStatisticsConfig` that stores the - * configuration of the statistics - */ - virtual std::unique_ptr& getStatisticsConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the L1 regularization term. - * - * @return A reference to an unique pointer of type `IRegularizationConfig` that stores the - * configuration of the L1 regularization term - */ - virtual std::unique_ptr& getL1RegularizationConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the L2 regularization term. - * - * @return A reference to an unique pointer of type `IRegularizationConfig` that stores the - * configuration of the L2 regularization term - */ - virtual std::unique_ptr& getL2RegularizationConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the loss function. - * - * @return A reference to an unique pointer of type `ILossConfig` that stores the configuration of - * the loss function - */ - virtual std::unique_ptr& getLossConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for the assignment of labels to - * bins. - * - * @return A reference to an unique pointer of type `ILabelBinningConfig` that stores the - * configuration of the method for the assignment of labels to bins - */ - virtual std::unique_ptr& getLabelBinningConfigPtr() = 0; - - public: - - virtual ~IConfig() override {}; - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether a holdout set should be used or not. - */ - class IAutomaticPartitionSamplingMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticPartitionSamplingMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether a holdout set should be used or not. 
- */ - virtual void useAutomaticPartitionSampling() { - std::unique_ptr& partitionSamplingConfigPtr = - this->getPartitionSamplingConfigPtr(); - partitionSamplingConfigPtr = std::make_unique( - this->getGlobalPruningConfigPtr(), this->getMarginalProbabilityCalibratorConfigPtr(), - this->getJointProbabilityCalibratorConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether a method for the assignment of numerical feature values to bins should be used or not. - */ - class IAutomaticFeatureBinningMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticFeatureBinningMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether a method for the assignment of - * numerical feature values to bins should be used or not. - */ - virtual void useAutomaticFeatureBinning() { - std::unique_ptr& featureBinningConfigPtr = - this->getFeatureBinningConfigPtr(); - featureBinningConfigPtr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether multi-threading should be used for the parallel refinement of rules or not. - */ - class IAutomaticParallelRuleRefinementMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticParallelRuleRefinementMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether multi-threading should be used for - * the parallel refinement of rules or not. 
- */ - virtual void useAutomaticParallelRuleRefinement() { - std::unique_ptr& parallelRuleRefinementConfigPtr = - this->getParallelRuleRefinementConfigPtr(); - parallelRuleRefinementConfigPtr = std::make_unique( - this->getLossConfigPtr(), this->getHeadConfigPtr(), this->getFeatureSamplingConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether multi-threading should be used for the parallel update of statistics or not. - */ - class IAutomaticParallelStatisticUpdateMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticParallelStatisticUpdateMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether multi-threading should be used for - * the parallel update of statistics or not. - */ - virtual void useAutomaticParallelStatisticUpdate() { - std::unique_ptr& parallelStatisticUpdateConfigPtr = - this->getParallelStatisticUpdateConfigPtr(); - parallelStatisticUpdateConfigPtr = - std::make_unique(this->getLossConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a post processor that - * shrinks the weights fo rules by a constant "shrinkage" parameter. - */ - class IConstantShrinkageMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IConstantShrinkageMixin() override {}; - - /** - * Configures the rule learner to use a post processor that shrinks the weights of rules by a - * constant "shrinkage" parameter. 
- * - * @return A reference to an object of type `IConstantShrinkageConfig` that allows further - * configuration of the loss function - */ - virtual IConstantShrinkageConfig& useConstantShrinkagePostProcessor() { - std::unique_ptr& postProcessorConfigPtr = - this->getPostProcessorConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - IConstantShrinkageConfig& ref = *ptr; - postProcessorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use L1 regularization. - */ - class INoL1RegularizationMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~INoL1RegularizationMixin() override {}; - - /** - * Configures the rule learner to not use L1 regularization. - */ - virtual void useNoL1Regularization() { - std::unique_ptr& l1RegularizationConfigPtr = - this->getL1RegularizationConfigPtr(); - l1RegularizationConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use L1 regularization. - */ - class IL1RegularizationMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IL1RegularizationMixin() override {}; - - /** - * Configures the rule learner to use L1 regularization. - * - * @return A reference to an object of type `IManualRegularizationConfig` that allows further - * configuration of the regularization term - */ - virtual IManualRegularizationConfig& useL1Regularization() { - std::unique_ptr& l1RegularizationConfigPtr = - this->getL1RegularizationConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IManualRegularizationConfig& ref = *ptr; - l1RegularizationConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use L2 regularization. 
- */ - class INoL2RegularizationMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~INoL2RegularizationMixin() override {}; - - /** - * Configures the rule learner to not use L2 regularization. - */ - virtual void useNoL2Regularization() { - std::unique_ptr& l2RegularizationConfigPtr = - this->getL2RegularizationConfigPtr(); - l2RegularizationConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use L2 regularization. - */ - class IL2RegularizationMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IL2RegularizationMixin() override {}; - - /** - * Configures the rule learner to use L2 regularization. - * - * @return A reference to an object of type `IManualRegularizationConfig` that allows further - * configuration of the regularization term - */ - virtual IManualRegularizationConfig& useL2Regularization() { - std::unique_ptr& l2RegularizationConfigPtr = - this->getL2RegularizationConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IManualRegularizationConfig& ref = *ptr; - l2RegularizationConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not induce a default rule. - */ - class INoDefaultRuleMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~INoDefaultRuleMixin() override {}; - - /** - * Configures the rule learner to not induce a default rule. - */ - virtual void useNoDefaultRule() { - std::unique_ptr& defaultRuleConfigPtr = this->getDefaultRuleConfigPtr(); - defaultRuleConfigPtr = std::make_unique(false); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether a default rule should be induced or not. 
- */ - class IAutomaticDefaultRuleMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticDefaultRuleMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether a default rule should be induced or - * not. - */ - virtual void useAutomaticDefaultRule() { - std::unique_ptr& defaultRuleConfigPtr = this->getDefaultRuleConfigPtr(); - defaultRuleConfigPtr = std::make_unique( - this->getStatisticsConfigPtr(), this->getLossConfigPtr(), this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to induce rules with complete - * heads that predict for all available labels. - */ - class ICompleteHeadMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ICompleteHeadMixin() override {}; - - /** - * Configures the rule learner to induce rules with complete heads that predict for all available - * labels. - */ - virtual void useCompleteHeads() { - std::unique_ptr& headConfigPtr = this->getHeadConfigPtr(); - headConfigPtr = std::make_unique( - this->getLabelBinningConfigPtr(), this->getParallelStatisticUpdateConfigPtr(), - this->getL1RegularizationConfigPtr(), this->getL2RegularizationConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to induce rules with partial - * heads that predict for a predefined number of labels. - */ - class IFixedPartialHeadMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IFixedPartialHeadMixin() override {}; - - /** - * Configures the rule learner to induce rules with partial heads that predict for a predefined - * number of labels. 
- * - * @return A reference to an object of type `IFixedPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IFixedPartialHeadConfig& useFixedPartialHeads() { - std::unique_ptr& headConfigPtr = this->getHeadConfigPtr(); - std::unique_ptr ptr = std::make_unique( - this->getLabelBinningConfigPtr(), this->getParallelStatisticUpdateConfigPtr()); - IFixedPartialHeadConfig& ref = *ptr; - headConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to induce rules with partial - * heads that predict for a subset of the available labels that is determined dynamically. - */ - class IDynamicPartialHeadMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IDynamicPartialHeadMixin() override {}; - - /** - * Configures the rule learner to induce rules with partial heads that predict for a subset of the - * available labels that is determined dynamically. Only those labels for which the square of the - * predictive quality exceeds a certain threshold are included in a rule head. - * - * @return A reference to an object of type `IDynamicPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IDynamicPartialHeadConfig& useDynamicPartialHeads() { - std::unique_ptr& headConfigPtr = this->getHeadConfigPtr(); - std::unique_ptr ptr = std::make_unique( - this->getLabelBinningConfigPtr(), this->getParallelStatisticUpdateConfigPtr()); - IDynamicPartialHeadConfig& ref = *ptr; - headConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to induce rules with - * single-label heads that predict for a single label. 
- */ - class ISingleLabelHeadMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ISingleLabelHeadMixin() override {}; - - /** - * Configures the rule learner to induce rules with single-label heads that predict for a single - * label. - */ - virtual void useSingleLabelHeads() { - std::unique_ptr& headConfigPtr = this->getHeadConfigPtr(); - headConfigPtr = std::make_unique( - this->getLabelBinningConfigPtr(), this->getParallelStatisticUpdateConfigPtr(), - this->getL1RegularizationConfigPtr(), this->getL2RegularizationConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide for - * the type of rule heads that should be used. - */ - class IAutomaticHeadMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticHeadMixin() override {}; - - /** - * Configures the rule learner to automatically decide for the type of rule heads that should be - * used. - */ - virtual void useAutomaticHeads() { - std::unique_ptr& headConfigPtr = this->getHeadConfigPtr(); - headConfigPtr = std::make_unique( - this->getLossConfigPtr(), this->getLabelBinningConfigPtr(), - this->getParallelStatisticUpdateConfigPtr(), this->getL1RegularizationConfigPtr(), - this->getL2RegularizationConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a dense representation - * of gradients and Hessians. - */ - class IDenseStatisticsMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IDenseStatisticsMixin() override {}; - - /** - * Configures the rule learner to use a dense representation of gradients and Hessians. 
- */ - virtual void useDenseStatistics() { - std::unique_ptr& statisticsConfigPtr = this->getStatisticsConfigPtr(); - statisticsConfigPtr = std::make_unique(this->getLossConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a sparse - * representation of gradients and Hessians, if possible. - */ - class ISparseStatisticsMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ISparseStatisticsMixin() override {}; - - /** - * Configures the rule learner to use a sparse representation of gradients and Hessians, if - * possible. - */ - virtual void useSparseStatistics() { - std::unique_ptr& statisticsConfigPtr = this->getStatisticsConfigPtr(); - statisticsConfigPtr = std::make_unique(this->getLossConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether a dense or sparse representation of gradients and Hessians should be used. - */ - class IAutomaticStatisticsMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticStatisticsMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether a dense or sparse representation of - * gradients and Hessians should be used. - */ - virtual void useAutomaticStatistics() { - std::unique_ptr& statisticsConfigPtr = this->getStatisticsConfigPtr(); - statisticsConfigPtr = std::make_unique( - this->getLossConfigPtr(), this->getHeadConfigPtr(), this->getDefaultRuleConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a loss function that - * implements a multi-label variant of the logistic loss that is applied example-wise. 
- */ - class IExampleWiseLogisticLossMixin : virtual public IBoostingRuleLearner::IConfig { - public: - - virtual ~IExampleWiseLogisticLossMixin() override {}; - - /** - * Configures the rule learner to use a loss function that implements a multi-label variant of the - * logistic loss that is applied example-wise. - */ - virtual void useExampleWiseLogisticLoss() { - std::unique_ptr& lossConfigPtr = this->getLossConfigPtr(); - lossConfigPtr = std::make_unique(this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a loss function that - * implements a multi-label variant of the squared error loss that is applied example-wise. - */ - class IExampleWiseSquaredErrorLossMixin : virtual public IBoostingRuleLearner::IConfig { - public: - - virtual ~IExampleWiseSquaredErrorLossMixin() override {}; - - /** - * Configures the rule learner to use a loss function that implements a multi-label variant of the - * squared error loss that is applied example-wise. - */ - virtual void useExampleWiseSquaredErrorLoss() { - std::unique_ptr& lossConfigPtr = this->getLossConfigPtr(); - lossConfigPtr = std::make_unique(this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a loss function that - * implements a multi-label variant of the squared hinge loss that is applied example-wise. - */ - class IExampleWiseSquaredHingeLossMixin : virtual public IBoostingRuleLearner::IConfig { - public: - - virtual ~IExampleWiseSquaredHingeLossMixin() override {}; - - /** - * Configures the rule learner to use a loss function that implements a multi-label variant of the - * squared hinge loss that is applied example-wise. 
- */ - virtual void useExampleWiseSquaredHingeLoss() { - std::unique_ptr& lossConfigPtr = this->getLossConfigPtr(); - lossConfigPtr = std::make_unique(this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a loss function that - * implements a multi-label variant of the logistic loss that is applied label-wise. - */ - class ILabelWiseLogisticLossMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ILabelWiseLogisticLossMixin() override {}; - - /** - * Configures the rule learner to use a loss function that implements a multi-label variant of the - * logistic loss that is applied label-wise. - */ - virtual void useLabelWiseLogisticLoss() { - std::unique_ptr& lossConfigPtr = this->getLossConfigPtr(); - lossConfigPtr = std::make_unique(this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a loss function that - * implements a multi-label variant of the squared error loss that is applied label-wise. - */ - class ILabelWiseSquaredErrorLossMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ILabelWiseSquaredErrorLossMixin() override {}; - - /** - * Configures the rule learner to use a loss function that implements a multi-label variant of the - * squared error loss that is applied label-wise. - */ - virtual void useLabelWiseSquaredErrorLoss() { - std::unique_ptr& lossConfigPtr = this->getLossConfigPtr(); - lossConfigPtr = std::make_unique(this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a loss function that - * implements a multi-label variant of the squared hinge loss that is applied label-wise. 
- */ - class ILabelWiseSquaredHingeLossMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ILabelWiseSquaredHingeLossMixin() override {}; - - /** - * Configures the rule learner to use a loss function that implements a multi-label variant of the - * squared hinge loss that is applied label-wise. - */ - virtual void useLabelWiseSquaredHingeLoss() { - std::unique_ptr& lossConfigPtr = this->getLossConfigPtr(); - lossConfigPtr = std::make_unique(this->getHeadConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use any method for the - * assignment of labels to bins. - */ - class INoLabelBinningMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~INoLabelBinningMixin() override {}; - - /** - * Configures the rule learner to not use any method for the assignment of labels to bins. - */ - virtual void useNoLabelBinning() { - std::unique_ptr& labelBinningConfigPtr = this->getLabelBinningConfigPtr(); - labelBinningConfigPtr = std::make_unique( - this->getL1RegularizationConfigPtr(), this->getL2RegularizationConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a method for the - * assignment of labels to bins. - */ - class IEqualWidthLabelBinningMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IEqualWidthLabelBinningMixin() override {}; - - /** - * Configures the rule learner to use a method for the assignment of labels to bins in a way such - * that each bin contains labels for which the predicted score is expected to belong to the same - * value range. 
- * - * @return A reference to an object of type `IEqualWidthLabelBinningConfig` that allows further - * configuration of the method for the assignment of labels to bins - */ - virtual IEqualWidthLabelBinningConfig& useEqualWidthLabelBinning() { - std::unique_ptr& labelBinningConfigPtr = this->getLabelBinningConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getL1RegularizationConfigPtr(), - this->getL2RegularizationConfigPtr()); - IEqualWidthLabelBinningConfig& ref = *ptr; - labelBinningConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide - * whether a method for the assignment of labels to bins should be used or not. - */ - class IAutomaticLabelBinningMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticLabelBinningMixin() override {}; - - /** - * Configures the rule learner to automatically decide whether a method for the assignment of labels - * to bins should be used or not. - */ - virtual void useAutomaticLabelBinning() { - std::unique_ptr& labelBinningConfigPtr = this->getLabelBinningConfigPtr(); - labelBinningConfigPtr = std::make_unique( - this->getL1RegularizationConfigPtr(), this->getL2RegularizationConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to calibrate marginal - * probabilities via isotonic regression. - * - */ - class IIsotonicMarginalProbabilityCalibrationMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IIsotonicMarginalProbabilityCalibrationMixin() override {}; - - /** - * Configures the rule learner to calibrate marginal probabilities via isotonic regression. 
- * - * @return A reference to an object of type `IIsotonicMarginalProbabilityCalibratorConfig` that - * allows further configuration of the calibrator - */ - virtual IIsotonicMarginalProbabilityCalibratorConfig& useIsotonicMarginalProbabilityCalibration() { - std::unique_ptr& marginalProbabilityCalibratorConfigPtr = - this->getMarginalProbabilityCalibratorConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getLossConfigPtr()); - IIsotonicMarginalProbabilityCalibratorConfig& ref = *ptr; - marginalProbabilityCalibratorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to calibrate joint - * probabilities via isotonic regression. - */ - class IIsotonicJointProbabilityCalibrationMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IIsotonicJointProbabilityCalibrationMixin() override {}; - - /** - * Configures the rule learner to calibrate joint probabilities via isotonic regression. - * - * @return A reference to an object of type `IIsotonicJointProbabilityCalibratorConfig` that allows - * further configuration of the calibrator - */ - virtual IIsotonicJointProbabilityCalibratorConfig& useIsotonicJointProbabilityCalibration() { - std::unique_ptr& jointProbabilityCalibratorConfigPtr = - this->getJointProbabilityCalibratorConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getLossConfigPtr()); - IIsotonicJointProbabilityCalibratorConfig& ref = *ptr; - jointProbabilityCalibratorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a predictor that - * predicts whether individual labels of given query examples are relevant or irrelevant by discretizing the - * regression scores or probability estimates that are predicted for each label individually. 
- */ - class ILabelWiseBinaryPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ILabelWiseBinaryPredictorMixin() override {}; - - /** - * Configures the rule learner to use a predictor that predicts whether individual labels of given - * query examples are relevant or irrelevant by discretizing the regression scores or probability - * estimates that are predicted for each label individually. - * - * @return A reference to an object of type `ILabelWiseBinaryPredictorConfig` that allows further - * configuration of the predictor - */ - virtual ILabelWiseBinaryPredictorConfig& useLabelWiseBinaryPredictor() { - std::unique_ptr& binaryPredictorConfigPtr = - this->getBinaryPredictorConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getLossConfigPtr(), - this->getParallelPredictionConfigPtr()); - ILabelWiseBinaryPredictorConfig& ref = *ptr; - binaryPredictorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a predictor that - * predicts known label vectors for given query examples by comparing the predicted regression scores or - * probability estimates to the label vectors encountered in the training data. - */ - class IExampleWiseBinaryPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IExampleWiseBinaryPredictorMixin() override {}; - - /** - * Configures the rule learner to use a predictor that predicts known label vectors for given query - * examples by comparing the predicted regression scores or probability estimates to the label - * vectors encountered in the training data. 
- * - * @return A reference to an object of type `IExampleWiseBinaryPredictorConfig` that allows further - * configuration of the predictor - */ - virtual IExampleWiseBinaryPredictorConfig& useExampleWiseBinaryPredictor() { - std::unique_ptr& binaryPredictorConfigPtr = - this->getBinaryPredictorConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getLossConfigPtr(), - this->getParallelPredictionConfigPtr()); - IExampleWiseBinaryPredictorConfig& ref = *ptr; - binaryPredictorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a predictor that - * predicts whether individual labels of given query examples are relevant or irrelevant by discretizing the - * regression scores or probability estimates that are predicted for each label according to the general - * F-measure maximizer (GFM). - */ - class IGfmBinaryPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IGfmBinaryPredictorMixin() override {}; - - /** - * Configures the rule learner to use a predictor that predicts whether individual labels of given - * query examples are relevant or irrelevant by discretizing the regression scores or probability - * estimates that are predicted for each label according to the general F-measure maximizer (GFM). 
- * - * @return A reference to an object of type `IGfmBinaryPredictorConfig` that allows further - * configuration of the predictor - */ - virtual IGfmBinaryPredictorConfig& useGfmBinaryPredictor() { - std::unique_ptr& binaryPredictorConfigPtr = - this->getBinaryPredictorConfigPtr(); - std::unique_ptr ptr = std::make_unique( - this->getLossConfigPtr(), this->getParallelPredictionConfigPtr()); - IGfmBinaryPredictorConfig& ref = *ptr; - binaryPredictorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide for a - * predictor for predicting whether individual labels are relevant or irrelevant. - */ - class IAutomaticBinaryPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticBinaryPredictorMixin() override {}; - - /** - * Configures the rule learner to automatically decide for a predictor for predicting whether - * individual labels are relevant or irrelevant. - */ - virtual void useAutomaticBinaryPredictor() { - std::unique_ptr& binaryPredictorConfigPtr = - this->getBinaryPredictorConfigPtr(); - binaryPredictorConfigPtr = std::make_unique( - this->getLossConfigPtr(), this->getParallelPredictionConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a predictor that - * predicts label-wise regression scores for given query examples by summing up the scores that are provided - * by individual rules for each label individually. - */ - class ILabelWiseScorePredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ILabelWiseScorePredictorMixin() override {}; - - /** - * Configures the rule learner to use a predictor that predicts label-wise regression scores for - * given query examples by summing up the scores that are provided by individual rules for each - * label individually. 
- */ - virtual void useLabelWiseScorePredictor() { - std::unique_ptr& scorePredictorConfigPtr = - this->getScorePredictorConfigPtr(); - scorePredictorConfigPtr = - std::make_unique(this->getParallelPredictionConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a predictor that - * predicts label-wise probabilities for given query examples by transforming the regression scores that are - * predicted for each label individually into probabilities. - */ - class ILabelWiseProbabilityPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~ILabelWiseProbabilityPredictorMixin() override {}; - - /** - * Configures the rule learner to use a predictor that predicts label-wise probabilities for given - * query examples by transforming the regression scores that are predicted for each label - * individually into probabilities. - * - * @return A reference to an object of type `ILabelWiseProbabilityPredictorConfig` that allows - * further configuration of the predictor - */ - virtual ILabelWiseProbabilityPredictorConfig& useLabelWiseProbabilityPredictor() { - std::unique_ptr& probabilityPredictorConfigPtr = - this->getProbabilityPredictorConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getLossConfigPtr(), - this->getParallelPredictionConfigPtr()); - ILabelWiseProbabilityPredictorConfig& ref = *ptr; - probabilityPredictorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use predictor that - * predicts label-wise probabilities for given query examples by marginalizing over the joint probabilities - * of known label vectors. 
- */ - class IMarginalizedProbabilityPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IMarginalizedProbabilityPredictorMixin() override {}; - - /** - * Configures the rule learner to use a predictor that predicts label-wise probabilities for given - * query examples by marginalizing over the joint probabilities of known label vectors. - * - * @return A reference to an object of type `IMarginalizedProbabilityPredictorConfig` that allows - * further configuration of the predictor - */ - virtual IMarginalizedProbabilityPredictorConfig& useMarginalizedProbabilityPredictor() { - std::unique_ptr& probabilityPredictorConfigPtr = - this->getProbabilityPredictorConfigPtr(); - std::unique_ptr ptr = - std::make_unique( - this->getLossConfigPtr(), this->getParallelPredictionConfigPtr()); - IMarginalizedProbabilityPredictorConfig& ref = *ptr; - probabilityPredictorConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to automatically decide for a - * predictor for predicting probability estimates. - */ - class IAutomaticProbabilityPredictorMixin : public virtual IBoostingRuleLearner::IConfig { - public: - - virtual ~IAutomaticProbabilityPredictorMixin() override {}; - - /** - * Configures the rule learner to automatically decide for a predictor for predicting probability - * estimates. - */ - virtual void useAutomaticProbabilityPredictor() { - std::unique_ptr& probabilityPredictorConfigPtr = - this->getProbabilityPredictorConfigPtr(); - probabilityPredictorConfigPtr = std::make_unique( - this->getLossConfigPtr(), this->getParallelPredictionConfigPtr()); - } - }; - - virtual ~IBoostingRuleLearner() override {}; - }; - - /** - * An abstract base class for all rule learners that makes use of gradient boosting. 
- */ - class AbstractBoostingRuleLearner : public AbstractRuleLearner, - virtual public IBoostingRuleLearner { - public: - - /** - * Allows to configure a rule learner that makes use of gradient boosting. - */ - class Config : public AbstractRuleLearner::Config, - virtual public IBoostingRuleLearner::IConfig { - protected: - - /** - * An unique pointer that stores the configuration of the rule heads. - */ - std::unique_ptr headConfigPtr_; - - /** - * An unique pointer that stores the configuration of the statistics. - */ - std::unique_ptr statisticsConfigPtr_; - - /** - * An unique pointer that stores the configuration of the loss function. - */ - std::unique_ptr lossConfigPtr_; - - /** - * An unique pointer that stores the configuration of the L1 regularization term. - */ - std::unique_ptr l1RegularizationConfigPtr_; - - /** - * An unique pointer that stores the configuration of the L2 regularization term. - */ - std::unique_ptr l2RegularizationConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method that is used to assign labels to - * bins. 
- */ - std::unique_ptr labelBinningConfigPtr_; - - private: - - std::unique_ptr& getHeadConfigPtr() override final; - - std::unique_ptr& getStatisticsConfigPtr() override final; - - std::unique_ptr& getL1RegularizationConfigPtr() override final; - - std::unique_ptr& getL2RegularizationConfigPtr() override final; - - std::unique_ptr& getLossConfigPtr() override final; - - std::unique_ptr& getLabelBinningConfigPtr() override final; - - public: - - Config(); - }; - - private: - - IBoostingRuleLearner::IConfig& config_; - - const Blas blas_; - - const Lapack lapack_; - - protected: - - /** - * @see `AbstractRuleLearner::createStatisticsProviderFactory` - */ - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix) const override; - - /** - * @see `AbstractRuleLearner::createModelBuilderFactory` - */ - std::unique_ptr createModelBuilderFactory() const override; - - public: - - /** - * @param config A reference to an object of type `IBoostingRuleLearner::IConfig` that specifies the - * configuration that should be used by the rule learner - * @param ddotFunction A function pointer to BLAS' DDOT routine - * @param dspmvFunction A function pointer to BLAS' DSPMV routine - * @param dsysvFunction A function pointer to LAPACK'S DSYSV routine - */ - AbstractBoostingRuleLearner(IBoostingRuleLearner::IConfig& config, Blas::DdotFunction ddotFunction, - Blas::DspmvFunction dspmvFunction, Lapack::DsysvFunction dsysvFunction); - }; - -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/boosting/include/boosting/learner_boomer.hpp b/cpp/subprojects/boosting/include/boosting/learner_boomer.hpp deleted file mode 100644 index cd169779..00000000 --- a/cpp/subprojects/boosting/include/boosting/learner_boomer.hpp +++ /dev/null @@ -1,177 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) 
-#endif - -#include "boosting/learner.hpp" - -namespace boosting { - - /** - * Defines the interface of the BOOMER algorithm. - */ - class MLRLBOOSTING_API IBoomer : virtual public IBoostingRuleLearner { - public: - - /** - * Defines the interface for configuring the BOOMER algorithm. - */ - class IConfig : virtual public IBoostingRuleLearner::IConfig, - virtual public IBoostingRuleLearner::IAutomaticPartitionSamplingMixin, - virtual public IBoostingRuleLearner::IAutomaticFeatureBinningMixin, - virtual public IBoostingRuleLearner::IAutomaticParallelRuleRefinementMixin, - virtual public IBoostingRuleLearner::IAutomaticParallelStatisticUpdateMixin, - virtual public IBoostingRuleLearner::IConstantShrinkageMixin, - virtual public IBoostingRuleLearner::INoL1RegularizationMixin, - virtual public IBoostingRuleLearner::IL1RegularizationMixin, - virtual public IBoostingRuleLearner::INoL2RegularizationMixin, - virtual public IBoostingRuleLearner::IL2RegularizationMixin, - virtual public IBoostingRuleLearner::INoDefaultRuleMixin, - virtual public IBoostingRuleLearner::IAutomaticDefaultRuleMixin, - virtual public IBoostingRuleLearner::ICompleteHeadMixin, - virtual public IBoostingRuleLearner::IDynamicPartialHeadMixin, - virtual public IBoostingRuleLearner::IFixedPartialHeadMixin, - virtual public IBoostingRuleLearner::ISingleLabelHeadMixin, - virtual public IBoostingRuleLearner::IAutomaticHeadMixin, - virtual public IBoostingRuleLearner::IDenseStatisticsMixin, - virtual public IBoostingRuleLearner::ISparseStatisticsMixin, - virtual public IBoostingRuleLearner::IAutomaticStatisticsMixin, - virtual public IBoostingRuleLearner::IExampleWiseLogisticLossMixin, - virtual public IBoostingRuleLearner::IExampleWiseSquaredErrorLossMixin, - virtual public IBoostingRuleLearner::IExampleWiseSquaredHingeLossMixin, - virtual public IBoostingRuleLearner::ILabelWiseLogisticLossMixin, - virtual public IBoostingRuleLearner::ILabelWiseSquaredErrorLossMixin, - virtual public 
IBoostingRuleLearner::ILabelWiseSquaredHingeLossMixin, - virtual public IBoostingRuleLearner::INoLabelBinningMixin, - virtual public IBoostingRuleLearner::IEqualWidthLabelBinningMixin, - virtual public IBoostingRuleLearner::IAutomaticLabelBinningMixin, - virtual public IBoostingRuleLearner::IIsotonicMarginalProbabilityCalibrationMixin, - virtual public IBoostingRuleLearner::IIsotonicJointProbabilityCalibrationMixin, - virtual public IBoostingRuleLearner::ILabelWiseBinaryPredictorMixin, - virtual public IBoostingRuleLearner::IExampleWiseBinaryPredictorMixin, - virtual public IBoostingRuleLearner::IGfmBinaryPredictorMixin, - virtual public IBoostingRuleLearner::IAutomaticBinaryPredictorMixin, - virtual public IBoostingRuleLearner::ILabelWiseScorePredictorMixin, - virtual public IBoostingRuleLearner::ILabelWiseProbabilityPredictorMixin, - virtual public IBoostingRuleLearner::IMarginalizedProbabilityPredictorMixin, - virtual public IBoostingRuleLearner::IAutomaticProbabilityPredictorMixin, - virtual public IRuleLearner::ISequentialRuleModelAssemblageMixin, - virtual public IRuleLearner::IDefaultRuleMixin, - virtual public IRuleLearner::IGreedyTopDownRuleInductionMixin, - virtual public IRuleLearner::IBeamSearchTopDownRuleInductionMixin, - virtual public IRuleLearner::INoPostProcessorMixin, - virtual public IRuleLearner::INoFeatureBinningMixin, - virtual public IRuleLearner::IEqualWidthFeatureBinningMixin, - virtual public IRuleLearner::IEqualFrequencyFeatureBinningMixin, - virtual public IRuleLearner::INoLabelSamplingMixin, - virtual public IRuleLearner::IRoundRobinLabelSamplingMixin, - virtual public IRuleLearner::ILabelSamplingWithoutReplacementMixin, - virtual public IRuleLearner::INoInstanceSamplingMixin, - virtual public IRuleLearner::IInstanceSamplingWithoutReplacementMixin, - virtual public IRuleLearner::IInstanceSamplingWithReplacementMixin, - virtual public IRuleLearner::ILabelWiseStratifiedInstanceSamplingMixin, - virtual public 
IRuleLearner::IExampleWiseStratifiedInstanceSamplingMixin, - virtual public IRuleLearner::INoFeatureSamplingMixin, - virtual public IRuleLearner::IFeatureSamplingWithoutReplacementMixin, - virtual public IRuleLearner::INoPartitionSamplingMixin, - virtual public IRuleLearner::IRandomBiPartitionSamplingMixin, - virtual public IRuleLearner::ILabelWiseStratifiedBiPartitionSamplingMixin, - virtual public IRuleLearner::IExampleWiseStratifiedBiPartitionSamplingMixin, - virtual public IRuleLearner::INoRulePruningMixin, - virtual public IRuleLearner::IIrepRulePruningMixin, - virtual public IRuleLearner::INoParallelRuleRefinementMixin, - virtual public IRuleLearner::IParallelRuleRefinementMixin, - virtual public IRuleLearner::INoParallelStatisticUpdateMixin, - virtual public IRuleLearner::IParallelStatisticUpdateMixin, - virtual public IRuleLearner::INoParallelPredictionMixin, - virtual public IRuleLearner::IParallelPredictionMixin, - virtual public IRuleLearner::INoSizeStoppingCriterionMixin, - virtual public IRuleLearner::ISizeStoppingCriterionMixin, - virtual public IRuleLearner::INoTimeStoppingCriterionMixin, - virtual public IRuleLearner::ITimeStoppingCriterionMixin, - virtual public IRuleLearner::IPrePruningMixin, - virtual public IRuleLearner::INoGlobalPruningMixin, - virtual public IRuleLearner::IPostPruningMixin, - virtual public IRuleLearner::INoSequentialPostOptimizationMixin, - virtual public IRuleLearner::ISequentialPostOptimizationMixin, - virtual public IRuleLearner::INoMarginalProbabilityCalibrationMixin, - virtual public IRuleLearner::INoJointProbabilityCalibrationMixin { - public: - - virtual ~IConfig() override {}; - }; - - virtual ~IBoomer() override {}; - }; - - /** - * The BOOMER algorithm. - */ - class Boomer final : public AbstractBoostingRuleLearner, - virtual public IBoomer { - public: - - /** - * Allows to configure the BOOMER algorithm. 
- */ - class Config final : public AbstractBoostingRuleLearner::Config, - virtual public IBoomer::IConfig { - public: - - Config(); - - /** - * @see `IRuleLearner::ISizeStoppingCriterionMixin::useSizeStoppingCriterion` - */ - ISizeStoppingCriterionConfig& useSizeStoppingCriterion() override; - }; - - private: - - const std::unique_ptr configPtr_; - - public: - - /** - * @param configPtr An unique pointer to an object of type `IBoomer::IConfig` that specifies the - * configuration that should be used by the rule learner - * @param ddotFunction A function pointer to BLAS' DDOT routine - * @param dspmvFunction A function pointer to BLAS' DSPMV routine - * @param dsysvFunction A function pointer to LAPACK'S DSYSV routine - */ - Boomer(std::unique_ptr configPtr, Blas::DdotFunction ddotFunction, - Blas::DspmvFunction dspmvFunction, Lapack::DsysvFunction dsysvFunction); - }; - - /** - * Creates and returns a new object of type `IBoomer::IConfig`. - * - * @return An unique pointer to an object of type `IBoomer::IConfig` that has been created - */ - MLRLBOOSTING_API std::unique_ptr createBoomerConfig(); - - /** - * Creates and returns a new object of type `IBoomer`. 
- * - * @param configPtr An unique pointer to an object of type `IBoomer::IConfig` that specifies the configuration - * that should be used by the rule learner - * @param ddotFunction A function pointer to BLAS' DDOT routine - * @param dspmvFunction A function pointer to BLAS' DSPMV routine - * @param dsysvFunction A function pointer to LAPACK'S DSYSV routine - * @return An unique pointer to an object of type `IBoomer` that has been created - */ - MLRLBOOSTING_API std::unique_ptr createBoomer(std::unique_ptr configPtr, - Blas::DdotFunction ddotFunction, - Blas::DspmvFunction dspmvFunction, - Lapack::DsysvFunction dsysvFunction); - -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss.hpp deleted file mode 100644 index 888519a2..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/prediction/probability_function_joint.hpp" -#include "boosting/prediction/probability_function_marginal.hpp" -#include "common/input/feature_matrix.hpp" -#include "common/input/label_matrix_row_wise.hpp" -#include "common/measures/measure_distance.hpp" -#include "common/measures/measure_evaluation.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - * Defines an interface for all loss functions. - */ - class ILoss : public IEvaluationMeasure, - public IDistanceMeasure { - public: - - virtual ~ILoss() override {}; - }; - - /** - * Defines an interface for all classes that allow to configure a loss function. - */ - class ILossConfig { - public: - - virtual ~ILossConfig() {}; - - /** - * Creates and returns a new object of type `IStatisticsProviderFactory` according to the specified - * configuration. 
- * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access - * to the feature values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides - * access to the labels of the training examples - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - * @param preferSparseStatistics True, if a sparse representation of statistics should be preferred, if - * possible, false otherwise - * @return An unique pointer to an object of type `IStatisticsProviderFactory` that - * has been created - */ - virtual std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const = 0; - - /** - * Creates and returns a new object of type `IEvaluationMeasureFactory` according to the specified - * configuration. - * - * @return An unique pointer to an object of type `IEvaluationMeasureFactory` that has been created - */ - virtual std::unique_ptr createEvaluationMeasureFactory() const = 0; - - /** - * Creates and returns a new object of type `IDistanceMeasureFactory` according to the specified - * configuration. - * - * @return An unique pointer to an object of type `IDistanceMeasureFactory` that has been created - */ - virtual std::unique_ptr createDistanceMeasureFactory() const = 0; - - /** - * Creates and returns a new object of type `IMarginalProbabilityFunctionFactory` according to the specified - * configuration. 
- * - * @return An unique pointer to an object of type `IMarginalProbabilityFunctionFactory` that has been - * created or a null pointer, if the loss function does not support the prediction of marginal - * probabilities - */ - virtual std::unique_ptr createMarginalProbabilityFunctionFactory() - const = 0; - - /** - * Creates and returns a new object of type `IJointProbabilityFunctionFactory` according to the specified - * configuration. - * - * @return An unique pointer to an object of type `IJointProbabilityFunctionFactory` that has been created - * to a null pointer, if the loss function does not support the prediction of joint probabilities - */ - virtual std::unique_ptr createJointProbabilityFunctionFactory() const = 0; - - /** - * Returns whether the loss function is decomposable or not. - * - * @return True, if the loss function is decomposable, false otherwise - */ - virtual bool isDecomposable() const = 0; - - /** - * Returns whether the loss function supports to use a sparse format for storing statistics or not. - * - * @return True, if the loss function supports to use a sparse format for storing statistics, false - * otherwise - */ - virtual bool isSparse() const = 0; - - /** - * Returns the default prediction for an example that is not covered by any rules. 
- * - * @return The default prediction - */ - virtual float64 getDefaultPrediction() const = 0; - }; - -}; diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise.hpp deleted file mode 100644 index 2055fa32..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise.hpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_view_example_wise_dense.hpp" -#include "boosting/losses/loss_label_wise.hpp" - -namespace boosting { - - /** - * Defines an interface for all (non-decomposable) loss functions that are applied example-wise. - */ - class IExampleWiseLoss : public ILabelWiseLoss { - public: - - virtual ~IExampleWiseLoss() override {}; - - /** - * Updates the statistics of the example at a specific index. - * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random access - * to the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the currently - * predicted scores - * @param statisticView A reference to an object of type `DenseExampleWiseStatisticView` to be updated - */ - virtual void updateExampleWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise access - * to the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the currently - * predicted scores - * @param statisticView A reference to an object of type `DenseExampleWiseStatisticView` to be updated - */ - virtual void updateExampleWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const = 0; - }; - - /** - * Defines an interface for all factories that allow to create instances of the type `IExampleWiseLoss`. - */ - class IExampleWiseLossFactory : public ILabelWiseLossFactory { - public: - - virtual ~IExampleWiseLossFactory() override {}; - - /** - * Creates and returns a new object of type `IExampleWiseLoss`. - * - * @return An unique pointer to an object of type `IExampleWiseLoss` that has been created - */ - virtual std::unique_ptr createExampleWiseLoss() const = 0; - - std::unique_ptr createLabelWiseLoss() const override final { - return this->createExampleWiseLoss(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a (non-decomposable) loss function that is applied - * example-wise. - */ - class IExampleWiseLossConfig : public ILossConfig { - public: - - virtual ~IExampleWiseLossConfig() override {}; - - /** - * Creates and returns a new object of type `IExampleWiseLossFactory` according to the specified - * configuration. 
- * - * @return An unique pointer to an object of type `IExampleWiseLossFactory` that has been created - */ - virtual std::unique_ptr createExampleWiseLossFactory() const = 0; - - std::unique_ptr createEvaluationMeasureFactory() const override final { - return this->createExampleWiseLossFactory(); - } - - std::unique_ptr createDistanceMeasureFactory() const override final { - return this->createExampleWiseLossFactory(); - } - - bool isDecomposable() const override final { - return false; - } - - bool isSparse() const override { - return true; - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_logistic.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_logistic.hpp deleted file mode 100644 index edcf1bcd..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_logistic.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_example_wise.hpp" -#include "boosting/rule_evaluation/head_type.hpp" - -namespace boosting { - - /** - * Allows to configure a loss function that implements a multi-label variant of the logistic loss that is applied - * example-wise. 
- */ - class ExampleWiseLogisticLossConfig final : public IExampleWiseLossConfig { - private: - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule heads - */ - ExampleWiseLogisticLossConfig(const std::unique_ptr& headConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const override; - - std::unique_ptr createMarginalProbabilityFunctionFactory() - const override; - - std::unique_ptr createJointProbabilityFunctionFactory() const override; - - float64 getDefaultPrediction() const override; - - std::unique_ptr createExampleWiseLossFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_error.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_error.hpp deleted file mode 100644 index 0e09d3c9..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_error.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_example_wise.hpp" -#include "boosting/rule_evaluation/head_type.hpp" - -namespace boosting { - - /** - * Allows to configure a loss function that implements a multi-label variant of the squared error loss that is - * applied example-wise. 
- */ - class ExampleWiseSquaredErrorLossConfig final : public IExampleWiseLossConfig { - private: - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule heads - */ - ExampleWiseSquaredErrorLossConfig(const std::unique_ptr& headConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const override; - - std::unique_ptr createMarginalProbabilityFunctionFactory() - const override; - - std::unique_ptr createJointProbabilityFunctionFactory() const override; - - float64 getDefaultPrediction() const override; - - std::unique_ptr createExampleWiseLossFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_hinge.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_hinge.hpp deleted file mode 100644 index c690ee29..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_example_wise_squared_hinge.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_example_wise.hpp" -#include "boosting/rule_evaluation/head_type.hpp" - -namespace boosting { - - /** - * Allows to configure a loss function that implements a multi-label variant of the squared hinge loss that is - * applied example-wise. 
- */ - class ExampleWiseSquaredHingeLossConfig final : public IExampleWiseLossConfig { - private: - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule heads - */ - ExampleWiseSquaredHingeLossConfig(const std::unique_ptr& headConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const override; - - std::unique_ptr createMarginalProbabilityFunctionFactory() - const override; - - std::unique_ptr createJointProbabilityFunctionFactory() const override; - - float64 getDefaultPrediction() const override; - - std::unique_ptr createExampleWiseLossFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise.hpp deleted file mode 100644 index c4bc34c6..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise.hpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_view_label_wise_dense.hpp" -#include "boosting/losses/loss.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -namespace boosting { - - /** - * Defines an interface for all (decomposable) loss functions that are applied label-wise. - */ - class ILabelWiseLoss : public ILoss { - public: - - virtual ~ILabelWiseLoss() override {}; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `CompleteIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the - * currently predicted scores - * @param labelIndicesBegin A `CompleteIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `CompleteIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `DenseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `PartialIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the - * currently predicted scores - * @param labelIndicesBegin A `PartialIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `PartialIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `DenseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `CompleteIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the - * currently predicted scores - * @param labelIndicesBegin A `CompleteIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `CompleteIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `DenseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `PartialIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the - * currently predicted scores - * @param labelIndicesBegin A `PartialIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `PartialIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `DenseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const = 0; - }; - - /** - * Defines an interface for all factories that allow to create instances of the type `ILabelWiseLoss`. - */ - class ILabelWiseLossFactory : public IEvaluationMeasureFactory, - public IDistanceMeasureFactory { - public: - - virtual ~ILabelWiseLossFactory() override {}; - - /** - * Creates and returns a new object of type `ILabelWiseLoss`. 
- * - * @return An unique pointer to an object of type `ILabelWiseLoss` that has been created - */ - virtual std::unique_ptr createLabelWiseLoss() const = 0; - - /** - * @see `IEvaluationMeasureFactory::createEvaluationMeasure` - */ - std::unique_ptr createEvaluationMeasure() const override final { - return this->createLabelWiseLoss(); - } - - /** - * @see `IDistanceMeasureFactory::createDistanceMeasure` - */ - std::unique_ptr createDistanceMeasure( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) const override final { - return this->createLabelWiseLoss(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a (decomposable) loss function that is applied - * label-wise. - */ - class ILabelWiseLossConfig : public ILossConfig { - public: - - virtual ~ILabelWiseLossConfig() override {}; - - /** - * Creates and returns a new object of type `ILabelWiseLossFactory` according to the specified - * configuration. 
- * - * @return An unique pointer to an object of type `ILabelWiseLossFactory` that has been created - */ - virtual std::unique_ptr createLabelWiseLossFactory() const = 0; - - std::unique_ptr createEvaluationMeasureFactory() const override final { - return this->createLabelWiseLossFactory(); - } - - std::unique_ptr createDistanceMeasureFactory() const override final { - return this->createLabelWiseLossFactory(); - } - - bool isDecomposable() const override final { - return true; - } - - bool isSparse() const override { - return false; - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_logistic.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_logistic.hpp deleted file mode 100644 index 701441ef..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_logistic.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_label_wise.hpp" -#include "boosting/rule_evaluation/head_type.hpp" - -namespace boosting { - - /** - * Allows to configure a loss function that implements a multi-label variant of the logistic loss that is applied - * label-wise. 
- */ - class LabelWiseLogisticLossConfig final : public ILabelWiseLossConfig { - private: - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule heads - */ - LabelWiseLogisticLossConfig(const std::unique_ptr& headConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const override; - - std::unique_ptr createMarginalProbabilityFunctionFactory() - const override; - - std::unique_ptr createJointProbabilityFunctionFactory() const override; - - float64 getDefaultPrediction() const override; - - std::unique_ptr createLabelWiseLossFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_sparse.hpp deleted file mode 100644 index d35d6301..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_sparse.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_view_label_wise_sparse.hpp" -#include "boosting/losses/loss_label_wise.hpp" -#include "common/measures/measure_evaluation_sparse.hpp" - -namespace boosting { - - /** - * Defines an interface for all (decomposable) loss functions that are applied label-wise and are suited for the use - * of sparse data structures. To meet this requirement, the gradients and Hessians that are computed by the loss - * function should be zero, if the prediction for a label is correct. 
- */ - class ISparseLabelWiseLoss : virtual public ILabelWiseLoss, - public ISparseEvaluationMeasure { - public: - - virtual ~ISparseLabelWiseLoss() override {}; - - // Keep "updateLabelWiseStatistics" functions from the parent class rather than hiding them - using ILabelWiseLoss::updateLabelWiseStatistics; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `CompleteIndexVector`. - * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `SparseSetMatrix` that stores the currently - * predicted scores - * @param labelIndicesBegin A `CompleteIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `CompleteIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `SparseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `PartialIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `SparseSetMatrix` that stores the currently - * predicted scores - * @param labelIndicesBegin A `PartialIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `PartialIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `SparseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `CompleteIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `SparseSetMatrix` that stores the currently - * predicted scores - * @param labelIndicesBegin A `CompleteIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `CompleteIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `SparseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const = 0; - - /** - * Updates the statistics of the example at a specific index, considering only the labels, whose indices are - * provided by a `PartialIndexVector`. 
- * - * @param exampleIndex The index of the example for which the gradients and Hessians should be updated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise - * access to the labels of the training examples - * @param scoreMatrix A reference to an object of type `SparseSetMatrix` that stores the currently - * predicted scores - * @param labelIndicesBegin A `PartialIndexVector::const_iterator` to the beginning of the label indices - * @param labelIndicesEnd A `PartialIndexVector::const_iterator` to the end of the label indices - * @param statisticView A reference to an object of type `SparseLabelWiseStatisticView` to be updated - */ - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const = 0; - }; - - /** - * Defines an interface for all factories that allow to create instances of the type `ISparseLabelWiseLoss`. - */ - class ISparseLabelWiseLossFactory : public ILabelWiseLossFactory, - public ISparseEvaluationMeasureFactory { - public: - - virtual ~ISparseLabelWiseLossFactory() override {}; - - /** - * Creates and returns a new object of type `ISparseLabelWiseLoss`. 
- * - * @return An unique pointer to an object of type `ISparseLabelWiseLoss` that has been created - */ - virtual std::unique_ptr createSparseLabelWiseLoss() const = 0; - - /** - * @see `ILabelWiseLossFactory::createLabelWiseLoss` - */ - std::unique_ptr createLabelWiseLoss() const override final { - return this->createSparseLabelWiseLoss(); - } - - /** - * @see `ISparseEvaluationMeasureFactory::createSparseEvaluationMeasure` - */ - std::unique_ptr createSparseEvaluationMeasure() const override final { - return this->createSparseLabelWiseLoss(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a (decomposable) loss function that is applied - * label-wise and is suited for the use of sparse data structures. - */ - class ISparseLabelWiseLossConfig : public ILabelWiseLossConfig { - public: - - virtual ~ISparseLabelWiseLossConfig() override {}; - - /** - * Creates and returns a new object of type `ISparseLabelWiseLossFactory` according to the specified - * configuration. - * - * @return An unique pointer to an object of type `ISparseLabelWiseLossFactory` that has been created - */ - virtual std::unique_ptr createSparseLabelWiseLossFactory() const = 0; - - /** - * Creates and returns a new object of type `ISparseEvaluationMeasureFactory` according to the specified - * configuration. 
- * - * @return An unique pointer to an object of type `ISparseEvaluationMeasureFactory` that has been created - */ - std::unique_ptr createSparseEvaluationMeasureFactory() const { - return this->createSparseLabelWiseLossFactory(); - } - - std::unique_ptr createLabelWiseLossFactory() const override final { - return this->createSparseLabelWiseLossFactory(); - } - - bool isSparse() const override final { - return true; - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_error.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_error.hpp deleted file mode 100644 index 5d601b23..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_error.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_label_wise.hpp" -#include "boosting/rule_evaluation/head_type.hpp" - -namespace boosting { - - /** - * Allows to configure a loss function that implements a multi-label variant of the squared error loss that is - * applied label-wise. 
- */ - class LabelWiseSquaredErrorLossConfig final : public ILabelWiseLossConfig { - private: - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule heads - */ - LabelWiseSquaredErrorLossConfig(const std::unique_ptr& headConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const override; - - std::unique_ptr createMarginalProbabilityFunctionFactory() - const override; - - std::unique_ptr createJointProbabilityFunctionFactory() const override; - - float64 getDefaultPrediction() const override; - - std::unique_ptr createLabelWiseLossFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_hinge.hpp b/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_hinge.hpp deleted file mode 100644 index ba8159c7..00000000 --- a/cpp/subprojects/boosting/include/boosting/losses/loss_label_wise_squared_hinge.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_label_wise_sparse.hpp" -#include "boosting/rule_evaluation/head_type.hpp" - -namespace boosting { - - /** - * Allows to configure a loss function that implements a multi-label variant of the squared hinge loss that is - * applied label-wise. 
- */ - class LabelWiseSquaredHingeLossConfig final : public ISparseLabelWiseLossConfig { - private: - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule heads - */ - LabelWiseSquaredHingeLossConfig(const std::unique_ptr& headConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const override; - - std::unique_ptr createMarginalProbabilityFunctionFactory() - const override; - - std::unique_ptr createJointProbabilityFunctionFactory() const override; - - float64 getDefaultPrediction() const override; - - std::unique_ptr createSparseLabelWiseLossFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/macros.hpp b/cpp/subprojects/boosting/include/boosting/macros.hpp deleted file mode 100644 index 2ff7fb12..00000000 --- a/cpp/subprojects/boosting/include/boosting/macros.hpp +++ /dev/null @@ -1,14 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #ifdef MLRLBOOSTING_EXPORTS - #define MLRLBOOSTING_API __declspec(dllexport) - #else - #define MLRLBOOSTING_API __declspec(dllimport) - #endif -#else - #define MLRLBOOSTING_API -#endif diff --git a/cpp/subprojects/boosting/include/boosting/math/blas.hpp b/cpp/subprojects/boosting/include/boosting/math/blas.hpp deleted file mode 100644 index b87a8cc8..00000000 --- a/cpp/subprojects/boosting/include/boosting/math/blas.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -namespace boosting { - - /** - * Allows to execute BLAS routines. - */ - class Blas final { - public: - - /** - * A function pointer to BLAS' DDOT routine. 
- */ - typedef double (*DdotFunction)(int* n, double* dx, int* incx, double* dy, int* incy); - - /** - * A function pointer to BLAS' DSPMV routine. - */ - typedef void (*DspmvFunction)(char* uplo, int* n, double* alpha, double* ap, double* x, int* incx, - double* beta, double* y, int* incy); - - private: - - const DdotFunction ddotFunction_; - - const DspmvFunction dspmvFunction_; - - public: - - /** - * @param ddotFunction A function pointer to BLAS' DDOT routine - * @param dspmvFunction A function pointer to BLAS' DSPMV routine - */ - Blas(DdotFunction ddotFunction, DspmvFunction dspmvFunction); - - /** - * Computes and returns the dot product x * y of two vectors x and y using BLAS' DDOT routine (see - * http://www.netlib.org/lapack/explore-html/de/da4/group__double__blas__level1_ga75066c4825cb6ff1c8ec4403ef8c843a.html). - * - * @param x A pointer to an array of type `float64`, shape `(n)`, representing the first vector x - * @param y A pointer to an array of type `float64`, shape `(n)`, representing the second vector y - * @param n The number of elements in the arrays `x` and `y` - * @return A scalar of type `float64`, representing the result of the dot product x * y - */ - float64 ddot(float64* x, float64* y, int n) const; - - /** - * Computes and returns the solution to the matrix-vector operation A * x using BLAS' DSPMV routine (see - * http://www.netlib.org/lapack/explore-html/d7/d15/group__double__blas__level2_gab746575c4f7dd4eec72e8110d42cefe9.html). - * - * DSPMV expects the matrix A to be a symmetric matrix with shape `(n, n)` and x to be an array with shape - * `(n)`. The matrix A must be supplied in packed form, i.e., as an array with shape `(n * (n + 1) / 2 )` - * that consists of the columns of A appended to each other and omitting all unspecified elements. 
- * - * @param a A pointer to an array of type `float64`, shape `(n * (n + 1) / 2)`, representing the - * elements in the upper-right triangle of the matrix A in a packed form - * @param x A pointer to an array of type `float64`, shape `(n)`, representing the elements in the - * array x - * @param output A pointer to an array of type `float64`, shape `(n)`, the result of the matrix-vector - * operation A * x should be written to. May contain arbitrary values - * @param n The number of elements in the arrays `a` and `x` - */ - void dspmv(float64* a, float64* x, float64* output, int n) const; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/math/lapack.hpp b/cpp/subprojects/boosting/include/boosting/math/lapack.hpp deleted file mode 100644 index ddc2da91..00000000 --- a/cpp/subprojects/boosting/include/boosting/math/lapack.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -namespace boosting { - - /** - * Allows to execute LAPACK routines. - */ - class Lapack final { - public: - - /** - * A function pointer to LAPACK'S DSYSV routine. - */ - typedef void (*DsysvFunction)(char* uplo, int* n, int* nrhs, double* a, int* lda, int* ipiv, double* b, - int* ldb, double* work, int* lwork, int* info); - - private: - - const DsysvFunction dsysvFunction_; - - public: - - /** - * @param dsysvFunction A function pointer to LAPACK's DSYSV routine - */ - Lapack(DsysvFunction dsysvFunction); - - /** - * Determines and returns the optimal value for the parameter "lwork" as used by LAPACK'S DSYSV routine. - * - * This function must be run before attempting to solve a linear system using the function `dsysv` to - * determine the optimal value for the parameter "lwork". - * - * @param tmpArray1 A pointer to an array of type `float64`, shape `(n, n)` that will be used by the - * function `dsysv` to temporarily store values computed by the DSYSV routine. 
May contain - * arbitrary values - * @param output A pointer to an array of type `float64`, shape `(n)`, the solution of the system of - * linear equations should be written to by the function `dsysv`. May contain arbitrary - * values - * @param n The number of equations in the linear system to be solved by the function `dsysv` - * @return The optimal value for the parameter "lwork" - */ - int queryDsysvLworkParameter(float64* tmpArray1, float64* output, int n) const; - - /** - * Computes and returns the solution to a linear system A * X = B using LAPACK's DSYSV solver (see - * http://www.netlib.org/lapack/explore-html/d6/d0e/group__double_s_ysolve_ga9995c47692c9885ed5d6a6b431686f41.html). - * - * The function `queryDsysvLworkParameter` must be run beforehand to determine the optimal value for the - * parameter "lwork" and to allocate a temporary array depending on this value. - * - * DSYSV requires A to be a matrix with shape `(n, n)`, representing the coefficients, and B to be a matrix - * with shape `(n, nrhs)`, representing the ordinates. X is a matrix of unknowns with shape `(n, nrhs)`. - * - * DSYSV will overwrite the matrices A and B. When terminated successfully, B will contain the solution to - * the system of linear equations. To retain their state, this function will copy the given arrays before - * invoking DSYSV. - * - * Furthermore, DSYSV assumes the matrix of coefficients A to be symmetrical, i.e., it will only use the - * upper-right triangle of A, whereas the remaining elements are ignored. For reasons of space efficiency, - * this function expects the coefficients to be given as an array with shape `n * (n + 1) / 2`, representing - * the elements of the upper-right triangle of A, where the columns are appended to each other and - * unspecified elements are omitted. This function will implicitly convert the given array into a matrix - * that is suited for DSYSV. 
- * - * @param tmpArray1 A pointer to an array of type `float64`, shape `(n, n)` that stores the - * coefficients in the matrix A. It will be used to temporarily store - * values computed by the DSYSV routine - * @param tmpArray2 A pointer to an array of type `int`, shape `(n)` that will be used to - * temporarily store values computed by the DSYSV routine. May contain - * arbitrary values - * @param tmpArray3 A pointer to an array of type `double`, shape `(lwork)` that will be - * used to temporarily store values computed by the DSYSV routine. May - * contain arbitrary values - * @param output A pointer to an array of type `float64`, shape `(n)` that stores the - * ordinates in the matrix A. The solution of the system of linear - * equations will be written to this array - * @param n The number of equations - * @param lwork The value for the parameter "lwork" to be used by the DSYSV routine. - * Must have been determined using the function `queryDsysvLworkParameter` - */ - void dsysv(float64* tmpArray1, int* tmpArray2, double* tmpArray3, float64* output, int n, int lwork) const; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/math/math.hpp b/cpp/subprojects/boosting/include/boosting/math/math.hpp deleted file mode 100644 index e2c6f531..00000000 --- a/cpp/subprojects/boosting/include/boosting/math/math.hpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -namespace boosting { - - /** - * Calculates and returns the n-th triangular number, i.e., the number of elements in a n times n triangle. 
- * - * @param n A scalar of type `uint32`, representing the order of the triangular number - * @return A scalar of type `uint32`, representing the n-th triangular number - */ - static inline constexpr uint32 triangularNumber(uint32 n) { - return (n * (n + 1)) / 2; - } - - /** - * Computes and returns the L1 norm of a specific vector, i.e., the sum of the absolute values of its elements. - * - * @tparam Iterator The type of the iterator that provides access to the elements in the vector - * @param iterator An iterator of template type `Iterator` that provides random access to the elements in the - * vector - * @param n The number of elements in the vector - * @return The L1 norm - */ - template - static inline constexpr float64 l1Norm(Iterator iterator, uint32 n) { - float64 result = 0; - - for (uint32 i = 0; i < n; i++) { - float64 value = iterator[i]; - result += std::abs(value); - } - - return result; - } - - /** - * Computes and returns the L1 norm of a specific vector, i.e., the sum of the absolute values of its elements, - * where each element has a specific weight. 
- * - * @tparam Iterator The type of the iterator that provides access to the elements in the vector - * @tparam WeightIterator The type of the iterator that provides access to the weights of the elements - * @param iterator An iterator of template type `Iterator` that provides random access to the elements in - * the vector - * @param weightIterator An iterator of template type `WeightIterator` that provides random access to the weights - * of the elements - * @param n The number of elements in the vector - * @return The L1 norm - */ - template - static inline constexpr float64 l1Norm(Iterator iterator, WeightIterator weightIterator, uint32 n) { - float64 result = 0; - - for (uint32 i = 0; i < n; i++) { - float64 value = iterator[i]; - float64 weight = weightIterator[i]; - result += (std::abs(value) * weight); - } - - return result; - } - - /** - * Computes and returns the square of the L2 norm of a specific vector, i.e. the sum of the squares of its elements. - * To obtain the actual L2 norm, the square-root of the result provided by this function must be computed. - * - * @tparam Iterator The type of the iterator that provides access to the elements in the vector - * @param iterator An iterator of template type `Iterator` that provides random access to the elements in the - * vector - * @param n The number of elements in the vector - * @return The square of the L2 norm - */ - template - static inline constexpr float64 l2NormPow(Iterator iterator, uint32 n) { - float64 result = 0; - - for (uint32 i = 0; i < n; i++) { - float64 value = iterator[i]; - result += (value * value); - } - - return result; - } - - /** - * Computes and returns the square of the L2 norm of a specific vector, i.e. the sum of the squares of its elements, - * where each elements has a specific weight. To obtain the actual L2 norm, the square-root of the result provided - * by this function must be computed. 
- * - * @tparam Iterator The type of the iterator that provides access to the elements in the vector - * @tparam WeightIterator The type of the iterator that provides access to the weights of the elements - * @param iterator An iterator of template type `Iterator` that provides random access to the elements in - * the vector - * @param weightIterator An iterator of template type `WeightIterator` that provides random access to the weights - * of the elements - * @param n The number of elements in the vector - * @return The square of the L2 norm - */ - template - static inline constexpr float64 l2NormPow(Iterator iterator, WeightIterator weightIterator, uint32 n) { - float64 result = 0; - - for (uint32 i = 0; i < n; i++) { - float64 value = iterator[i]; - float64 weight = (float64) weightIterator[i]; - result += ((value * value) * weight); - } - - return result; - } - - /** - * Calculates and returns the logistic function `1 / (1 + exp(-x))`, given a specific value `x`. - * - * This implementation exploits the identity `1 / (1 + exp(-x)) = exp(x) / (1 + exp(x))` to increase numerical - * stability (see, e.g., section "Numerically stable sigmoid function" in - * https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). 
- * - * @param x The value `x` - * @return The value that has been calculated - */ - static inline constexpr float64 logisticFunction(float64 x) { - if (x >= 0) { - float64 exponential = std::exp(-x); // Evaluates to 0 for large x, resulting in 1 ultimately - return 1 / (1 + exponential); - } else { - float64 exponential = std::exp(x); // Evaluates to 0 for large x, resulting in 0 ultimately - return exponential / (1 + exponential); - } - } - -} diff --git a/cpp/subprojects/boosting/include/boosting/model/rule_list_builder.hpp b/cpp/subprojects/boosting/include/boosting/model/rule_list_builder.hpp deleted file mode 100644 index f516b77b..00000000 --- a/cpp/subprojects/boosting/include/boosting/model/rule_list_builder.hpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/model_builder.hpp" - -namespace boosting { - - /** - * Allows to create instances of the type `IModelBuilder` that build models that store several rules in the order - * they have been added. 
- */ - class RuleListBuilderFactory final : public IModelBuilderFactory { - public: - - /** - * @see `IModelBuilderFactory::create` - */ - std::unique_ptr create() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/multi_threading/parallel_rule_refinement_auto.hpp b/cpp/subprojects/boosting/include/boosting/multi_threading/parallel_rule_refinement_auto.hpp deleted file mode 100644 index 7331e6fa..00000000 --- a/cpp/subprojects/boosting/include/boosting/multi_threading/parallel_rule_refinement_auto.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/sampling/feature_sampling.hpp" - -namespace boosting { - - /** - * Allows to configure the multi-threading behavior that is used for the parallel refinement of rules by - * automatically deciding for the number of threads to be used. 
- */ - class AutoParallelRuleRefinementConfig final : public IMultiThreadingConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& headConfigPtr_; - - const std::unique_ptr& featureSamplingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param headConfigPtr A reference to an unique pointer that stores the configuration of rule - * heads - * @param featureSamplingConfigPtr A reference to an unique pointer that stores the configuration of the - * method for sampling features - */ - AutoParallelRuleRefinementConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& headConfigPtr, - const std::unique_ptr& featureSamplingConfigPtr); - - /** - * @see `IMultiThreadingConfig::getNumThreads` - */ - uint32 getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/multi_threading/parallel_statistic_update_auto.hpp b/cpp/subprojects/boosting/include/boosting/multi_threading/parallel_statistic_update_auto.hpp deleted file mode 100644 index 08f2e9d0..00000000 --- a/cpp/subprojects/boosting/include/boosting/multi_threading/parallel_statistic_update_auto.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Allows to configure the multi-threading behavior that is used for the parallel update of statistics by - * automatically deciding for the number of threads to be used. 
- */ - class AutoParallelStatisticUpdateConfig final : public IMultiThreadingConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss function - */ - AutoParallelStatisticUpdateConfig(const std::unique_ptr& lossConfigPtr); - - /** - * @see `IMultiThreadingConfig::getNumThreads` - */ - uint32 getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/post_processing/shrinkage_constant.hpp b/cpp/subprojects/boosting/include/boosting/post_processing/shrinkage_constant.hpp deleted file mode 100644 index bbdab005..00000000 --- a/cpp/subprojects/boosting/include/boosting/post_processing/shrinkage_constant.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/macros.hpp" -#include "common/post_processing/post_processor.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a post-processor that shrinks the weights of rules - * by a constant "shrinkage" parameter. - */ - class MLRLBOOSTING_API IConstantShrinkageConfig { - public: - - virtual ~IConstantShrinkageConfig() {}; - - /** - * Returns the value of the "shrinkage" parameter. - * - * @return The value of the "shrinkage" parameter - */ - virtual float64 getShrinkage() const = 0; - - /** - * Sets the value of the "shrinkage" parameter. - * - * @param shrinkage The value of the "shrinkage" parameter. Must be in (0, 1) - * @return A reference to an object of type `IConstantShrinkageConfig` that allows further - * configuration of the post-processor - */ - virtual IConstantShrinkageConfig& setShrinkage(float64 shrinkage) = 0; - }; - - /** - * Allows to configure a post-processor that shrinks the weights of rules by a constant "shrinkage" parameter. 
- */ - class ConstantShrinkageConfig final : public IPostProcessorConfig, - public IConstantShrinkageConfig { - private: - - float64 shrinkage_; - - public: - - ConstantShrinkageConfig(); - - float64 getShrinkage() const override; - - IConstantShrinkageConfig& setShrinkage(float64 shrinkage) override; - - /** - * @see `IPostProcessorConfig::createPostProcessorFactory` - */ - std::unique_ptr createPostProcessorFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/discretization_function.hpp b/cpp/subprojects/boosting/include/boosting/prediction/discretization_function.hpp deleted file mode 100644 index 68d6cd8b..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/discretization_function.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/prediction/probability_calibration_marginal.hpp" - -#include - -namespace boosting { - - /** - * Defines an interface for all classes that allow to discretize regression scores. - */ - class IDiscretizationFunction { - public: - - virtual ~IDiscretizationFunction() {}; - - /** - * Discretizes the regression score that is predicted for a specific label. - * - * @param labelIndex The index of the label, the regression score is predicted for - * @param score The regression score to be discretized - * @return A binary value the given regression score has been turned into - */ - virtual bool discretizeScore(uint32 labelIndex, float64 score) const = 0; - }; - - /** - * Defines an interface for all factories that allow to create instances of the type `IDiscretizationFunction`. - */ - class IDiscretizationFunctionFactory { - public: - - virtual ~IDiscretizationFunctionFactory() {}; - - /** - * Creates and returns a new object of the type `IDiscretizationFunction`. 
- * - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that should be used - * for the calibration of marginal probabilities - * @return An unique pointer to an object of type - * `IDiscretizationFunction` that has been created - */ - virtual std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/discretization_function_probability.hpp b/cpp/subprojects/boosting/include/boosting/prediction/discretization_function_probability.hpp deleted file mode 100644 index ebf37e22..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/discretization_function_probability.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/discretization_function.hpp" -#include "boosting/prediction/probability_function_marginal.hpp" - -namespace boosting { - - /** - * Allow to create instances of the type `IDiscretizationFunction` that discretize regression scores by transforming - * them into marginal probabilities. 
- */ - class ProbabilityDiscretizationFunctionFactory : public IDiscretizationFunctionFactory { - private: - - std::unique_ptr marginalProbabilityFunctionFactoryPtr_; - - public: - - /** - * @param marginalProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IMarginalProbabilityFunctionFactory` that allows to create - * the implementation to be used to transform regression scores - * into marginal probabilities - */ - ProbabilityDiscretizationFunctionFactory( - std::unique_ptr marginalProbabilityFunctionFactoryPtr); - - std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/discretization_function_score.hpp b/cpp/subprojects/boosting/include/boosting/prediction/discretization_function_score.hpp deleted file mode 100644 index 3c3af9aa..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/discretization_function_score.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/discretization_function.hpp" - -namespace boosting { - - /** - * Allow to create instances of the type `IDiscretizationFunction` that discretize regression scores by comparing - * them to a threshold. 
- */ - class ScoreDiscretizationFunctionFactory : public IDiscretizationFunctionFactory { - private: - - float64 threshold_; - - public: - - /** - * @param threshold The threshold that should be used for discretization - */ - ScoreDiscretizationFunctionFactory(float64 threshold); - - std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_auto.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_auto.hpp deleted file mode 100644 index 898c9b7d..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_auto.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_binary.hpp" - -namespace boosting { - - /** - * Allows to configure a predictor that automatically decides for a method that is used to predict whether - * individual labels of given query examples are relevant or not - */ - class AutomaticBinaryPredictorConfig : public IBinaryPredictorConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - AutomaticBinaryPredictorConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - /** - * @see `IPredictorConfig::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory(const IRowWiseFeatureMatrix& featureMatrix, - uint32 
numLabels) const override; - - /** - * @see `IBinaryPredictorConfig::createSparsePredictorFactory` - */ - std::unique_ptr createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_common.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_common.hpp deleted file mode 100644 index 4855f852..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_common.hpp +++ /dev/null @@ -1,388 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/predictor_score_common.hpp" -#include "boosting/prediction/transformation_binary.hpp" -#include "common/data/arrays.hpp" -#include "common/data/matrix_c_contiguous.hpp" -#include "common/prediction/predictor_binary.hpp" - -namespace boosting { - - /** - * An implementation of the type `IBinaryPredictor` that allows to predict binary labels for given query examples by - * summing up the scores that are predicted by individual rules in a rule-based model and transforming the - * aggregated scores into binary predictions in {0, 1} according to an `IBinaryTransformation`. 
- * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of - * the query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ - template - class BinaryPredictor final : public IBinaryPredictor { - private: - - class IncrementalPredictor final - : public AbstractIncrementalPredictor> { - private: - - class IncrementalPredictionDelegate final - : public PredictionDispatcher::IPredictionDelegate { - private: - - CContiguousView& realMatrix_; - - CContiguousView& predictionMatrix_; - - const IBinaryTransformation& binaryTransformation_; - - public: - - IncrementalPredictionDelegate(CContiguousView& realMatrix, - CContiguousView& predictionMatrix, - const IBinaryTransformation& binaryTransformation) - : realMatrix_(realMatrix), predictionMatrix_(predictionMatrix), - binaryTransformation_(binaryTransformation) {} - - void predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const override { - ScorePredictionDelegate(realMatrix_) - .predictForExample(featureMatrix, rulesBegin, rulesEnd, threadIndex, exampleIndex, - predictionIndex); - binaryTransformation_.apply(realMatrix_.values_cbegin(predictionIndex), - realMatrix_.values_cend(predictionIndex), - predictionMatrix_.values_begin(predictionIndex), - predictionMatrix_.values_end(predictionIndex)); - } - }; - - const std::shared_ptr binaryTransformationPtr_; - - DensePredictionMatrix realMatrix_; - - DensePredictionMatrix predictionMatrix_; - - protected: - - DensePredictionMatrix& applyNext(const FeatureMatrix& featureMatrix, uint32 numThreads, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd) override { - if (binaryTransformationPtr_) { - IncrementalPredictionDelegate delegate(realMatrix_, predictionMatrix_, - 
*binaryTransformationPtr_); - PredictionDispatcher().predict( - delegate, featureMatrix, rulesBegin, rulesEnd, numThreads); - } - - return predictionMatrix_; - } - - public: - - IncrementalPredictor(const BinaryPredictor& predictor, uint32 maxRules, - std::shared_ptr binaryTransformationPtr) - : AbstractIncrementalPredictor>( - predictor.featureMatrix_, predictor.model_, predictor.numThreads_, maxRules), - binaryTransformationPtr_(binaryTransformationPtr), - realMatrix_(DensePredictionMatrix(predictor.featureMatrix_.getNumRows(), - predictor.numLabels_, - binaryTransformationPtr_ != nullptr)), - predictionMatrix_(DensePredictionMatrix(predictor.featureMatrix_.getNumRows(), - predictor.numLabels_, - binaryTransformationPtr_ == nullptr)) {} - }; - - class PredictionDelegate final - : public PredictionDispatcher::IPredictionDelegate { - private: - - CContiguousView& realMatrix_; - - CContiguousView& predictionMatrix_; - - const IBinaryTransformation& binaryTransformation_; - - public: - - PredictionDelegate(CContiguousView& realMatrix, CContiguousView& predictionMatrix, - const IBinaryTransformation& binaryTransformation) - : realMatrix_(realMatrix), predictionMatrix_(predictionMatrix), - binaryTransformation_(binaryTransformation) {} - - void predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const override { - uint32 numLabels = realMatrix_.getNumCols(); - CContiguousView::value_iterator realIterator = realMatrix_.values_begin(threadIndex); - setArrayToZeros(realIterator, numLabels); - ScorePredictionDelegate(realMatrix_) - .predictForExample(featureMatrix, rulesBegin, rulesEnd, threadIndex, exampleIndex, - threadIndex); - binaryTransformation_.apply(realIterator, realMatrix_.values_end(threadIndex), - predictionMatrix_.values_begin(predictionIndex), - predictionMatrix_.values_end(predictionIndex)); - } - }; - 
- const FeatureMatrix& featureMatrix_; - - const Model& model_; - - const uint32 numLabels_; - - const uint32 numThreads_; - - const std::shared_ptr binaryTransformationPtr_; - - public: - - /** - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param model A reference to an object of template type `Model` that should be used to - * obtain predictions - * @param numLabels The number of labels to predict for - * @param numThreads The number of CPU threads to be used to make predictions for different - * query examples in parallel. Must be at least 1 - * @param binaryTransformationPtr An unique pointer to an object of type `IBinaryTransformation` that - * should be used to transform aggregated scores into binary predictions or - * a null pointer, if all labels should be predicted as irrelevant - */ - BinaryPredictor(const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads, - std::unique_ptr binaryTransformationPtr) - : featureMatrix_(featureMatrix), model_(model), numLabels_(numLabels), numThreads_(numThreads), - binaryTransformationPtr_(std::move(binaryTransformationPtr)) {} - - /** - * @see `IPredictor::predict` - */ - std::unique_ptr> predict(uint32 maxRules) const override { - uint32 numExamples = featureMatrix_.getNumRows(); - std::unique_ptr> predictionMatrixPtr = - std::make_unique>(numExamples, numLabels_, - binaryTransformationPtr_ == nullptr); - - if (binaryTransformationPtr_) { - CContiguousMatrix scoreMatrix(numThreads_, numLabels_); - PredictionDelegate delegate(scoreMatrix, *predictionMatrixPtr, *binaryTransformationPtr_); - PredictionDispatcher().predict( - delegate, featureMatrix_, model_.used_cbegin(maxRules), model_.used_cend(maxRules), numThreads_); - } - - return predictionMatrixPtr; - } - - /** - * @see `IPredictor::canPredictIncrementally` - */ - bool canPredictIncrementally() const override { - 
return true; - } - - /** - * @see `IPredictor::createIncrementalPredictor` - */ - std::unique_ptr>> createIncrementalPredictor( - uint32 maxRules) const override { - if (maxRules != 0) assertGreaterOrEqual("maxRules", maxRules, 1); - return std::make_unique(*this, maxRules, binaryTransformationPtr_); - } - }; - - /** - * An implementation of the type `ISparseBinaryPredictor` that allows to predict sparse binary labels for given - * query examples by summing up the scores that are predicted by individual rules in a rule-based model and - * transforming the aggregated scores into binary predictions in {0, 1} according to an `IBinaryTransformation`. - * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of - * the query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ - template - class SparseBinaryPredictor final : public ISparseBinaryPredictor { - private: - - class IncrementalPredictor final - : public AbstractIncrementalPredictor { - private: - - class IncrementalPredictionDelegate final - : public BinarySparsePredictionDispatcher::IPredictionDelegate { - private: - - CContiguousView& realMatrix_; - - BinaryLilMatrix& predictionMatrix_; - - const IBinaryTransformation& binaryTransformation_; - - public: - - IncrementalPredictionDelegate(CContiguousView& realMatrix, - BinaryLilMatrix& predictionMatrix, - const IBinaryTransformation& binaryTransformation) - : realMatrix_(realMatrix), predictionMatrix_(predictionMatrix), - binaryTransformation_(binaryTransformation) {} - - uint32 predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const override { - ScorePredictionDelegate(realMatrix_) - .predictForExample(featureMatrix, rulesBegin, rulesEnd, threadIndex, exampleIndex, - predictionIndex); - 
BinaryLilMatrix::row predictionRow = predictionMatrix_[predictionIndex]; - predictionRow.clear(); - binaryTransformation_.apply(realMatrix_.values_cbegin(predictionIndex), - realMatrix_.values_cend(predictionIndex), predictionRow); - return (uint32) predictionRow.size(); - } - }; - - const std::shared_ptr binaryTransformationPtr_; - - DensePredictionMatrix realMatrix_; - - BinaryLilMatrix predictionMatrix_; - - std::unique_ptr predictionMatrixPtr_; - - protected: - - BinarySparsePredictionMatrix& applyNext(const FeatureMatrix& featureMatrix, uint32 numThreads, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd) override { - uint32 numNonZeroElements; - - if (binaryTransformationPtr_) { - IncrementalPredictionDelegate delegate(realMatrix_, predictionMatrix_, - *binaryTransformationPtr_); - numNonZeroElements = BinarySparsePredictionDispatcher().predict( - delegate, featureMatrix, rulesBegin, rulesEnd, numThreads); - } else { - numNonZeroElements = 0; - } - - predictionMatrixPtr_ = createBinarySparsePredictionMatrix( - predictionMatrix_, realMatrix_.getNumCols(), numNonZeroElements); - return *predictionMatrixPtr_; - } - - public: - - IncrementalPredictor(const SparseBinaryPredictor& predictor, uint32 maxRules, - std::shared_ptr binaryTransformationPtr) - : AbstractIncrementalPredictor( - predictor.featureMatrix_, predictor.model_, predictor.numThreads_, maxRules), - binaryTransformationPtr_(binaryTransformationPtr), - realMatrix_(DensePredictionMatrix(predictor.featureMatrix_.getNumRows(), - predictor.numLabels_, - binaryTransformationPtr_ != nullptr)), - predictionMatrix_(BinaryLilMatrix(predictor.featureMatrix_.getNumRows())) {} - }; - - class PredictionDelegate final - : public BinarySparsePredictionDispatcher::IPredictionDelegate { - private: - - CContiguousView& realMatrix_; - - BinaryLilMatrix& predictionMatrix_; - - const IBinaryTransformation& binaryTransformation_; - - public: - - PredictionDelegate(CContiguousView& 
realMatrix, BinaryLilMatrix& predictionMatrix, - const IBinaryTransformation& binaryTransformation) - : realMatrix_(realMatrix), predictionMatrix_(predictionMatrix), - binaryTransformation_(binaryTransformation) {} - - uint32 predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const override { - uint32 numLabels = realMatrix_.getNumCols(); - CContiguousView::value_iterator realIterator = realMatrix_.values_begin(threadIndex); - setArrayToZeros(realIterator, numLabels); - ScorePredictionDelegate(realMatrix_) - .predictForExample(featureMatrix, rulesBegin, rulesEnd, threadIndex, exampleIndex, - threadIndex); - BinaryLilMatrix::row predictionRow = predictionMatrix_[predictionIndex]; - binaryTransformation_.apply(realIterator, realMatrix_.values_end(threadIndex), predictionRow); - return (uint32) predictionRow.size(); - } - }; - - const FeatureMatrix& featureMatrix_; - - const Model& model_; - - const uint32 numLabels_; - - const uint32 numThreads_; - - const std::shared_ptr binaryTransformationPtr_; - - public: - - /** - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param model A reference to an object of template type `Model` that should be used to - * obtain predictions - * @param numLabels The number of labels to predict for - * @param numThreads The number of CPU threads to be used to make predictions for different - * query examples in parallel. 
Must be at least 1 - * @param binaryTransformationPtr An unique pointer to an object of type `IBinaryTransformation` that - * should be used to transform real-valued predictions into binary - * predictions or a null pointer, if no such object is available - */ - SparseBinaryPredictor(const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, - uint32 numThreads, std::unique_ptr binaryTransformationPtr) - : featureMatrix_(featureMatrix), model_(model), numLabels_(numLabels), numThreads_(numThreads), - binaryTransformationPtr_(std::move(binaryTransformationPtr)) {} - - /** - * @see `IPredictor::predict` - */ - std::unique_ptr predict(uint32 maxRules) const override { - uint32 numExamples = featureMatrix_.getNumRows(); - BinaryLilMatrix predictionMatrix(numExamples); - uint32 numNonZeroElements; - - if (binaryTransformationPtr_) { - CContiguousMatrix scoreMatrix(numThreads_, numLabels_); - PredictionDelegate delegate(scoreMatrix, predictionMatrix, *binaryTransformationPtr_); - numNonZeroElements = BinarySparsePredictionDispatcher().predict( - delegate, featureMatrix_, model_.used_cbegin(maxRules), model_.used_cend(maxRules), numThreads_); - } else { - numNonZeroElements = 0; - } - - return createBinarySparsePredictionMatrix(predictionMatrix, numLabels_, numNonZeroElements); - } - - /** - * @see `IPredictor::canPredictIncrementally` - */ - bool canPredictIncrementally() const override { - return true; - } - - /** - * @see `IPredictor::createIncrementalPredictor` - */ - std::unique_ptr> createIncrementalPredictor( - uint32 maxRules) const override { - if (maxRules != 0) assertGreaterOrEqual("maxRules", maxRules, 1); - return std::make_unique(*this, maxRules, binaryTransformationPtr_); - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_example_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_example_wise.hpp deleted file mode 100644 index af376e3e..00000000 --- 
a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_example_wise.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_binary.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a predictor that predicts known label vectors for - * given query examples by comparing the predicted regression scores or probability estimates to the label vectors - * encountered in the training data. - */ - class MLRLBOOSTING_API IExampleWiseBinaryPredictorConfig { - public: - - virtual ~IExampleWiseBinaryPredictorConfig() {} - - /** - * Returns whether binary predictions are derived from probability estimates rather than regression scores - * or not. - * - * @return True, if binary predictions are derived from probability estimates rather than regression scores, - * false otherwise - */ - virtual bool isBasedOnProbabilities() const = 0; - - /** - * Sets whether binary predictions should be derived from probability estimates rather than regression - * scores or not. - * - * @param basedOnProbabilities True, if binary predictions should be derived from probability estimates - * rather than regression scores, false otherwise - * @return A reference to an object of type `IExampleWiseBinaryPredictorConfig` that - * allows further configuration of the predictor - */ - virtual IExampleWiseBinaryPredictorConfig& setBasedOnProbabilities(bool basedOnProbabilities) = 0; - - /** - * Returns whether a model for the calibration of probabilities is used, if available, or not. 
- * - * @return True, if a model for the calibration of probabilities is used, if available, false otherwise - */ - virtual bool isProbabilityCalibrationModelUsed() const = 0; - - /** - * Sets whether a model for the calibration of probabilities should be used, if available, or not. - * - * @param useProbabilityCalibrationModel True, if a model for the calibration of probabilities should be - * used, if available, false otherwise - * @return A reference to an object of type - * `IExampleWiseBinaryPredictorConfig` that allows further - * configuration of the predictor - */ - virtual IExampleWiseBinaryPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) = 0; - }; - - /** - * Allows to configure a predictor that predicts known label vectors for given query examples by comparing the - * predicted regression scores or probability estimates to the label vectors encountered in the training data. - */ - class ExampleWiseBinaryPredictorConfig final : public IExampleWiseBinaryPredictorConfig, - public IBinaryPredictorConfig { - private: - - bool basedOnProbabilities_; - - std::unique_ptr noMarginalProbabilityCalibrationModelPtr_; - - std::unique_ptr noJointProbabilityCalibrationModelPtr_; - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - ExampleWiseBinaryPredictorConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - bool isBasedOnProbabilities() const override; - - IExampleWiseBinaryPredictorConfig& setBasedOnProbabilities(bool basedOnProbabilities) override; - - bool isProbabilityCalibrationModelUsed() 
const override; - - IExampleWiseBinaryPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) override; - - /** - * @see `IPredictorConfig::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory(const IRowWiseFeatureMatrix& featureMatrix, - uint32 numLabels) const override; - - /** - * @see `IBinaryPredictorConfig::createSparsePredictorFactory` - */ - std::unique_ptr createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_gfm.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_gfm.hpp deleted file mode 100644 index 0106b163..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_gfm.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_binary.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a predictor that predicts whether individual labels - * of given query examples are relevant or irrelevant by discretizing the regression scores or probability estimates - * that are predicted for each label according to the general F-measure maximizer (GFM). - */ - class MLRLBOOSTING_API IGfmBinaryPredictorConfig { - public: - - virtual ~IGfmBinaryPredictorConfig() {}; - - /** - * Returns whether a model for the calibration of probabilities is used, if available, or not. 
- * - * @return True, if a model for the calibration of probabilities is used, if available, false otherwise - */ - virtual bool isProbabilityCalibrationModelUsed() const = 0; - - /** - * Sets whether a model for the calibration of probabilities should be used, if available, or not. - * - * @param useProbabilityCalibrationModel True, if a model for the calibration of probabilities should be - * used, if available, false otherwise - * @return A reference to an object of type `IGfmBinaryPredictorConfig` that - * allows further configuration of the predictor - */ - virtual IGfmBinaryPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) = 0; - }; - - /** - * Allows to configure a predictor that predicts whether individual labels of given query examples are relevant or - * irrelevant by discretizing the regression scores or probability estimates that are predicted for each label - * according to the general F-measure maximizer (GFM). - */ - class GfmBinaryPredictorConfig final : public IGfmBinaryPredictorConfig, - public IBinaryPredictorConfig { - private: - - std::unique_ptr noMarginalProbabilityCalibrationModelPtr_; - - std::unique_ptr noJointProbabilityCalibrationModelPtr_; - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - GfmBinaryPredictorConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - bool isProbabilityCalibrationModelUsed() const override; - - IGfmBinaryPredictorConfig& setUseProbabilityCalibrationModel(bool useProbabilityCalibrationModel) override; - - /** - * @see 
`IPredictorFactory::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory(const IRowWiseFeatureMatrix& featureMatrix, - uint32 numLabels) const override; - - /** - * @see `IBinaryPredictorFactory::createSparsePredictorFactory` - */ - std::unique_ptr createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_label_wise.hpp deleted file mode 100644 index dc53212a..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_binary_label_wise.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_binary.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a predictor that predicts whether individual labels - * of given query examples are relevant or irrelevant by discretizing the regression scores or probability estimates - * that are predicted for each label individually. - */ - class MLRLBOOSTING_API ILabelWiseBinaryPredictorConfig { - public: - - virtual ~ILabelWiseBinaryPredictorConfig() {}; - - /** - * Returns whether binary predictions are derived from probability estimates rather than regression scores - * or not. 
- * - * @return True, if binary predictions are derived from probability estimates rather than regression scores, - * false otherwise - */ - virtual bool isBasedOnProbabilities() const = 0; - - /** - * Sets whether binary predictions should be derived from probability estimates rather than regression - * scores or not. - * - * @param basedOnProbabilities True, if binary predictions should be derived from probability estimates - * rather than regression scores, false otherwise - * @return A reference to an object of type `ILabelWiseBinaryPredictorConfig` that - * allows further configuration of the predictor - */ - virtual ILabelWiseBinaryPredictorConfig& setBasedOnProbabilities(bool basedOnProbabilities) = 0; - - /** - * Returns whether a model for the calibration of probabilities is used, if available, or not. - * - * @return True, if a model for the calibration of probabilities is used, if available, false otherwise - */ - virtual bool isProbabilityCalibrationModelUsed() const = 0; - - /** - * Sets whether a model for the calibration of probabilities should be used, if available, or not. - * - * @param useProbabilityCalibrationModel True, if a model for the calibration of probabilities should be - * used, if available, false otherwise - * @return A reference to an object of type `ILabelWiseBinaryPredictorConfig` - * that allows further configuration of the predictor - */ - virtual ILabelWiseBinaryPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) = 0; - }; - - /** - * Allows to configure a predictor that predicts whether individual labels of given query examples are relevant or - * irrelevant by discretizing the regression scores or probability estimates that are predicted for each label - * individually. 
- */ - class LabelWiseBinaryPredictorConfig final : public ILabelWiseBinaryPredictorConfig, - public IBinaryPredictorConfig { - private: - - bool basedOnProbabilities_; - - std::unique_ptr noMarginalProbabilityCalibrationModelPtr_; - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - LabelWiseBinaryPredictorConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - bool isBasedOnProbabilities() const override; - - ILabelWiseBinaryPredictorConfig& setBasedOnProbabilities(bool basedOnProbabilities) override; - - bool isProbabilityCalibrationModelUsed() const override; - - ILabelWiseBinaryPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) override; - - /** - * @see `IPredictorFactory::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory(const IRowWiseFeatureMatrix& featureMatrix, - uint32 numLabels) const override; - - /** - * @see `IBinaryPredictorFactory::createSparsePredictorFactory` - */ - std::unique_ptr createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_auto.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_auto.hpp deleted file mode 100644 index ca7fc7b9..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_auto.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/* 
- * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_probability.hpp" - -namespace boosting { - - /** - * Allows to configure a predictor that automatically decides for a method that is used to predict probabilities for - * given query examples, which estimate the chance of individual labels to be relevant. - */ - class AutomaticProbabilityPredictorConfig final : public IProbabilityPredictorConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - AutomaticProbabilityPredictorConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - /** - * @see `IProbabilityPredictorConfig::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_common.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_common.hpp deleted file mode 100644 index 142f894d..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_common.hpp +++ /dev/null @@ -1,164 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/predictor_score_common.hpp" -#include 
"boosting/prediction/transformation_probability.hpp" -#include "common/prediction/predictor_probability.hpp" - -namespace boosting { - - /** - * An implementation of the type `IProbabilityPredictor` that allows to predict label-wise probability estimates for - * given query examples, estimating the chance of individual labels to be relevant, by summing up the scores that - * are predicted by individual rules in a rule-based model and transforming the aggregated scores into probabilities - * in [0, 1] according to an `IProbabilityTransformation`. - * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of - * the query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ - template - class ProbabilityPredictor final : public IProbabilityPredictor { - private: - - class PredictionDelegate final - : public PredictionDispatcher::IPredictionDelegate { - private: - - CContiguousView& scoreMatrix_; - - CContiguousView& predictionMatrix_; - - const IProbabilityTransformation& probabilityTransformation_; - - public: - - PredictionDelegate(CContiguousView& scoreMatrix, - CContiguousView& predictionMatrix, - const IProbabilityTransformation& probabilityTransformation) - : scoreMatrix_(scoreMatrix), predictionMatrix_(predictionMatrix), - probabilityTransformation_(probabilityTransformation) {} - - void predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const override { - ScorePredictionDelegate(scoreMatrix_) - .predictForExample(featureMatrix, rulesBegin, rulesEnd, threadIndex, exampleIndex, - predictionIndex); - probabilityTransformation_.apply(scoreMatrix_.values_cbegin(predictionIndex), - scoreMatrix_.values_cend(predictionIndex), - predictionMatrix_.values_begin(predictionIndex), - 
predictionMatrix_.values_end(predictionIndex)); - } - }; - - class IncrementalPredictor final - : public AbstractIncrementalPredictor> { - private: - - const std::shared_ptr probabilityTransformationPtr_; - - DensePredictionMatrix scoreMatrix_; - - DensePredictionMatrix predictionMatrix_; - - protected: - - DensePredictionMatrix& applyNext(const FeatureMatrix& featureMatrix, uint32 numThreads, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd) override { - if (probabilityTransformationPtr_) { - PredictionDelegate delegate(scoreMatrix_, predictionMatrix_, - *probabilityTransformationPtr_); - PredictionDispatcher().predict( - delegate, featureMatrix, rulesBegin, rulesEnd, numThreads); - } - - return predictionMatrix_; - } - - public: - - IncrementalPredictor(const ProbabilityPredictor& predictor, uint32 maxRules, - std::shared_ptr probabilityTransformationPtr) - : AbstractIncrementalPredictor>( - predictor.featureMatrix_, predictor.model_, predictor.numThreads_, maxRules), - probabilityTransformationPtr_(probabilityTransformationPtr), - scoreMatrix_(DensePredictionMatrix(predictor.featureMatrix_.getNumRows(), - predictor.numLabels_, - probabilityTransformationPtr_ != nullptr)), - predictionMatrix_(DensePredictionMatrix(predictor.featureMatrix_.getNumRows(), - predictor.numLabels_, - probabilityTransformationPtr_ == nullptr)) {} - }; - - const FeatureMatrix& featureMatrix_; - - const Model& model_; - - const uint32 numLabels_; - - const uint32 numThreads_; - - const std::shared_ptr probabilityTransformationPtr_; - - public: - - /** - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that - * provides row-wise access to the feature values of the query examples - * @param model A reference to an object of template type `Model` that should be - * used to obtain predictions - * @param numLabels The number of labels to predict for - * @param numThreads The number of CPU threads to be used to make 
predictions for - * different query examples in parallel. Must be at least 1 - * @param probabilityTransformationPtr An unique pointer to an object of type `IProbabilityTransformation` - * that should be used to transform aggregated scores into probability - * estimates or a null pointer, if all probabilities should be set to - * zero - */ - ProbabilityPredictor(const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, - uint32 numThreads, - std::unique_ptr probabilityTransformationPtr) - : featureMatrix_(featureMatrix), model_(model), numLabels_(numLabels), numThreads_(numThreads), - probabilityTransformationPtr_(std::move(probabilityTransformationPtr)) {} - - /** - * @see `IPredictor::predict` - */ - std::unique_ptr> predict(uint32 maxRules) const override { - uint32 numExamples = featureMatrix_.getNumRows(); - std::unique_ptr> predictionMatrixPtr = - std::make_unique>(numExamples, numLabels_, true); - - if (probabilityTransformationPtr_) { - PredictionDelegate delegate(*predictionMatrixPtr, *predictionMatrixPtr, - *probabilityTransformationPtr_); - PredictionDispatcher().predict( - delegate, featureMatrix_, model_.used_cbegin(maxRules), model_.used_cend(maxRules), numThreads_); - } - - return predictionMatrixPtr; - } - - /** - * @see `IPredictor::canPredictIncrementally` - */ - bool canPredictIncrementally() const override { - return true; - } - - /** - * @see `IPredictor::createIncrementalPredictor` - */ - std::unique_ptr>> createIncrementalPredictor( - uint32 maxRules) const override { - if (maxRules != 0) assertGreaterOrEqual("maxRules", maxRules, 1); - return std::make_unique(*this, maxRules, probabilityTransformationPtr_); - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_label_wise.hpp deleted file mode 100644 index 330d9c95..00000000 --- 
a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_label_wise.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_probability.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a predictor that predicts label-wise probabilities - * for given query examples by transforming the regression scores that are predicted for each label individually - * into probabilities. - */ - class MLRLBOOSTING_API ILabelWiseProbabilityPredictorConfig { - public: - - virtual ~ILabelWiseProbabilityPredictorConfig() {}; - - /** - * Returns whether a model for the calibration of probabilities is used, if available, or not. - * - * @return True, if a model for the calibration of probabilities is used, if available, false otherwise - */ - virtual bool isProbabilityCalibrationModelUsed() const = 0; - - /** - * Sets whether a model for the calibration of probabilities should be used, if available, or not. - * - * @param useProbabilityCalibrationModel True, if a model for the calibration of probabilities should be - * used, if available, false otherwise - * @return A reference to an object of type - * `ILabelWiseProbabilityPredictorConfig` that allows further - * configuration of the predictor - */ - virtual ILabelWiseProbabilityPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) = 0; - }; - - /** - * Allows to configure a predictor that predicts label-wise probabilities for given query examples by transforming - * the regression scores that are predicted for each label individually into probabilities. 
- * - * summing up the scores that are provided by individual rules of - * an existing rule-based model and transforming the aggregated scores into probabilities in [0, 1] according to a - * certain transformation function that is applied to each label individually. - */ - class LabelWiseProbabilityPredictorConfig final : public ILabelWiseProbabilityPredictorConfig, - public IProbabilityPredictorConfig { - private: - - std::unique_ptr noMarginalProbabilityCalibrationModelPtr_; - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - LabelWiseProbabilityPredictorConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - bool isProbabilityCalibrationModelUsed() const override; - - ILabelWiseProbabilityPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) override; - - /** - * @see `IProbabilityPredictorConfig::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_marginalized.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_marginalized.hpp deleted file mode 100644 index 6af28042..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_probability_marginalized.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ 
-#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_probability.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a predictor that predicts label-wise probabilities - * for given query examples by marginalizing over the joint probabilities of known label vectors. - */ - class MLRLBOOSTING_API IMarginalizedProbabilityPredictorConfig { - public: - - virtual ~IMarginalizedProbabilityPredictorConfig() {}; - - /** - * Returns whether a model for the calibration of probabilities is used, if available, or not. - * - * @return True, if a model for the calibration of probabilities is used, if available, false otherwise - */ - virtual bool isProbabilityCalibrationModelUsed() const = 0; - - /** - * Sets whether a model for the calibration of probabilities should be used, if available, or not. - * - * @param useProbabilityCalibrationModel True, if a model for the calibration of probabilities should be - * used, if available, false otherwise - * @return A reference to an object of type - * `IMarginalizedProbabilityPredictorConfig` that allows further - * configuration of the predictor - */ - virtual IMarginalizedProbabilityPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) = 0; - }; - - /** - * Allows to configure a predictor that predicts label-wise probabilities for given query examples by marginalizing - * over the joint probabilities of known label vectors. 
- */ - class MarginalizedProbabilityPredictorConfig final : public IMarginalizedProbabilityPredictorConfig, - public IProbabilityPredictorConfig { - private: - - std::unique_ptr noMarginalProbabilityCalibrationModelPtr_; - - std::unique_ptr noJointProbabilityCalibrationModelPtr_; - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several - * query examples in parallel - */ - MarginalizedProbabilityPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - bool isProbabilityCalibrationModelUsed() const override; - - IMarginalizedProbabilityPredictorConfig& setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) override; - - /** - * @see `IProbabilityPredictorConfig::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_score_common.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_score_common.hpp deleted file mode 100644 index 3e55e6e7..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_score_common.hpp +++ /dev/null @@ -1,247 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/head_complete.hpp" -#include "common/model/head_partial.hpp" -#include "common/prediction/predictor_common.hpp" -#include "common/prediction/predictor_score.hpp" -#include 
"common/util/validation.hpp" - -namespace boosting { - - static inline void applyHead(const CompleteHead& head, VectorView::iterator iterator) { - CompleteHead::score_const_iterator scoreIterator = head.scores_cbegin(); - uint32 numElements = head.getNumElements(); - - for (uint32 i = 0; i < numElements; i++) { - iterator[i] += scoreIterator[i]; - } - } - - static inline void applyHead(const PartialHead& head, VectorView::iterator iterator) { - PartialHead::score_const_iterator scoreIterator = head.scores_cbegin(); - PartialHead::index_const_iterator indexIterator = head.indices_cbegin(); - uint32 numElements = head.getNumElements(); - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - iterator[index] += scoreIterator[i]; - } - } - - static inline void applyHead(const IHead& head, VectorView::iterator scoreIterator) { - auto completeHeadVisitor = [=](const CompleteHead& head) { - applyHead(head, scoreIterator); - }; - auto partialHeadVisitor = [=](const PartialHead& head) { - applyHead(head, scoreIterator); - }; - head.visit(completeHeadVisitor, partialHeadVisitor); - } - - static inline void applyRule(const RuleList::Rule& rule, - VectorConstView::const_iterator featureValuesBegin, - VectorConstView::const_iterator featureValuesEnd, - VectorView::iterator scoreIterator) { - const IBody& body = rule.getBody(); - - if (body.covers(featureValuesBegin, featureValuesEnd)) { - const IHead& head = rule.getHead(); - applyHead(head, scoreIterator); - } - } - - static inline void applyRules(RuleList::const_iterator rulesBegin, RuleList::const_iterator rulesEnd, - VectorConstView::const_iterator featureValuesBegin, - VectorConstView::const_iterator featureValuesEnd, - VectorView::iterator scoreIterator) { - for (; rulesBegin != rulesEnd; rulesBegin++) { - const RuleList::Rule& rule = *rulesBegin; - applyRule(rule, featureValuesBegin, featureValuesEnd, scoreIterator); - } - } - - static inline void applyRule(const RuleList::Rule& rule, - 
CsrConstView::index_const_iterator featureIndicesBegin, - CsrConstView::index_const_iterator featureIndicesEnd, - CsrConstView::value_const_iterator featureValuesBegin, - CsrConstView::value_const_iterator featureValuesEnd, - VectorView::iterator scoreIterator, float32* tmpArray1, uint32* tmpArray2, - uint32 n) { - const IBody& body = rule.getBody(); - - if (body.covers(featureIndicesBegin, featureIndicesEnd, featureValuesBegin, featureValuesEnd, tmpArray1, - tmpArray2, n)) { - const IHead& head = rule.getHead(); - applyHead(head, scoreIterator); - } - } - - static inline void applyRules(RuleList::const_iterator rulesBegin, RuleList::const_iterator rulesEnd, - uint32 numFeatures, - CsrConstView::index_const_iterator featureIndicesBegin, - CsrConstView::index_const_iterator featureIndicesEnd, - CsrConstView::value_const_iterator featureValuesBegin, - CsrConstView::value_const_iterator featureValuesEnd, - VectorView::iterator scoreIterator) { - float32* tmpArray1 = new float32[numFeatures]; - uint32* tmpArray2 = new uint32[numFeatures] {}; - uint32 n = 1; - - for (; rulesBegin != rulesEnd; rulesBegin++) { - const RuleList::Rule& rule = *rulesBegin; - applyRule(rule, featureIndicesBegin, featureIndicesEnd, featureValuesBegin, featureValuesEnd, scoreIterator, - &tmpArray1[0], &tmpArray2[0], n); - n++; - } - - delete[] tmpArray1; - delete[] tmpArray2; - } - - static inline void aggregatePredictedScores(const CContiguousConstView& featureMatrix, - RuleList::const_iterator rulesBegin, RuleList::const_iterator rulesEnd, - CContiguousView& scoreMatrix, uint32 exampleIndex, - uint32 predictionIndex) { - applyRules(rulesBegin, rulesEnd, featureMatrix.values_cbegin(exampleIndex), - featureMatrix.values_cend(exampleIndex), scoreMatrix.values_begin(predictionIndex)); - } - - static inline void aggregatePredictedScores(const CsrConstView& featureMatrix, - RuleList::const_iterator rulesBegin, RuleList::const_iterator rulesEnd, - CContiguousView& scoreMatrix, uint32 exampleIndex, - 
uint32 predictionIndex) { - uint32 numFeatures = featureMatrix.getNumCols(); - applyRules(rulesBegin, rulesEnd, numFeatures, featureMatrix.indices_cbegin(exampleIndex), - featureMatrix.indices_cend(exampleIndex), featureMatrix.values_cbegin(exampleIndex), - featureMatrix.values_cend(exampleIndex), scoreMatrix.values_begin(predictionIndex)); - } - - /** - * An implementation of the type `PredictionDispatcher::IPredictionDelegate` that aggregates the scores that are - * predicted by the individual rules in a model and stores them in a matrix. - * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of - * the query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ - template - class ScorePredictionDelegate final - : public PredictionDispatcher::IPredictionDelegate { - private: - - CContiguousView& scoreMatrix_; - - public: - - /** - * @param scoreMatrix A reference to an object of type `CContiguousView` that should be used to store the - * aggregated scores - */ - ScorePredictionDelegate(CContiguousView& scoreMatrix) : scoreMatrix_(scoreMatrix) {} - - /** - * @see `PredictionDispatcher::IPredictionDelegate::predictForExample` - */ - void predictForExample(const FeatureMatrix& featureMatrix, typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, uint32 exampleIndex, - uint32 predictionIndex) const override { - aggregatePredictedScores(featureMatrix, rulesBegin, rulesEnd, scoreMatrix_, exampleIndex, - predictionIndex); - } - }; - - /** - * An implementation of the type `IScorePredictor` that allows to predict label-wise regression scores for given - * query examples by summing up the scores that are predicted by individual rules in a rule-based model for each - * label individually. 
- * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of - * the query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ - template - class ScorePredictor final : public IScorePredictor { - private: - - class IncrementalPredictor final - : public AbstractIncrementalPredictor> { - private: - - DensePredictionMatrix predictionMatrix_; - - protected: - - DensePredictionMatrix& applyNext(const FeatureMatrix& featureMatrix, uint32 numThreads, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd) override { - ScorePredictionDelegate delegate(predictionMatrix_); - PredictionDispatcher().predict(delegate, featureMatrix, - rulesBegin, rulesEnd, numThreads); - return predictionMatrix_; - } - - public: - - IncrementalPredictor(const ScorePredictor& predictor, uint32 maxRules) - : AbstractIncrementalPredictor>( - predictor.featureMatrix_, predictor.model_, predictor.numThreads_, maxRules), - predictionMatrix_(DensePredictionMatrix(predictor.featureMatrix_.getNumRows(), - predictor.numLabels_, true)) {} - }; - - const FeatureMatrix& featureMatrix_; - - const Model& model_; - - const uint32 numLabels_; - - const uint32 numThreads_; - - public: - - /** - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param model A reference to an object of template type `Model` that should be used to obtain - * predictions - * @param numLabels The number of labels to predict for - * @param numThreads The number of CPU threads to be used to make predictions for different query - * examples in parallel. 
Must be at least 1 - */ - ScorePredictor(const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads) - : featureMatrix_(featureMatrix), model_(model), numLabels_(numLabels), numThreads_(numThreads) {} - - /** - * @see `IPredictor::predict` - */ - std::unique_ptr> predict(uint32 maxRules) const override { - uint32 numExamples = featureMatrix_.getNumRows(); - std::unique_ptr> predictionMatrixPtr = - std::make_unique>(numExamples, numLabels_, true); - ScorePredictionDelegate delegate(*predictionMatrixPtr); - PredictionDispatcher().predict( - delegate, featureMatrix_, model_.used_cbegin(maxRules), model_.used_cend(maxRules), numThreads_); - return predictionMatrixPtr; - } - - /** - * @see `IPredictor::canPredictIncrementally` - */ - bool canPredictIncrementally() const override { - return true; - } - - /** - * @see `IPredictor::createIncrementalPredictor` - */ - std::unique_ptr>> createIncrementalPredictor( - uint32 maxRules) const override { - if (maxRules != 0) assertGreaterOrEqual("maxRules", maxRules, 1); - return std::make_unique(*this, maxRules); - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/predictor_score_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/predictor_score_label_wise.hpp deleted file mode 100644 index 8fed97bc..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/predictor_score_label_wise.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/multi_threading/multi_threading.hpp" -#include "common/prediction/predictor_score.hpp" - -namespace boosting { - - /** - * Allows to configure a predictor that predicts label-wise regression scores for given query examples by summing up - * the scores that are provided by individual rules for each label individually. 
- */ - class LabelWiseScorePredictorConfig final : public IScorePredictorConfig { - private: - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used to predict for several query - * examples in parallel - */ - LabelWiseScorePredictorConfig(const std::unique_ptr& multiThreadingConfigPtr); - - /** - * @see `IPredictorConfig::createPredictorFactory` - */ - std::unique_ptr createPredictorFactory(const IRowWiseFeatureMatrix& featureMatrix, - uint32 numLabels) const override; - - /** - * @see `IPredictorConfig::isLabelVectorSetNeeded` - */ - bool isLabelVectorSetNeeded() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/probability_calibration_isotonic.hpp b/cpp/subprojects/boosting/include/boosting/prediction/probability_calibration_isotonic.hpp deleted file mode 100644 index e4d4a53a..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/probability_calibration_isotonic.hpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/macros.hpp" -#include "common/prediction/probability_calibration_isotonic.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a calibrator that fits a model for the calibration - * of marginal probabilities via isotonic regression. - */ - class MLRLBOOSTING_API IIsotonicMarginalProbabilityCalibratorConfig { - public: - - virtual ~IIsotonicMarginalProbabilityCalibratorConfig() {}; - - /** - * Returns whether the calibration model is fit to the examples in the holdout set, if available, or not. 
- * - * @return True, if the calibration model is fit to the examples in the holdout set, if available, false - * if the training set is used instead - */ - virtual bool isHoldoutSetUsed() const = 0; - - /** - * Sets whether the calibration model should be fit to the examples in the holdout set, if available, or - * not. - * - * @param useHoldoutSet True, if the calibration model should be fit to the examples in the holdout set, if - * available, false if the training set should be used instead - * @return A reference to an object of type `IIsotonicMarginalProbabilityCalibratorConfig` that - * allows further configuration of the calibrator - */ - virtual IIsotonicMarginalProbabilityCalibratorConfig& setUseHoldoutSet(bool useHoldoutSet) = 0; - }; - - /** - * Allows to configure a calibrator that fits a model for the calibration of marginal probabilities via isotonic - * regression. - */ - class IsotonicMarginalProbabilityCalibratorConfig final : public IIsotonicMarginalProbabilityCalibratorConfig, - public IMarginalProbabilityCalibratorConfig { - private: - - bool useHoldoutSet_; - - const std::unique_ptr& lossConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss function - */ - IsotonicMarginalProbabilityCalibratorConfig(const std::unique_ptr& lossConfigPtr); - - bool isHoldoutSetUsed() const override; - - IIsotonicMarginalProbabilityCalibratorConfig& setUseHoldoutSet(bool useHoldoutSet) override; - - /** - * @see `IMarginalProbabilityCalibratorConfig::shouldUseHoldoutSet` - */ - bool shouldUseHoldoutSet() const override; - - /** - * @see `IMarginalProbabilityCalibratorConfig::createMarginalProbabilityCalibratorFactory` - */ - std::unique_ptr createMarginalProbabilityCalibratorFactory() - const override; - }; - - /** - * Defines an interface for all classes that allow to configure a calibrator that fits a model for the calibration - * of joint probabilities via isotonic regression. 
- */ - class MLRLBOOSTING_API IIsotonicJointProbabilityCalibratorConfig { - public: - - virtual ~IIsotonicJointProbabilityCalibratorConfig() {}; - - /** - * Returns whether the calibration model is fit to the examples in the holdout set, if available, or not. - * - * @return True, if the calibration model is fit to the examples in the holdout set, if available, false - * if the training set is used instead - */ - virtual bool isHoldoutSetUsed() const = 0; - - /** - * Sets whether the calibration model should be fit to the examples in the holdout set, if available, or - * not. - * - * @param useHoldoutSet True, if the calibration model should be fit to the examples in the holdout set, if - * available, false if the training set should be used instead - * @return A reference to an object of type `IIsotonicJointProbabilityCalibratorConfig` that - * allows further configuration of the calibrator - */ - virtual IIsotonicJointProbabilityCalibratorConfig& setUseHoldoutSet(bool useHoldoutSet) = 0; - }; - - /** - * Allows to configure a calibrator that fits a model for the calibration of joint probabilities via isotonic - * regression. 
- */ - class IsotonicJointProbabilityCalibratorConfig final : public IIsotonicJointProbabilityCalibratorConfig, - public IJointProbabilityCalibratorConfig { - private: - - bool useHoldoutSet_; - - const std::unique_ptr& lossConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss function - */ - IsotonicJointProbabilityCalibratorConfig(const std::unique_ptr& lossConfigPtr); - - bool isHoldoutSetUsed() const override; - - IIsotonicJointProbabilityCalibratorConfig& setUseHoldoutSet(bool useHoldoutSet) override; - - /** - * @see `IJointProbabilityCalibratorConfig::shouldUseHoldoutSet` - */ - bool shouldUseHoldoutSet() const override; - - /** - * @see `IJointProbabilityCalibratorConfig::isLabelVectorSetNeeeded` - */ - bool isLabelVectorSetNeeded() const override; - - /** - * @see `IJointProbabilityCalibratorConfig::createJointProbabilityCalibratorFactory` - */ - std::unique_ptr createJointProbabilityCalibratorFactory() - const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_chain_rule.hpp b/cpp/subprojects/boosting/include/boosting/prediction/probability_function_chain_rule.hpp deleted file mode 100644 index c2be786a..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_chain_rule.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/probability_function_joint.hpp" -#include "boosting/prediction/probability_function_marginal.hpp" - -namespace boosting { - - /** - * Allows to create instances of the type `IJointProbabilityFunction` that transform regression scores that are - * predicted for an example into joint probabilities by applying an `IMarginalProbabilityFunction` to each one and - * calculating the product of the resulting marginal probabilities according to the probabilistic chain rule. 
- */ - class ChainRuleFactory final : public IJointProbabilityFunctionFactory { - private: - - std::unique_ptr marginalProbabilityFunctionFactoryPtr_; - - public: - - /** - * @param marginalProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IMarginalProbabilityFunctionFactory` that allows to create - * implementations of the function to be used to transform - * regression scores into marginal probabilities - */ - ChainRuleFactory( - std::unique_ptr marginalProbabilityFunctionFactoryPtr); - - std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_joint.hpp b/cpp/subprojects/boosting/include/boosting/prediction/probability_function_joint.hpp deleted file mode 100644 index 9b87bbc3..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_joint.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/probability_function_marginal.hpp" -#include "common/data/matrix_sparse_set.hpp" -#include "common/data/vector_dense.hpp" -#include "common/math/math.hpp" -#include "common/measures/measure_distance.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to transform the regression scores that are predicted an example - * into a joint probability that corresponds to the chance of a label vector being correct. - */ - class IJointProbabilityFunction : public IDistanceMeasure { - public: - - virtual ~IJointProbabilityFunction() {}; - - /** - * Transforms the regression scores that are predicted for an example into a joint probability that - * corresponds to the chance of a given label vector being correct. 
- * - * @param labelVectorIndex The index of the label vector, the scores should be compared to - * @param labelVector A reference to an object of type `LabelVector`, the scores should be compared to - * @param scoresBegin A `VectorConstView::const_iterator` to the beginning of the scores - * @param scoresEnd A `VectorConstView::const_iterator` to the end of the scores - * @return The joint probability that corresponds to the chance of the given label vector - * being correct - */ - virtual float64 transformScoresIntoJointProbability( - uint32 labelVectorIndex, const LabelVector& labelVector, - VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd) const = 0; - - /** - * Transforms the regression scores that are predicted for an example into a joint probability that - * corresponds to the chance of a given label vector being correct. - * - * @param labelVectorIndex The index of the label vector, the scores should be compared to - * @param labelVector A reference to an object of type `LabelVector`, the scores should be compared to - * @param scores A `SparseSetMatrix::const_row` that stores the scores - * @param numLabels The total number of available labels - * @return The joint probability the corresponds to the chance of the given label vector - * being correct - */ - virtual float64 transformScoresIntoJointProbability(uint32 labelVectorIndex, const LabelVector& labelVector, - SparseSetMatrix::const_row scores, - uint32 numLabels) const = 0; - - /** - * Transforms the regression scores that are predicted for an example into joint probabilities that - * correspond to the chance of individual label vectors contained by a `LabelVectorSet` being correct. 
- * - * @param labelVectorSet A reference to an object of type `LabelVectorSet` that contains the label - * vectors, the scores should be compared to - * @param scoresBegin A `VectorConstView::const_iterator` to the beginning of the scores - * @param scoresEnd A `VectorConstView::const_iterator` to the end of the scores - * @return An unique pointer to an object of type `DenseVector` that stores the joint - * probabilities that correspond to the chance of the given label vectors being - * correct - */ - virtual std::unique_ptr> transformScoresIntoJointProbabilities( - const LabelVectorSet& labelVectorSet, VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd) const { - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - std::unique_ptr> jointProbabilityVectorPtr = - std::make_unique>(numLabelVectors); - DenseVector::iterator jointProbabilityIterator = jointProbabilityVectorPtr->begin(); - float64 sumOfJointProbabilities = 0; - - // Calculate joint probabilities... - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - - for (uint32 i = 0; i < numLabelVectors; i++) { - const LabelVector& labelVector = *labelVectorIterator[i]; - float64 jointProbability = - this->transformScoresIntoJointProbability(i, labelVector, scoresBegin, scoresEnd); - sumOfJointProbabilities += jointProbability; - jointProbabilityIterator[i] = jointProbability; - } - - // Normalize joint probabilities... 
- for (uint32 i = 0; i < numLabelVectors; i++) { - float64 jointProbability = jointProbabilityIterator[i]; - jointProbabilityIterator[i] = divideOrZero(jointProbability, sumOfJointProbabilities); - } - - return jointProbabilityVectorPtr; - } - - /** - * @see `IDistanceMeasure::measureDistance` - */ - float64 measureDistance(uint32 labelVectorIndex, const LabelVector& labelVector, - VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd) const override final { - return 1.0 - - this->transformScoresIntoJointProbability(labelVectorIndex, labelVector, scoresBegin, - scoresEnd); - } - }; - - /** - * Defines an interface for all factories that allow to create instances of the type `IJointProbabilityFunction`. - */ - class IJointProbabilityFunctionFactory : public IDistanceMeasureFactory { - public: - - virtual ~IJointProbabilityFunctionFactory() {}; - - /** - * Creates and returns a new object of the type `IJointProbabilityFunction`. - * - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that should be used for - * the calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that should be used for - * the calibration of marginal probabilities - * @return An unique pointer to an object of type - * `IJointProbabilityFunction` that has been created - */ - virtual std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) const = 0; - - /** - * @see `IDistanceMeasureFactory::createDistanceMeasure` - */ - std::unique_ptr createDistanceMeasure( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) const override final { - return 
this->create(marginalProbabilityCalibrationModel, jointProbabilityCalibrationModel); - } - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_logistic.hpp b/cpp/subprojects/boosting/include/boosting/prediction/probability_function_logistic.hpp deleted file mode 100644 index e98957e9..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_logistic.hpp +++ /dev/null @@ -1,21 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/probability_function_marginal.hpp" - -namespace boosting { - - /** - * Allows to create instances of the type `IMarginalProbabilityFunction` that transform regression scores that are - * predicted for individual labels into marginal probabilities via the logistic sigmoid function. - */ - class LogisticFunctionFactory final : public IMarginalProbabilityFunctionFactory { - public: - - std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_marginal.hpp b/cpp/subprojects/boosting/include/boosting/prediction/probability_function_marginal.hpp deleted file mode 100644 index 43f02476..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/probability_function_marginal.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/prediction/probability_calibration_marginal.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to transform regression scores that are predicted for individual - * labels into marginal probabilities. - */ - class IMarginalProbabilityFunction { - public: - - virtual ~IMarginalProbabilityFunction() {}; - - /** - * Transforms the regression score that is predicted for a specific label into a probability. 
- * - * @param labelIndex The index of the label, the regression score is predicted for - * @param score The regression score that is predicted - * @return The probability into which the given score was transformed - */ - virtual float64 transformScoreIntoMarginalProbability(uint32 labelIndex, float64 score) const = 0; - }; - - /** - * Defines an interface for all factories that allow to create instances of the type `IMarginalProbabilityFunction`. - */ - class IMarginalProbabilityFunctionFactory { - public: - - virtual ~IMarginalProbabilityFunctionFactory() {}; - - /** - * Creates and returns a new object of the type `IMarginalProbabilityFunction`. - * - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that should be used - * for the calibration of marginal probabilities - * @return An unique pointer to an object of type - * `IMarginalProbabilityFunction` that has been created - */ - virtual std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary.hpp deleted file mode 100644 index a45204a2..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_lil_binary.hpp" -#include "common/data/view_vector.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to transform regression scores into binary predictions. - */ - class IBinaryTransformation { - public: - - virtual ~IBinaryTransformation() {}; - - /** - * Transforms regression scores into binary predictions. 
- * - * @param scoresBegin An iterator of type `VectorConstView::const_iterator` to the beginning of the - * regression scores - * @param scoresEnd An iterator of type `VectorConstView::const_iterator` to the end of the - * regression scores - * @param predictionBegin An iterator of type `VectorView::iterator` to the beginning of the binary - * predictions - * @param predictionEnd An iterator of type `VectorView::iterator` to the end of the binary predictions - */ - virtual void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const = 0; - - /** - * Transforms regression scores into sparse binary predictions. - * - * @param scoresBegin An iterator of type `VectorConstView::const_iterator` to the beginning of the - * regression scores - * @param scoresEnd An iterator of type `VectorConstView::const_iterator` to the end of the regression - * scores - * @param predictionRow An object of type `BinaryLilMatrix::row` that should be used to store the binary - * predictions - */ - virtual void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_example_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_example_wise.hpp deleted file mode 100644 index 23b2afda..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_example_wise.hpp +++ /dev/null @@ -1,44 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/transformation_binary.hpp" -#include "common/measures/measure_distance.hpp" -#include "common/prediction/label_vector_set.hpp" - -namespace boosting { - - /** - * An implementation of the class `IBinaryTransformation` that 
transforms regression scores into binary predictions - * by comparing the scores to the known label vectors according to a certain distance measure and picking the - * closest one. - */ - class ExampleWiseBinaryTransformation final : public IBinaryTransformation { - private: - - const LabelVectorSet& labelVectorSet_; - - const std::unique_ptr distanceMeasurePtr_; - - public: - - /** - * @param labelVectorSet A reference to an object of type `LabelVectorSet` that stores all known - * label vectors - * @param distanceMeasurePtr An unique pointer to an object of type `IDistanceMeasure` that implements - * the distance measure for comparing regression scores to known label vectors - */ - ExampleWiseBinaryTransformation(const LabelVectorSet& labelVectorSet, - std::unique_ptr distanceMeasurePtr); - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const override; - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_gfm.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_gfm.hpp deleted file mode 100644 index f7c973d9..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_gfm.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/probability_function_joint.hpp" -#include "boosting/prediction/transformation_binary.hpp" -#include "common/prediction/label_vector_set.hpp" - -namespace boosting { - - /** - * An implementation of the class `IBinaryTransformation` that transforms regression scores into binary predictions - * according to the general F-measure maximizer (GFM). 
- */ - class GfmBinaryTransformation final : public IBinaryTransformation { - private: - - const LabelVectorSet& labelVectorSet_; - - const uint32 maxLabelCardinality_; - - const std::unique_ptr jointProbabilityFunctionPtr_; - - public: - - /** - * @param labelVectorSet A reference to an object of type `LabelVectorSet` that stores all - * known label vectors - * @param jointProbabilityFunctionPtr An unique pointer to an object of type `JointProbabilityFunction` - * that should be used to transform regression scores that are - * predicted for an example into a joint probability - */ - GfmBinaryTransformation(const LabelVectorSet& labelVectorSet, - std::unique_ptr jointProbabilityFunctionPtr); - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const override; - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_label_wise.hpp deleted file mode 100644 index db741e07..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_binary_label_wise.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/discretization_function.hpp" -#include "boosting/prediction/transformation_binary.hpp" - -namespace boosting { - - /** - * An implementation of the class `IBinaryTransformation` that transforms regression scores that are predicted for - * individual labels into binary predictions via element-wise application of an `IDiscretizationFunction`. 
- */ - class LabelWiseBinaryTransformation final : public IBinaryTransformation { - private: - - std::unique_ptr discretizationFunctionPtr_; - - public: - - /** - * @param discretizationFunctionPtr An unique pointer to an object of type `IDiscretizationFunction` that - * should be used to discretize regression scores - */ - LabelWiseBinaryTransformation(std::unique_ptr discretizationFunctionPtr); - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const override; - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability.hpp deleted file mode 100644 index 85e7cbfb..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_vector.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to transform aggregated scores into probability estimates. - */ - class IProbabilityTransformation { - public: - - virtual ~IProbabilityTransformation() {}; - - /** - * Transforms aggregated scores into probability estimates. 
- * - * @param scoresBegin An iterator of type `VectorConstView::const_iterator` to the beginning of - * the aggregated scores - * @param scoresEnd An iterator of type `VectorConstView::const_iterator` to the end of the - * the aggregated scores - * @param probabilitiesBegin An iterator of type `VectorView::iterator` to the beginning of the - * probabilities - * @param probabilitiesEnd An iterator of type `VectorView::iterator` to the end of the probabilities - */ - virtual void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator probabilitiesBegin, - VectorView::iterator probabilitiesEnd) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_label_wise.hpp deleted file mode 100644 index f38f25e4..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_label_wise.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/probability_function_marginal.hpp" -#include "boosting/prediction/transformation_probability.hpp" - -namespace boosting { - - /** - * An implementation of the class `IProbabilityTransformation` that transforms aggregated scores into probability - * estimates via element-wise application of a `IMarginalProbabilityFunction`. 
- */ - class LabelWiseProbabilityTransformation final : public IProbabilityTransformation { - private: - - const std::unique_ptr marginalProbabilityFunctionPtr_; - - public: - - /** - * @param marginalProbabilityFunctionPtr An unique pointer to an object of type - * `IMarginalProbabilityFunction` that should be used to transform - * regression scores that are predicted for individual labels into - * probabilities - */ - LabelWiseProbabilityTransformation( - std::unique_ptr marginalProbabilityFunctionPtr); - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator probabilitiesBegin, - VectorView::iterator probabilitiesEnd) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_marginalized.hpp b/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_marginalized.hpp deleted file mode 100644 index 70225c45..00000000 --- a/cpp/subprojects/boosting/include/boosting/prediction/transformation_probability_marginalized.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/prediction/probability_function_joint.hpp" -#include "boosting/prediction/transformation_probability.hpp" - -namespace boosting { - - /** - * An implementation of the class `IProbabilityTransformation` that transforms aggregated scores into marginalized - * probability estimates. 
- */ - class MarginalizedProbabilityTransformation final : public IProbabilityTransformation { - private: - - const LabelVectorSet& labelVectorSet_; - - const std::unique_ptr jointProbabilityFunctionPtr_; - - public: - - /** - * @param labelVectorSet A reference to an object of type `LabelVectorSet` that stores all - * known label vectors - * @param jointProbabilityFunctionPtr An unique pointer to an object of type `JointProbabilityFunction` - * that should be used to transform regression scores that are - * predicted for individual labels into probabilities - */ - MarginalizedProbabilityTransformation( - const LabelVectorSet& labelVectorSet, - std::unique_ptr jointProbabilityFunctionPtr); - - void apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator probabilitiesBegin, - VectorView::iterator probabilitiesEnd) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type.hpp deleted file mode 100644 index fc53a454..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_example_wise.hpp" -#include "boosting/losses/loss_label_wise_sparse.hpp" -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "common/input/feature_matrix.hpp" -#include "common/input/label_matrix_row_wise.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure the heads of the rules that should be induced by a - * rule learner. - */ - class IHeadConfig { - public: - - virtual ~IHeadConfig() {}; - - /** - * Creates and returns a new object of type `IStatisticsProviderFactory` according to the specified - * configuration. 
- * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the - * feature values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides access to the - * labels of the training examples - * @param lossConfig A reference to an object of type `ILabelWiseLossConfig` that specifies the - * configuration of the loss function - * @return An unique pointer to an object of type `IStatisticsProviderFactory` that has been - * created - */ - virtual std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsProviderFactory` according to the specified - * configuration. - * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the - * feature values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides access to the - * labels of the training examples - * @param lossConfig A reference to an object of type `ISparseLabelWiseLossConfig` that specifies the - * configuration of the loss function - * @return An unique pointer to an object of type `IStatisticsProviderFactory` that has been - * created - */ - virtual std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsProviderFactory` according to the specified - * configuration. 
- * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the - * feature values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides access to the - * labels of the training examples - * @param lossConfig A reference to an object of type `IExampleWiseLossConfig` that specifies the - * configuration of the loss function - * @param blas A reference to an object of type `Blas` that allows to execute BLAS routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK routines - * @return An unique pointer to an object of type `IStatisticsProviderFactory` that has been - * created - */ - virtual std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const = 0; - - /** - * Returns, whether the heads of rules are partial, i.e., they predict for a subset of the available labels, - * or not. - * - * @return True, if the heads of rules are partial, false otherwise - */ - virtual bool isPartial() const = 0; - - /** - * Returns whether the rule heads predict for a single label or not. 
- * - * @return True, if the rule heads predict for a single label, false otherwise - */ - virtual bool isSingleLabel() const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_auto.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_auto.hpp deleted file mode 100644 index 76f54aeb..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_auto.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/rule_evaluation/regularization.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Allows to configure a method that automatically decides for the type of rule heads to be used. - */ - class AutomaticHeadConfig final : public IHeadConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& labelBinningConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - const std::unique_ptr& l1RegularizationConfigPtr_; - - const std::unique_ptr& l2RegularizationConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the - * loss function - * @param labelBinningConfigPtr A reference to an unique pointer that stores the configuration of the - * method for assigning labels to bins - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - * @param l1RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L1 - * regularization - * @param l2RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L2 - * regularization - */ - AutomaticHeadConfig(const 
std::unique_ptr& lossConfigPtr, - const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr, - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const override; - - bool isPartial() const override; - - bool isSingleLabel() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_complete.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_complete.hpp deleted file mode 100644 index 0dd032f3..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_complete.hpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/rule_evaluation/regularization.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Allows to configure complete rule heads that predict for all available labels. 
- */ - class CompleteHeadConfig final : public IHeadConfig { - private: - - const std::unique_ptr& labelBinningConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - const std::unique_ptr& l1RegularizationConfigPtr_; - - const std::unique_ptr& l2RegularizationConfigPtr_; - - public: - - /** - * @param labelBinningConfigPtr A reference to an unique pointer that stores the configuration of the - * method for assigning labels to bins - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - * @param l1RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L1 - * regularization - * @param l2RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L2 - * regularization - */ - CompleteHeadConfig(const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr, - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const override; - - bool isPartial() const override; - - bool isSingleLabel() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_dynamic.hpp 
b/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_dynamic.hpp deleted file mode 100644 index 8bc7c882..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_dynamic.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/macros.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/rule_evaluation/regularization.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure partial rule heads that predict for a subset of the - * available labels that is determined dynamically. Only those labels for which the square of the predictive quality - * exceeds a certain threshold are included in a rule head. - */ - class MLRLBOOSTING_API IDynamicPartialHeadConfig { - public: - - virtual ~IDynamicPartialHeadConfig() {}; - - /** - * Returns the threshold that affects for how many labels the rule heads predict. - * - * @return The threshold that affects for how many labels the rule heads predict - */ - virtual float32 getThreshold() const = 0; - - /** - * Sets the threshold that affects for how many labels the rule heads should predict. - * - * @param threshold A threshold that affects for how many labels the rule heads should predict. A smaller - * threshold results in less labels being selected. A greater threshold results in more - * labels being selected. E.g., a threshold of 0.2 means that a rule will only predict for - * a label if the estimated predictive quality `q` for this particular label satisfies the - * inequality `q^exponent > q_best^exponent * (1 - 0.2)`, where `q_best` is the best - * quality among all labels. 
Must be in (0, 1) - * @return A reference to an object of type `IDynamicPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IDynamicPartialHeadConfig& setThreshold(float32 threshold) = 0; - - /** - * Sets the exponent that is used to weigh the estimated predictive quality for individual labels. - * - * @return The exponent that is used to weight the estimated predictive quality for individual labels - */ - virtual float32 getExponent() const = 0; - - /** - * Sets the exponent that should be used to weigh the estimated predictive quality for individual labels. - * - * @param exponent An exponent that should be used to weigh the estimated predictive quality for individual - * labels. E.g., an exponent of 2 means that the estimated predictive quality `q` for a - * particular label is weighed as `q^2`. Must be at least 1 - * @return A reference to an object of type `IDynamicPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IDynamicPartialHeadConfig& setExponent(float32 exponent) = 0; - }; - - /** - * Allows to configure partial rule heads that predict for a for a subset of the available labels that is determined - * dynamically. Only those labels for which the square of the predictive quality exceeds a certain threshold are - * included in a rule head. 
- */ - class DynamicPartialHeadConfig final : public IHeadConfig, - public IDynamicPartialHeadConfig { - private: - - float32 threshold_; - - float32 exponent_; - - const std::unique_ptr& labelBinningConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param labelBinningConfigPtr A reference to an unique pointer that stores the configuration of the - * method for assigning labels to bins - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - DynamicPartialHeadConfig(const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - float32 getThreshold() const override; - - IDynamicPartialHeadConfig& setThreshold(float32 threshold) override; - - float32 getExponent() const override; - - IDynamicPartialHeadConfig& setExponent(float32 exponent) override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const override; - - bool isPartial() const override; - - bool isSingleLabel() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_fixed.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_fixed.hpp deleted file mode 100644 index f2aa9049..00000000 --- 
a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_partial_fixed.hpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/macros.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/rule_evaluation/regularization.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure partial rule heads that predict for a predefined - * number of labels. - */ - class MLRLBOOSTING_API IFixedPartialHeadConfig { - public: - - virtual ~IFixedPartialHeadConfig() {}; - - /** - * Returns the percentage that specifies for how many labels the rule heads predict. - * - * @return The percentage that specifies for how many labels the rule heads predict or 0, if the percentage - * is calculated based on the average label cardinality - */ - virtual float32 getLabelRatio() const = 0; - - /** - * Sets the percentage that specifies for how many labels the rule heads should predict. - * - * @param labelRatio A percentage that specifies for how many labels the rule heads should predict, e.g., - * if 100 labels are available, a percentage of 0.5 means that the rule heads predict - * for a subset of `ceil(0.5 * 100) = 50` labels. Must be in (0, 1) or 0, if the - * percentage should be calculated based on the average label cardinality - * @return A reference to an object of type `IFixedPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IFixedPartialHeadConfig& setLabelRatio(float32 labelRatio) = 0; - - /** - * Returns the minimum number of labels for which the rule heads predict. - * - * @return The minimum number of labels for which the rule heads predict - */ - virtual uint32 getMinLabels() const = 0; - - /** - * Sets the minimum number of labels for which the rule heads should predict. 
- * - * @param minLabels The minimum number of labels for which the rule heads should predict. Must be at least 2 - * @return A reference to an object of type `IFixedPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IFixedPartialHeadConfig& setMinLabels(uint32 minLabels) = 0; - - /** - * Returns the maximum number of labels for which the rule heads predict. - * - * @return The maximum number of labels for which the rule heads predict - */ - virtual uint32 getMaxLabels() const = 0; - - /** - * Sets the maximum number of labels for which the rule heads should predict. - * - * @param maxLabels The maximum number of labels for which the rule heads should predict. Must be at least - * the minimum number of labels or 0, if the maximum number of labels should not be - * restricted - * @return A reference to an object of type `IFixedPartialHeadConfig` that allows further - * configuration of the rule heads - */ - virtual IFixedPartialHeadConfig& setMaxLabels(uint32 maxLabels) = 0; - }; - - /** - * Allows to configure partial rule heads that predict for a predefined number of labels. 
- */ - class FixedPartialHeadConfig final : public IHeadConfig, - public IFixedPartialHeadConfig { - private: - - float32 labelRatio_; - - uint32 minLabels_; - - uint32 maxLabels_; - - const std::unique_ptr& labelBinningConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param labelBinningConfigPtr A reference to an unique pointer that stores the configuration of the - * method for assigning labels to bins - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - FixedPartialHeadConfig(const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr); - - float32 getLabelRatio() const override; - - IFixedPartialHeadConfig& setLabelRatio(float32 labelRatio) override; - - uint32 getMinLabels() const override; - - IFixedPartialHeadConfig& setMinLabels(uint32 minLabels) override; - - uint32 getMaxLabels() const override; - - IFixedPartialHeadConfig& setMaxLabels(uint32 maxLabels) override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const override; - - bool isPartial() const override; - - bool isSingleLabel() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_single.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_single.hpp deleted 
file mode 100644 index 4a0ac15e..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/head_type_single.hpp +++ /dev/null @@ -1,62 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/rule_evaluation/regularization.hpp" -#include "common/multi_threading/multi_threading.hpp" - -namespace boosting { - - /** - * Allows to configure single-label rule heads that predict for a single label. - */ - class SingleLabelHeadConfig final : public IHeadConfig { - private: - - const std::unique_ptr& labelBinningConfigPtr_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - const std::unique_ptr& l1RegularizationConfigPtr_; - - const std::unique_ptr& l2RegularizationConfigPtr_; - - public: - - /** - * @param labelBinningConfigPtr A reference to an unique pointer that stores the configuration of the - * method for assigning labels to bins - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - * @param l1RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L1 - * regularization - * @param l2RegularizationConfigPtr A reference to an unique pointer that stores the configuration of the L2 - * regularization - */ - SingleLabelHeadConfig(const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr, - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const 
IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const override; - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const override; - - bool isPartial() const override; - - bool isSingleLabel() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization.hpp deleted file mode 100644 index 35b9a499..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a regularization term that affects the evaluation of - * rules. - */ - class IRegularizationConfig { - public: - - virtual ~IRegularizationConfig() {}; - - /** - * Determines and returns the weight of the regularization term according to the specified configuration. 
- * - * @return The weight of the regularization term - */ - virtual float64 getWeight() const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_manual.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_manual.hpp deleted file mode 100644 index 731ae79a..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_manual.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/macros.hpp" -#include "boosting/rule_evaluation/regularization.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that allow to configure a regularization term that affects the evaluation of - * rules by manually specifying the weight of the regularization term. - */ - class MLRLBOOSTING_API IManualRegularizationConfig { - public: - - virtual ~IManualRegularizationConfig() {}; - - /** - * Returns the weight of the regularization term. - * - * @return The weight of the regularization term - */ - virtual float64 getRegularizationWeight() const = 0; - - /** - * Sets the weight of the regularization term. - * - * @param regularizationWeight The weight of the regularization term. Must be greater than 0 - * @return A reference to an object of type `IManualRegularizationConfig` that allows - * further configuration of the regularization term - */ - virtual IManualRegularizationConfig& setRegularizationWeight(float64 regularizationWeight) = 0; - }; - - /** - * Allows to configure a regularization term that affects the evaluation of rules by manually specifying the weight - * of the regularization term. 
- */ - class ManualRegularizationConfig final : public IRegularizationConfig, - public IManualRegularizationConfig { - private: - - float64 regularizationWeight_; - - public: - - ManualRegularizationConfig(); - - float64 getRegularizationWeight() const override; - - IManualRegularizationConfig& setRegularizationWeight(float64 regularizationWeight) override; - - float64 getWeight() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_no.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_no.hpp deleted file mode 100644 index c783c193..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/regularization_no.hpp +++ /dev/null @@ -1,19 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/regularization.hpp" - -namespace boosting { - - /** - * Allows to configure a regularization term that does not affect the evaluation of rules. - */ - class NoRegularizationConfig final : public IRegularizationConfig { - public: - - float64 getWeight() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_compare_function.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_compare_function.hpp deleted file mode 100644 index d2731d6c..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_compare_function.hpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_evaluation/rule_compare_function.hpp" - -namespace boosting { - - /** - * Returns whether the quality of a boosted rule is better than the quality of a second one. 
- * - * @param first An object of type `Quality` that represents the quality of the first rule - * @param second An object of type `Quality` that represents the quality of the second rule - * @return True, if the quality of the first rule is better than the quality of the second one, false - * otherwise - */ - static inline constexpr bool compareBoostedRuleQuality(const Quality& first, const Quality& second) { - return first.quality < second.quality; - } - - /** - * An object of type `RuleCompareFunction` that defines the function that should be used for comparing the quality - * of boosted rules. - */ - static const RuleCompareFunction BOOSTED_RULE_COMPARE_FUNCTION(compareBoostedRuleQuality, 0.0); - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation.hpp deleted file mode 100644 index 5b776c35..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation.hpp +++ /dev/null @@ -1,36 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_evaluation/score_vector.hpp" - -#include - -namespace boosting { - - /** - * Defines an interface for all classes that allow to calculate the predictions of rules, as well as their overall - * quality, based on the gradients and Hessians that have been calculated according to a loss function. - * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - */ - template - class IRuleEvaluation { - public: - - virtual ~IRuleEvaluation() {}; - - /** - * Calculates the scores to be predicted by a rule, as well as its overall quality, based on the sums of - * gradients and Hessians that are covered by the rule. 
- * - * @param statisticVector A reference to an object of template type `StatisticVector` that stores the - * gradients and Hessians - * @return A reference to an object of type `IScoreVector` that stores the predicted - * scores of a rule, as well as its overall quality - */ - virtual const IScoreVector& calculateScores(StatisticVector& statisticVector) = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise.hpp deleted file mode 100644 index 14d225b3..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_example_wise_dense.hpp" -#include "boosting/rule_evaluation/rule_evaluation.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -#include - -namespace boosting { - - /** - * Defines an interface for all factories that allow to create instances of the type `IRuleEvaluation` that allow to - * calculate the predictions of rules, based on the gradients and Hessians that have been calculated according to a - * loss function that is applied example-wise. - */ - class IExampleWiseRuleEvaluationFactory { - public: - - virtual ~IExampleWiseRuleEvaluationFactory() {}; - - /** - * Creates and returns a new object of type `IRuleEvaluation` that allows to calculate the predictions of - * rules that predict for all available labels, based on the gradients and Hessians that are stored by a ` - * DenseExampleWiseStatisticVector`. - * - * @param statisticVector A reference to an object of type `DenseExampleWiseStatisticVector`. 
This vector - * is only used to identify the function that is able to deal with this particular - * type of vector via function overloading - * @param indexVector A reference to an object of type `CompleteIndexVector` that provides access to - * the indices of the labels for which the rules may predict - * @return An unique pointer to an object of type `IRuleEvaluation` that has been created - */ - virtual std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const = 0; - - /** - * Creates and returns a new object of type `IRuleEvaluation` that allows to calculate the predictions of - * rules that predict for a subset of the available labels, based on the gradients and Hessians that are - * stored by a `DenseExampleWiseStatisticVector`. - * - * @param statisticVector A reference to an object of type `DenseExampleWiseStatisticVector`. This vector - * is only used to identify the function that is able to deal with this particular - * type of vector via function overloading - * @param indexVector A reference to an object of type `PartialIndexVector` that provides access to - * the indices of the labels for which the rules may predict - * @return An unique pointer to an object of type `IRuleEvaluation` that has been created - */ - virtual std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete.hpp deleted file mode 100644 index 47df8a7d..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/blas.hpp" -#include 
"boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IExampleWiseRuleEvaluationFactory` that allow to calculate the - * predictions of complete rules, which predict for all available labels. - */ - class ExampleWiseCompleteRuleEvaluationFactory final : public IExampleWiseRuleEvaluationFactory { - private: - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack An reference to an object of type `Lapack` that allows to execute BLAS - * routines - */ - ExampleWiseCompleteRuleEvaluationFactory(float64 l1RegularizationWeight, float64 l2RegularizationWeight, - const Blas& blas, const Lapack& lapack); - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.hpp deleted file mode 100644 index c0b86f47..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.hpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * @author Michael Rapp 
(michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IExampleWiseRuleEvaluationFactory` that allow to calculate the - * predictions of complete rules, which predict for all available labels, using gradient-based label binning. - */ - class ExampleWiseCompleteBinnedRuleEvaluationFactory final : public IExampleWiseRuleEvaluationFactory { - private: - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr labelBinningFactoryPtr_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param labelBinningFactoryPtr An unique pointer to an object of type `ILabelBinningFactory` that - * allows to create the implementation to be used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - ExampleWiseCompleteBinnedRuleEvaluationFactory(float64 l1RegularizationWeight, - float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr, - const Blas& blas, const Lapack& lapack); - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git 
a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.hpp deleted file mode 100644 index b2acb376..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.hpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IExampleWiseRuleEvaluationFactory` that allow to calculate the - * predictions of partial rules, which predict for a subset of the available that is determined dynamically. - */ - class ExampleWiseDynamicPartialRuleEvaluationFactory final : public IExampleWiseRuleEvaluationFactory { - private: - - const float32 threshold_; - - const float32 exponent_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param threshold A threshold that affects for how many labels the rule heads should - * predict. A smaller threshold results in less labels being selected. A - * greater threshold results in more labels being selected. E.g., a - * threshold of 0.2 means that a rule will only predict for a label if the - * estimated predictive quality `q` for this particular label satisfies the - * inequality `q^exponent > q_best^exponent * (1 - 0.2)`, where `q_best` is - * the best quality among all labels. Must be in (0, 1) - * @param exponent An exponent that should be used to weigh the estimated predictive - * quality for individual labels. E.g., an exponent of 2 means that the - * estimated predictive quality `q` for a particular label is weighed as - * `q^2`. 
Must be at least 1 - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack An reference to an object of type `Lapack` that allows to execute BLAS - * routines - */ - ExampleWiseDynamicPartialRuleEvaluationFactory(float32 threshold, float32 exponent, - float64 l1RegularizationWeight, - float64 l2RegularizationWeight, const Blas& blas, - const Lapack& lapack); - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.hpp deleted file mode 100644 index fef27f72..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IExampleWiseRuleEvaluationFactory` that allow to calculate the - * predictions of partial rules, which predict for a subset of the available labels that is determined dynamically, - * using gradient-based 
label binning. - */ - class ExampleWiseDynamicPartialBinnedRuleEvaluationFactory final : public IExampleWiseRuleEvaluationFactory { - private: - - const float32 threshold_; - - const float32 exponent_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr labelBinningFactoryPtr_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param threshold A threshold that affects for how many labels the rule heads should - * predict. A smaller threshold results in less labels being selected. A - * greater threshold results in more labels being selected. E.g., a - * threshold of 0.2 means that a rule will only predict for a label if the - * estimated predictive quality `q` for this particular label satisfies the - * inequality `q^exponent > q_best^exponent * (1 - 0.2)`, where `q_best` is - * the best quality among all labels. Must be in (0, 1) - * @param exponent An exponent that should be used to weigh the estimated predictive - * quality for individual labels. E.g., an exponent of 2 means that the - * estimated predictive quality `q` for a particular label is weighed as - * `q^2`. 
Must be at least 1 - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param labelBinningFactoryPtr An unique pointer to an object of type `ILabelBinningFactory` that - * allows to create the implementation to be used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack An reference to an object of type `Lapack` that allows to execute BLAS - * routines - */ - ExampleWiseDynamicPartialBinnedRuleEvaluationFactory( - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr, const Blas& blas, const Lapack& lapack); - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.hpp deleted file mode 100644 index 763b910d..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IExampleWiseRuleEvaluationFactory` that allow to calculate 
the - * predictions of partial rules, which predict for a predefined number of labels. - */ - class ExampleWiseFixedPartialRuleEvaluationFactory final : public IExampleWiseRuleEvaluationFactory { - private: - - const float32 labelRatio_; - - const uint32 minLabels_; - - const uint32 maxLabels_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param labelRatio A percentage that specifies for how many labels the rule heads should - * predict, e.g., if 100 labels are available, a percentage of 0.5 means - * that the rule heads predict for a subset of `ceil(0.5 * 100) = 50` - * labels. Must be in (0, 1) - * @param minLabels The minimum number of labels for which the rule heads should predict. - * Must be at least 2 - * @param maxLabels The maximum number of labels for which the rule heads should predict. - * Must be at least `minLabels` or 0, if the maximum number of labels - * should not be restricted - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack An reference to an object of type `Lapack` that allows to execute BLAS - * routines - */ - ExampleWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, uint32 minLabels, uint32 maxLabels, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - const Blas& blas, const Lapack& lapack); - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) 
const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.hpp deleted file mode 100644 index fa74c913..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IExampleWiseRuleEvaluationFactory` that allow to calculate the - * predictions of partial rules, which predict for a predefined number of labels, using gradient-based label - * binning. - */ - class ExampleWiseFixedPartialBinnedRuleEvaluationFactory final : public IExampleWiseRuleEvaluationFactory { - private: - - const float32 labelRatio_; - - const uint32 minLabels_; - - const uint32 maxLabels_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr labelBinningFactoryPtr_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param labelRatio A percentage that specifies for how many labels the rule heads should - * predict, e.g., if 100 labels are available, a percentage of 0.5 means - * that the rule heads predict for a subset of `ceil(0.5 * 100) = 50` - * labels. Must be in (0, 1) - * @param minLabels The minimum number of labels for which the rule heads should predict. - * Must be at least 2 - * @param maxLabels The maximum number of labels for which the rule heads should predict. 
- * Must be at least `minLabels` or 0, if the maximum number of labels - * should not be restricted - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param labelBinningFactoryPtr An unique pointer to an object of type `ILabelBinningFactory` that - * allows to create the implementation to be used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack An reference to an object of type `Lapack` that allows to execute BLAS - * routines - */ - ExampleWiseFixedPartialBinnedRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, std::unique_ptr labelBinningFactoryPtr, - const Blas& blas, const Lapack& lapack); - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise.hpp deleted file mode 100644 index 1c8bbfd4..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_label_wise_dense.hpp" -#include "boosting/rule_evaluation/rule_evaluation.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - 
-#include - -namespace boosting { - - /** - * Defines an interface for all factories that allow to create instances of the type `IRuleEvaluation` that allow to - * calculate the predictions of rules, based on the gradients and Hessians that have been calculated according to a - * loss function that is applied label-wise. - */ - class ILabelWiseRuleEvaluationFactory { - public: - - virtual ~ILabelWiseRuleEvaluationFactory() {}; - - /** - * Creates a new instance of the class `IRuleEvaluation` that allows to calculate the predictions of rules - * that predict for all available labels, based on the gradients and Hessians that are stored by a - * `DenseLabelWiseStatisticVector`. - * - * @param statisticVector A reference to an object of type `DenseLabelWiseStatisticVector`. This vector is - * only used to identify the function that is able to deal with this particular - * type of vector via function overloading - * @param indexVector A reference to an object of the type `CompleteIndexVector` that provides access - * to the indices of the labels for which the rules may predict - * @return An unique pointer to an object of type `IRuleEvaluation` that has been created - */ - virtual std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const = 0; - - /** - * Creates a new instance of the class `IRuleEvaluation` that allows to calculate the predictions of rules - * that predict for a subset of the available labels, based on the gradients and Hessians that are stored by - * a `DenseLabelWiseStatisticVector`. - * - * @param statisticVector A reference to an object of type `DenseLabelWiseStatisticVector`. 
This vector is - * only used to identify the function that is able to deal with this particular - * type of vector via function overloading - * @param indexVector A reference to an object of the type `PartialIndexVector` that provides access - * to the indices of the labels for which the rules may predict - * @return An unique pointer to an object of type `IRuleEvaluation` that has been created - */ - virtual std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp deleted file mode 100644 index 0506f1d9..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/rule_evaluation_label_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `ILabelWiseRuleEvaluationFactory` that allow to calculate the predictions - * of complete rules, which predict for all available labels. 
- */ - class LabelWiseCompleteRuleEvaluationFactory final : public ILabelWiseRuleEvaluationFactory { - private: - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseCompleteRuleEvaluationFactory(float64 l1RegularizationWeight, float64 l2RegularizationWeight); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.hpp deleted file mode 100644 index 0fdc5fcc..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `ILabelWiseRuleEvaluationFactory` that allow to calculate the predictions - * of complete rules, which predict for all available labels, using gradient-based label binning. 
- */ - class LabelWiseCompleteBinnedRuleEvaluationFactory final : public ILabelWiseRuleEvaluationFactory { - private: - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr labelBinningFactoryPtr_; - - public: - - /** - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param labelBinningFactoryPtr An unique pointer to an object of type `ILabelBinningFactory` that - * allows to create the implementation to be used to assign labels to bins - */ - LabelWiseCompleteBinnedRuleEvaluationFactory(float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.hpp deleted file mode 100644 index 3f6dcafb..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `ISparseLabelWiseRuleEvaluationFactory` that allow to calculate the - * predictions of partial rules, which predict for a subset of the available labels that is 
determined dynamically. - */ - class LabelWiseDynamicPartialRuleEvaluationFactory final : public ISparseLabelWiseRuleEvaluationFactory { - private: - - const float32 threshold_; - - const float32 exponent_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param threshold A threshold that affects for how many labels the rule heads should - * predict. A smaller threshold results in less labels being selected. A - * greater threshold results in more labels being selected. E.g., a - * threshold of 0.2 means that a rule will only predict for a label if the - * estimated predictive quality `q` for this particular label satisfies the - * inequality `q^exponent > q_best^exponent * (1 - 0.2)`, where `q_best` is - * the best quality among all labels. Must be in (0, 1) - * @param exponent An exponent that should be used to weigh the estimated predictive - * quality for individual labels. E.g., an exponent of 2 means that the - * estimated predictive quality `q` for a particular label is weighed as - * `q^2`. 
Must be at least 1 - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseDynamicPartialRuleEvaluationFactory(float32 threshold, float32 exponent, - float64 l1RegularizationWeight, - float64 l2RegularizationWeight); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.hpp deleted file mode 100644 index 9a85d122..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `ISparseLabelWiseRuleEvaluationFactory` that allow to calculate the - * predictions of partial rules, which predict for a subset of the available labels that is determined dynamically, - * using gradient-based label binning. 
- */ - class LabelWiseDynamicPartialBinnedRuleEvaluationFactory final : public ISparseLabelWiseRuleEvaluationFactory { - private: - - const float32 threshold_; - - const float32 exponent_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr labelBinningFactoryPtr_; - - public: - - /** - * @param threshold A threshold that affects for how many labels the rule heads should - * predict. A smaller threshold results in less labels being selected. A - * greater threshold results in more labels being selected. E.g., a - * threshold of 0.2 means that a rule will only predict for a label if the - * estimated predictive quality `q` for this particular label satisfies the - * inequality `q^exponent > q_best^exponent * (1 - 0.2)`, where `q_best` is - * the best quality among all labels. Must be in (0, 1) - * @param exponent An exponent that should be used to weigh the estimated predictive - * quality for individual labels. E.g., an exponent of 2 means that the - * estimated predictive quality `q` for a particular label is weighed as - * `q^2`. 
Must be at least 1 - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param labelBinningFactoryPtr An unique pointer to an object of type `ILabelBinningFactory` that - * allows to create the implementation to be used to assign labels to bins - */ - LabelWiseDynamicPartialBinnedRuleEvaluationFactory( - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.hpp deleted file mode 100644 index a97a76b1..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `ISparseLabelWiseRuleEvaluationFactory` that allow to calculate the - * predictions of 
partial rules, which predict for a predefined number of labels. - */ - class LabelWiseFixedPartialRuleEvaluationFactory final : public ISparseLabelWiseRuleEvaluationFactory { - private: - - const float32 labelRatio_; - - const uint32 minLabels_; - - const uint32 maxLabels_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param labelRatio A percentage that specifies for how many labels the rule heads should - * predict, e.g., if 100 labels are available, a percentage of 0.5 means - * that the rule heads predict for a subset of `ceil(0.5 * 100) = 50` - * labels. Must be in (0, 1) - * @param minLabels The minimum number of labels for which the rule heads should predict. - * Must be at least 2 - * @param maxLabels The maximum number of labels for which the rule heads should predict. - * Must be at least `minLabels` or 0, if the maximum number of labels - * should not be restricted - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, uint32 minLabels, uint32 maxLabels, - float64 l1RegularizationWeight, float64 l2RegularizationWeight); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff 
--git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.hpp deleted file mode 100644 index 931bda93..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/binning/label_binning.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `ISparseLabelWiseRuleEvaluationFactory` that allow to calculate the - * predictions of partial rules, which predict for a predefined number of labels, using gradient-based label - * binning. - */ - class LabelWiseFixedPartialBinnedRuleEvaluationFactory final : public ISparseLabelWiseRuleEvaluationFactory { - private: - - const float32 labelRatio_; - - const uint32 minLabels_; - - const uint32 maxLabels_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr labelBinningFactoryPtr_; - - public: - - /** - * @param labelRatio A percentage that specifies for how many labels the rule heads should - * predict, e.g., if 100 labels are available, a percentage of 0.5 means - * that the rule heads predict for a subset of `ceil(0.5 * 100) = 50` - * labels. Must be in (0, 1) - * @param minLabels The minimum number of labels for which the rule heads should predict. - * Must be at least 2 - * @param maxLabels The maximum number of labels for which the rule heads should predict. 
- * Must be at least `minLabels` or 0, if the maximum number of labels - * should not be restricted - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param labelBinningFactoryPtr An unique pointer to an object of type `ILabelBinningFactory` that - * allows to create the implementation to be used to assign labels to bins - */ - LabelWiseFixedPartialBinnedRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, std::unique_ptr labelBinningFactoryPtr); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_single.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_single.hpp deleted file mode 100644 index d2349d82..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_single.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class 
`ISparseLabelWiseRuleEvaluationFactory` that allow to calculate the - * predictions of single-label rules, which predict for a single label. - */ - class LabelWiseSingleLabelRuleEvaluationFactory final : public ISparseLabelWiseRuleEvaluationFactory { - private: - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseSingleLabelRuleEvaluationFactory(float64 l1RegularizationWeight, float64 l2RegularizationWeight); - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const override; - - std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp deleted file mode 100644 index f1e20bed..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_label_wise_sparse.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise.hpp" - -namespace boosting { - - /** - * Defines an 
interface for all factories that allow to create instances of the type `IRuleEvaluation` that allow to - * calculate the predictions of rules, based on the gradients and Hessians that have been calculated according to a - * loss function that is applied label-wise and are stored using a sparse data structure. - */ - class ISparseLabelWiseRuleEvaluationFactory : public ILabelWiseRuleEvaluationFactory { - public: - - virtual ~ISparseLabelWiseRuleEvaluationFactory() override {}; - - // Keep "create" functions from the parent class rather than hiding them - using ILabelWiseRuleEvaluationFactory::create; - - /** - * Creates a new instance of the class `IRuleEvaluation` that allows to calculate the predictions of rules - * that predict for all available labels, based on the gradients and Hessians that are stored by a - * `SparseLabelWiseStatisticVector`. - * - * @param statisticVector A reference to an object of type `SparseLabelWiseStatisticVector`. This vector - * is only used to identify the function that is able to deal with this particular - * type of vector via function overloading - * @param indexVector A reference to an object of the type `CompleteIndexVector` that provides access - * to the indices of the labels for which the rules may predict - * @return An unique pointer to an object of type `IRuleEvaluation` that has been created - */ - virtual std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const = 0; - - /** - * Creates a new instance of the class `IRuleEvaluation` that allows to calculate the predictions of rules - * that predict for a subset of the available labels, based on the gradients and Hessians that are stored by - * a `SparseLabelWiseStatisticVector`. - * - * @param statisticVector A reference to an object of type `SparseLabelWiseStatisticVector`. 
This vector - * is only used to identify the function that is able to deal with this particular - * type of vector via function overloading - * @param indexVector A reference to an object of the type `PartialIndexVector` that provides access - * to the indices of the labels for which the rules may predict - * @return An unique pointer to an object of type `IRuleEvaluation` that has been created - */ - virtual std::unique_ptr> create( - const SparseLabelWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/rule_model_assemblage/default_rule_auto.hpp b/cpp/subprojects/boosting/include/boosting/rule_model_assemblage/default_rule_auto.hpp deleted file mode 100644 index 3c8b6659..00000000 --- a/cpp/subprojects/boosting/include/boosting/rule_model_assemblage/default_rule_auto.hpp +++ /dev/null @@ -1,46 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/statistics/statistic_format.hpp" -#include "common/rule_model_assemblage/default_rule.hpp" - -namespace boosting { - - /** - * Allows to configure a method that automatically decides whether a default rule should be included in a rule-based - * model or not. 
- */ - class AutomaticDefaultRuleConfig final : public IDefaultRuleConfig { - private: - - const std::unique_ptr& statisticsConfigPtr_; - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& headConfigPtr_; - - public: - - /** - * @param statisticsConfigPtr A reference to an unique pointer that stores the configuration of the - * statistics - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss - * function - * @param headConfigPtr A reference to an unique pointer that stores the configuration of the rule - * heads - */ - AutomaticDefaultRuleConfig(const std::unique_ptr& statisticsConfigPtr, - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& headConfigPtr); - - /** - * @see `IDefaultRuleConfig::isDefaultRuleUsed` - */ - bool isDefaultRuleUsed(const IRowWiseLabelMatrix& labelMatrix) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/sampling/partition_sampling_auto.hpp b/cpp/subprojects/boosting/include/boosting/sampling/partition_sampling_auto.hpp deleted file mode 100644 index f5c220b0..00000000 --- a/cpp/subprojects/boosting/include/boosting/sampling/partition_sampling_auto.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/prediction/probability_calibration_marginal.hpp" -#include "common/sampling/partition_sampling.hpp" -#include "common/stopping/global_pruning.hpp" - -namespace boosting { - - /** - * Allows to configure a method that automatically decides for a method that partitions the available training - * examples into a training set and a holdout set, depending on whether a holdout set is needed and depending on the - * loss function. 
- */ - class AutomaticPartitionSamplingConfig final : public IPartitionSamplingConfig { - private: - - const std::unique_ptr& globalPruningConfigPtr_; - - const std::unique_ptr& marginalProbabilityCalibratorConfigPtr_; - - const std::unique_ptr& jointProbabilityCalibratorConfigPtr_; - - public: - - /** - * @param globalPruningConfigPtr A reference to an unique pointer that stores the - * configuration of the method that is used for pruning - * entire rules - * @param marginalProbabilityCalibratorConfigPtr A reference to an unique pointer that stores the - * configuration of the calibrator that is used to fit a - * model for the calibration of marginal probabilities - * @param jointProbabilityCalibratorConfigPtr A reference to an unique pointer that stores the - * configuration of the calibrator that is used to fit a - * model for the calibration of joint probabilities - */ - AutomaticPartitionSamplingConfig( - const std::unique_ptr& globalPruningConfigPtr, - const std::unique_ptr& marginalProbabilityCalibratorConfigPtr, - const std::unique_ptr& jointProbabilityCalibratorConfigPtr); - - /** - * @see `IPartitionSamplingConfig::createPartitionSamplingFactory` - */ - std::unique_ptr createPartitionSamplingFactory() const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistic_format.hpp deleted file mode 100644 index e01e6bad..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/blas.hpp" -#include "boosting/math/lapack.hpp" -#include "common/input/feature_matrix.hpp" -#include "common/input/label_matrix_row_wise.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - * Returns whether a sparse representation of the gradients and Hessians should be 
preferred or not. - * - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @param defaultRuleUsed True, if a default rule is used, false otherwise - * @param partialHeadsUsed True, if the partial heads are used by the rules, false otherwise - * @return True, if a sparse representation should be preferred, false otherwise - */ - static inline bool shouldSparseStatisticsBePreferred(const IRowWiseLabelMatrix& labelMatrix, bool defaultRuleUsed, - bool partialHeadsUsed) { - return labelMatrix.isSparse() && labelMatrix.getNumCols() > 120 && !defaultRuleUsed && partialHeadsUsed; - } - - /** - * Defines an interface for all classes that allow to configure which format should be used for storing statistics - * about the labels of the training examples. - */ - class IStatisticsConfig { - public: - - virtual ~IStatisticsConfig() {}; - - /** - * Creates and returns a new object of type `IStatisticsProviderFactory` according to the specified - * configuration. - * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the - * feature values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access - * to the labels of the training examples - * @param blas A reference to an object of type `Blas` that allows to execute BLAS routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK routines - * @return An unique pointer to an object of type `IStatisticsProviderFactory` that has been - * created - */ - virtual std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const = 0; - - /** - * Returns whether a dense format is used for storing statistics about the labels of the training examples - * or not. 
- * - * @return True, if a dense format is used, false otherwise - */ - virtual bool isDense() const = 0; - - /** - * Returns whether a sparse format is used for storing statistics about the labels of the training examples - * or not. - * - * @return True, if a sparse format is used, false otherwise - */ - virtual bool isSparse() const = 0; - }; - -}; diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_auto.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_auto.hpp deleted file mode 100644 index 0832ea1a..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_auto.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/rule_evaluation/head_type.hpp" -#include "boosting/statistics/statistic_format.hpp" -#include "common/rule_model_assemblage/default_rule.hpp" - -namespace boosting { - - /** - * Allows to configure a method that automatically decides for a format for storing statistics about the labels of - * the training examples. 
- */ - class AutomaticStatisticsConfig final : public IStatisticsConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - const std::unique_ptr& headConfigPtr_; - - const std::unique_ptr& defaultRuleConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss - * function - * @param headConfigPtr A reference to an unique pointer that stores the configuration of the rule - * heads - * @param defaultRuleConfigPtr A reference to an unique pointer that stores the configuration of the - * default rule - */ - AutomaticStatisticsConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& headConfigPtr, - const std::unique_ptr& defaultRuleConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const override; - - bool isDense() const override; - - bool isSparse() const override; - }; - -}; diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_dense.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_dense.hpp deleted file mode 100644 index b5a9315f..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_dense.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/statistics/statistic_format.hpp" - -namespace boosting { - - /** - * Allows to configure a dense format for storing statistics about the labels of the training examples. 
- */ - class DenseStatisticsConfig final : public IStatisticsConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss function - */ - DenseStatisticsConfig(const std::unique_ptr& lossConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const override; - - bool isDense() const override; - - bool isSparse() const override; - }; - -}; diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_sparse.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_sparse.hpp deleted file mode 100644 index 29ad582a..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistic_format_sparse.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss.hpp" -#include "boosting/statistics/statistic_format.hpp" - -namespace boosting { - - /** - * Allows to configure a sparse format for storing statistics about the labels of the training examples. 
- */ - class SparseStatisticsConfig final : public IStatisticsConfig { - private: - - const std::unique_ptr& lossConfigPtr_; - - public: - - /** - * @param lossConfigPtr A reference to an unique pointer that stores the configuration of the loss function - */ - SparseStatisticsConfig(const std::unique_ptr& lossConfigPtr); - - std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const override; - - bool isDense() const override; - - bool isSparse() const override; - }; - -}; diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistics.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistics.hpp deleted file mode 100644 index 1096fd09..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistics.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_sparse_set.hpp" -#include "common/data/view_c_contiguous.hpp" -#include "common/statistics/statistics.hpp" - -#include - -/** - * Defines an interface for all classes that provide access to gradients and Hessians which serve as the basis for - * learning a new boosted rule or refining an existing one. - */ -class IBoostingStatistics : public IStatistics { - public: - - virtual ~IBoostingStatistics() {}; - - /** - * A visitor function for handling score matrices of the type `CContiguousConstView`. - */ - typedef std::function&)> DenseScoreMatrixVisitor; - - /** - * A visitor function for handling score matrices of the type `SparseSetMatrix`. - */ - typedef std::function&)> SparseScoreMatrixVisitor; - - /** - * Invokes one of the given visitor functions, depending on which one is able to handle the type of matrix that - * is used to store the currently predicted scores. 
- * - * @param denseVisitor The visitor function for handling objects of the type `CContiguousConstView` - * @param sparseVisitor The visitor function for handling objects of the type `SparseSetMatrix` - */ - virtual void visitScoreMatrix(DenseScoreMatrixVisitor denseVisitor, - SparseScoreMatrixVisitor sparseVisitor) const = 0; -}; diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistics_example_wise.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistics_example_wise.hpp deleted file mode 100644 index b41e902e..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistics_example_wise.hpp +++ /dev/null @@ -1,51 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/rule_evaluation_example_wise.hpp" -#include "boosting/statistics/statistics_label_wise.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that store gradients and Hessians that have been calculated according to a - * differentiable loss-function that is applied example-wise. - * - * @tparam ExampleWiseRuleEvaluationFactory The type of the classes that may be used for calculating the - * example-wise predictions of rules, as well as their overall quality - * @tparam LabelWiseRuleEvaluationFactory The type of the classes that may be used for calculating the label-wise - * predictions of rules, as well as their overall quality - */ - template - class IExampleWiseStatistics : virtual public IBoostingStatistics { - public: - - virtual ~IExampleWiseStatistics() override {}; - - /** - * Sets the factory that allows to create instances of the class that is used for calculating the - * predictions of rules, as well as their overall quality. 
- * - * @param ruleEvaluationFactory A reference to an object of template type `ExampleWiseRuleEvaluationFactory` - * to be set - */ - virtual void setRuleEvaluationFactory(const ExampleWiseRuleEvaluationFactory& ruleEvaluationFactory) = 0; - - /** - * Creates and returns an instance of type `ILabelWiseStatistics` from the gradients and Hessians that are - * stored by this object. - * - * @param ruleEvaluationFactory A reference to an object of template type `LabelWiseRuleEvaluationFactory` - * that allows to create instances of the class that is used for calculating - * the predictions of rules, as well as their overall quality - * @param numThreads The number of threads that should be used to convert the statistics for - * individual examples in parallel - * @return An unique pointer to an object of type `ILabelWiseStatistics` that has been - * created - */ - virtual std::unique_ptr> toLabelWiseStatistics( - const LabelWiseRuleEvaluationFactory& ruleEvaluationFactory, uint32 numThreads) = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistics_label_wise.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistics_label_wise.hpp deleted file mode 100644 index 27a94bf7..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistics_label_wise.hpp +++ /dev/null @@ -1,33 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/rule_evaluation/rule_evaluation_label_wise.hpp" -#include "boosting/statistics/statistics.hpp" - -namespace boosting { - - /** - * Defines an interface for all classes that store gradients and Hessians that have been calculated according to a - * differentiable loss function that is applied label-wise. 
- * - * @tparam RuleEvaluationFactory The type of the classes that may be used for calculating the predictions of rules, - * as well as their overall quality - */ - template - class ILabelWiseStatistics : virtual public IBoostingStatistics { - public: - - virtual ~ILabelWiseStatistics() override {}; - - /** - * Sets the factory that allows to create instances of the class that is used for calculating the - * predictions of rules, as well as their overall quality - * - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` to be set - */ - virtual void setRuleEvaluationFactory(const RuleEvaluationFactory& ruleEvaluationFactory) = 0; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_example_wise_dense.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_example_wise_dense.hpp deleted file mode 100644 index 3905492e..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_example_wise_dense.hpp +++ /dev/null @@ -1,140 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - */ -#pragma once - -#include "boosting/losses/loss_example_wise.hpp" -#include "boosting/statistics/statistics_example_wise.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IStatisticsProvider` that provide access to an object of type - * `IExampleWiseStatistics`, which uses dense data structures to store the statistics. 
- */ - class DenseExampleWiseStatisticsProviderFactory final : public IStatisticsProviderFactory { - private: - - const std::unique_ptr lossFactoryPtr_; - - const std::unique_ptr evaluationMeasureFactoryPtr_; - - const std::unique_ptr defaultRuleEvaluationFactoryPtr_; - - const std::unique_ptr regularRuleEvaluationFactoryPtr_; - - const std::unique_ptr pruningRuleEvaluationFactoryPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param lossFactoryPtr An unique pointer to an object of type `IExampleWiseLossFactory` - * that allows to create implementations of the loss function that - * should be used for calculating gradients and Hessians - * @param evaluationMeasureFactoryPtr An unique pointer to an object of type - * `IEvaluationMeasureFactory` that allows to create - * implementations of the evaluation measure that should be used - * for assessing the quality of predictions - * @param defaultRuleEvaluationFactoryPtr An unique pointer to an object of type - * `IExampleWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, of the default rule - * @param regularRuleEvaluationFactoryPtr An unique pointer to an object of type - * `IExampleWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, of all remaining rules - * @param pruningRuleEvaluationFactoryPtr An unique pointer to an object of type - * `IExampleWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, when pruning rules - * @param numThreads The number of CPU threads to be used to calculate the initial - * statistics in parallel. 
Must be at least 1 - */ - DenseExampleWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr defaultRuleEvaluationFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads); - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& labelMatrix) const override; - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create(const BinaryCsrConstView& labelMatrix) const override; - }; - - /** - * Allows to create instances of the class `IStatisticsProvider` that provide access to an object of type - * `IExampleWiseStatistics`, which uses dense data structures to store the statistics and can be converted into an - * object of type `ILabelWiseStatistics`. - */ - class DenseConvertibleExampleWiseStatisticsProviderFactory final : public IStatisticsProviderFactory { - private: - - const std::unique_ptr lossFactoryPtr_; - - const std::unique_ptr evaluationMeasureFactoryPtr_; - - const std::unique_ptr defaultRuleEvaluationFactoryPtr_; - - const std::unique_ptr regularRuleEvaluationFactoryPtr_; - - const std::unique_ptr pruningRuleEvaluationFactoryPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param lossFactoryPtr An unique pointer to an object of type `IExampleWiseLossFactory` - * that allows to create implementations of the loss function that - * should be used for calculating gradients and Hessians - * @param evaluationMeasureFactoryPtr An unique pointer to an object of type - * `IEvaluationMeasureFactory` that allows to create - * implementations of the evaluation measure that should be used - * for assessing the quality of predictions - * @param defaultRuleEvaluationFactoryPtr An unique pointer to an object of type - * `IExampleWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding 
quality - * scores, of the default rule - * @param regularRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ILabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, of all remaining rules - * @param pruningRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ILabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, when pruning rules - * @param numThreads The number of CPU threads to be used to calculate the initial - * statistics in parallel. Must be at least 1 - */ - DenseConvertibleExampleWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr defaultRuleEvaluationFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads); - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& labelMatrix) const override; - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create(const BinaryCsrConstView& labelMatrix) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_dense.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_dense.hpp deleted file mode 100644 index ed87e770..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_dense.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - */ -#pragma once - -#include "boosting/losses/loss_label_wise.hpp" -#include "boosting/statistics/statistics_label_wise.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - 
* Allows to create instances of the class `IStatisticsProvider` that provide access to an object of type - * `ILabelWiseStatistics`, which uses dense data structures to store the statistics. - */ - class DenseLabelWiseStatisticsProviderFactory final : public IStatisticsProviderFactory { - private: - - const std::unique_ptr lossFactoryPtr_; - - const std::unique_ptr evaluationMeasureFactoryPtr_; - - const std::unique_ptr defaultRuleEvaluationFactoryPtr_; - - const std::unique_ptr regularRuleEvaluationFactoryPtr_; - - const std::unique_ptr pruningRuleEvaluationFactoryPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param lossFactoryPtr An unique pointer to an object of type `ILabelWiseLossFactory` - * that allows to create implementations of the loss function that - * should be used for calculating gradients and Hessians - * @param evaluationMeasureFactoryPtr An unique pointer to an object of type - * `IEvaluationMeasureFactory` that allows to create - * implementations of the evaluation measure that should be used - * for assessing the quality of predictions - * @param defaultRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ILabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, of the default rule - * @param regularRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ILabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, of all remaining rules - * @param pruningRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ILabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, when pruning rules - * @param numThreads The number of CPU threads to be used to calculate the initial - * statistics in parallel. 
Must be at least 1 - */ - DenseLabelWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr defaultRuleEvaluationFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads); - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& labelMatrix) const override; - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create(const BinaryCsrConstView& labelMatrix) const override; - }; - -} diff --git a/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_sparse.hpp b/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_sparse.hpp deleted file mode 100644 index 5f268349..00000000 --- a/cpp/subprojects/boosting/include/boosting/statistics/statistics_provider_label_wise_sparse.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/losses/loss_label_wise_sparse.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_sparse.hpp" -#include "boosting/statistics/statistics_label_wise.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - * Allows to create instances of the class `IStatisticsProvider` that provide access to an object of type - * `ILabelWiseStatistics`, which uses sparse data structures to store the statistics. 
- */ - class SparseLabelWiseStatisticsProviderFactory final : public IStatisticsProviderFactory { - private: - - const std::unique_ptr lossFactoryPtr_; - - const std::unique_ptr evaluationMeasureFactoryPtr_; - - const std::unique_ptr regularRuleEvaluationFactoryPtr_; - - const std::unique_ptr pruningRuleEvaluationFactoryPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param lossFactoryPtr An unique pointer to an object of type - * `ISparseLabelWiseLossFactory` that allows to create - * implementations of the loss function that should be used for - * calculating gradients and Hessians - * @param evaluationMeasureFactoryPtr An unique pointer to an object of type - * `ISparseEvaluationMeasureFactory` that allows to create - * implementations of the evaluation measure that should be used - * for assessing the quality of predictions - * @param regularRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ISparseLabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, of all remaining rules - * @param pruningRuleEvaluationFactoryPtr An unique pointer to an object of type - * `ISparseLabelWiseRuleEvaluationFactory` that should be used for - * calculating the predictions, as well as corresponding quality - * scores, when pruning rules - * @param numThreads The number of CPU threads to be used to calculate the initial - * statistics in parallel. 
Must be at least 1 - */ - SparseLabelWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, - uint32 numThreads); - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& labelMatrix) const override; - - /** - * @see `IStatisticsProviderFactory::create` - */ - std::unique_ptr create(const BinaryCsrConstView& labelMatrix) const override; - }; - -} diff --git a/cpp/subprojects/boosting/meson.build b/cpp/subprojects/boosting/meson.build deleted file mode 100644 index 9cb30d13..00000000 --- a/cpp/subprojects/boosting/meson.build +++ /dev/null @@ -1,109 +0,0 @@ -project('boosting', 'cpp') - -# Source files -source_files = [ - 'src/boosting/binning/feature_binning_auto.cpp', - 'src/boosting/binning/label_binning_auto.cpp', - 'src/boosting/binning/label_binning_equal_width.cpp', - 'src/boosting/binning/label_binning_no.cpp', - 'src/boosting/data/histogram_view_label_wise_sparse.cpp', - 'src/boosting/data/matrix_c_contiguous_numeric.cpp', - 'src/boosting/data/matrix_sparse_set_numeric.cpp', - 'src/boosting/data/statistic_vector_example_wise_dense.cpp', - 'src/boosting/data/statistic_vector_label_wise_dense.cpp', - 'src/boosting/data/statistic_vector_label_wise_sparse.cpp', - 'src/boosting/data/statistic_view_example_wise_dense.cpp', - 'src/boosting/data/statistic_view_label_wise_dense.cpp', - 'src/boosting/data/statistic_view_label_wise_sparse.cpp', - 'src/boosting/iterator/diagonal_iterator.cpp', - 'src/boosting/losses/loss_example_wise_logistic.cpp', - 'src/boosting/losses/loss_example_wise_squared_error.cpp', - 'src/boosting/losses/loss_example_wise_squared_hinge.cpp', - 'src/boosting/losses/loss_label_wise_logistic.cpp', - 'src/boosting/losses/loss_label_wise_squared_error.cpp', - 'src/boosting/losses/loss_label_wise_squared_hinge.cpp', - 
'src/boosting/math/blas.cpp', - 'src/boosting/math/lapack.cpp', - 'src/boosting/model/rule_list_builder.cpp', - 'src/boosting/multi_threading/parallel_rule_refinement_auto.cpp', - 'src/boosting/multi_threading/parallel_statistic_update_auto.cpp', - 'src/boosting/post_processing/shrinkage_constant.cpp', - 'src/boosting/prediction/discretization_function_probability.cpp', - 'src/boosting/prediction/discretization_function_score.cpp', - 'src/boosting/prediction/predictor_binary_auto.cpp', - 'src/boosting/prediction/predictor_binary_example_wise.cpp', - 'src/boosting/prediction/predictor_binary_gfm.cpp', - 'src/boosting/prediction/predictor_binary_label_wise.cpp', - 'src/boosting/prediction/predictor_probability_auto.cpp', - 'src/boosting/prediction/predictor_probability_label_wise.cpp', - 'src/boosting/prediction/predictor_probability_marginalized.cpp', - 'src/boosting/prediction/predictor_score_label_wise.cpp', - 'src/boosting/prediction/probability_calibration_isotonic.cpp', - 'src/boosting/prediction/probability_function_chain_rule.cpp', - 'src/boosting/prediction/probability_function_logistic.cpp', - 'src/boosting/prediction/transformation_binary_example_wise.cpp', - 'src/boosting/prediction/transformation_binary_gfm.cpp', - 'src/boosting/prediction/transformation_binary_label_wise.cpp', - 'src/boosting/prediction/transformation_probability_label_wise.cpp', - 'src/boosting/prediction/transformation_probability_marginalized.cpp', - 'src/boosting/rule_evaluation/head_type_auto.cpp', - 'src/boosting/rule_evaluation/head_type_complete.cpp', - 'src/boosting/rule_evaluation/head_type_partial_dynamic.cpp', - 'src/boosting/rule_evaluation/head_type_partial_fixed.cpp', - 'src/boosting/rule_evaluation/head_type_single.cpp', - 'src/boosting/rule_evaluation/regularization_manual.cpp', - 'src/boosting/rule_evaluation/regularization_no.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_example_wise_complete.cpp', - 
'src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_complete.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.cpp', - 'src/boosting/rule_evaluation/rule_evaluation_label_wise_single.cpp', - 'src/boosting/rule_model_assemblage/default_rule_auto.cpp', - 'src/boosting/sampling/partition_sampling_auto.cpp', - 'src/boosting/statistics/statistic_format_auto.cpp', - 'src/boosting/statistics/statistic_format_dense.cpp', - 'src/boosting/statistics/statistic_format_sparse.cpp', - 'src/boosting/statistics/statistics_provider_example_wise_dense.cpp', - 'src/boosting/statistics/statistics_provider_label_wise_dense.cpp', - 'src/boosting/statistics/statistics_provider_label_wise_sparse.cpp', - 'src/boosting/learner.cpp', - 'src/boosting/learner_boomer.cpp' -] - -# Dependencies -common_project = subproject('common') -common_dep = common_project.get_variable('common_dep') - -dependencies = [ - common_dep -] - -# Directory containing public headers -include_directories = include_directories('include') - -# Directory into which the library should be installed -install_root = common_project.get_variable('install_root') -install_dir = install_root / meson.project_name() / 'mlrl' / meson.project_name() / 
'cython/' - -# Library version -version = common_project.get_variable('version') - -# Library declaration -cpp_args = common_project.get_variable('cpp_args') -link_args = common_project.get_variable('link_args') - -if host_machine.system() == 'windows' - cpp_args += '-DMLRLBOOSTING_EXPORTS' -endif - -boosting_lib = library('mlrl' + meson.project_name(), source_files, include_directories : include_directories, - dependencies : dependencies, cpp_args : cpp_args, link_args : link_args, version : version, - install : true, install_dir : install_dir) diff --git a/cpp/subprojects/boosting/src/boosting/binning/feature_binning_auto.cpp b/cpp/subprojects/boosting/src/boosting/binning/feature_binning_auto.cpp deleted file mode 100644 index b1ef0ace..00000000 --- a/cpp/subprojects/boosting/src/boosting/binning/feature_binning_auto.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "boosting/binning/feature_binning_auto.hpp" - -#include "common/binning/feature_binning_equal_width.hpp" -#include "common/binning/feature_binning_no.hpp" - -namespace boosting { - - AutomaticFeatureBinningConfig::AutomaticFeatureBinningConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - std::unique_ptr AutomaticFeatureBinningConfig::createThresholdsFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - if (!featureMatrix.isSparse() && featureMatrix.getNumRows() > 200000) { - return EqualWidthFeatureBinningConfig(multiThreadingConfigPtr_) - .createThresholdsFactory(featureMatrix, labelMatrix); - } else { - return NoFeatureBinningConfig(multiThreadingConfigPtr_).createThresholdsFactory(featureMatrix, labelMatrix); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/binning/label_binning_auto.cpp b/cpp/subprojects/boosting/src/boosting/binning/label_binning_auto.cpp deleted file mode 100644 index 99cac988..00000000 --- a/cpp/subprojects/boosting/src/boosting/binning/label_binning_auto.cpp +++ 
/dev/null @@ -1,59 +0,0 @@ -#include "boosting/binning/label_binning_auto.hpp" - -#include "boosting/binning/label_binning_equal_width.hpp" -#include "boosting/binning/label_binning_no.hpp" - -namespace boosting { - - AutomaticLabelBinningConfig::AutomaticLabelBinningConfig( - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr) - : l1RegularizationConfigPtr_(l1RegularizationConfigPtr), l2RegularizationConfigPtr_(l2RegularizationConfigPtr) { - - } - - std::unique_ptr - AutomaticLabelBinningConfig::createLabelWiseCompleteRuleEvaluationFactory() const { - return NoLabelBinningConfig(l1RegularizationConfigPtr_, l2RegularizationConfigPtr_) - .createLabelWiseCompleteRuleEvaluationFactory(); - } - - std::unique_ptr - AutomaticLabelBinningConfig::createLabelWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, - uint32 minLabels, - uint32 maxLabels) const { - return NoLabelBinningConfig(l1RegularizationConfigPtr_, l2RegularizationConfigPtr_) - .createLabelWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels, maxLabels); - } - - std::unique_ptr - AutomaticLabelBinningConfig::createLabelWiseDynamicPartialRuleEvaluationFactory(float32 threshold, - float32 exponent) const { - return NoLabelBinningConfig(l1RegularizationConfigPtr_, l2RegularizationConfigPtr_) - .createLabelWiseDynamicPartialRuleEvaluationFactory(threshold, exponent); - } - - std::unique_ptr - AutomaticLabelBinningConfig::createExampleWiseCompleteRuleEvaluationFactory(const Blas& blas, - const Lapack& lapack) const { - return EqualWidthLabelBinningConfig(l1RegularizationConfigPtr_, l2RegularizationConfigPtr_) - .createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - } - - std::unique_ptr - AutomaticLabelBinningConfig::createExampleWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, const Blas& blas, const Lapack& lapack) const { - return EqualWidthLabelBinningConfig(l1RegularizationConfigPtr_, 
l2RegularizationConfigPtr_) - .createExampleWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels, maxLabels, blas, lapack); - } - - std::unique_ptr - AutomaticLabelBinningConfig::createExampleWiseDynamicPartialRuleEvaluationFactory(float32 threshold, - float32 exponent, - const Blas& blas, - const Lapack& lapack) const { - return EqualWidthLabelBinningConfig(l1RegularizationConfigPtr_, l2RegularizationConfigPtr_) - .createExampleWiseDynamicPartialRuleEvaluationFactory(threshold, exponent, blas, lapack); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/binning/label_binning_equal_width.cpp b/cpp/subprojects/boosting/src/boosting/binning/label_binning_equal_width.cpp deleted file mode 100644 index 2c5fbf20..00000000 --- a/cpp/subprojects/boosting/src/boosting/binning/label_binning_equal_width.cpp +++ /dev/null @@ -1,272 +0,0 @@ -#include "boosting/binning/label_binning_equal_width.hpp" - -#include "boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.hpp" -#include "common/math/math.hpp" -#include "common/util/validation.hpp" - -namespace boosting { - - /** - * Assigns labels to bins, based on the corresponding gradients and Hessians, in a way such that each bin contains - * labels for which the predicted score is expected to belong to the same value range. 
- */ - class EqualWidthLabelBinning final : public ILabelBinning { - private: - - const float32 binRatio_; - - const uint32 minBins_; - - const uint32 maxBins_; - - public: - - /** - * @param binRatio A percentage that specifies how many bins should be used to assign labels to, e.g., if - * 100 labels are available, 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. - * Must be in (0, 1) - * @param minBins The minimum number of bins to be used to assign labels to. Must be at least 2 - * @param maxBins The maximum number of bins to be used to assign labels to. Must be at least `minBins` or - * 0, if the maximum number of bins should not be restricted - */ - EqualWidthLabelBinning(float32 binRatio, uint32 minBins, uint32 maxBins) - : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - - uint32 getMaxBins(uint32 numLabels) const override { - return calculateBoundedFraction(numLabels, binRatio_, minBins_, maxBins_) + 1; - } - - LabelInfo getLabelInfo(const float64* criteria, uint32 numElements) const override { - LabelInfo labelInfo; - labelInfo.numNegativeBins = 0; - labelInfo.numPositiveBins = 0; - - if (numElements > 0) { - labelInfo.minNegative = 0; - labelInfo.maxNegative = -std::numeric_limits::infinity(); - labelInfo.minPositive = std::numeric_limits::infinity(); - labelInfo.maxPositive = 0; - - for (uint32 i = 0; i < numElements; i++) { - float64 criterion = criteria[i]; - - if (criterion < 0) { - labelInfo.numNegativeBins++; - - if (criterion < labelInfo.minNegative) { - labelInfo.minNegative = criterion; - } - - if (criterion > labelInfo.maxNegative) { - labelInfo.maxNegative = criterion; - } - } else if (criterion > 0) { - labelInfo.numPositiveBins++; - - if (criterion < labelInfo.minPositive) { - labelInfo.minPositive = criterion; - } - - if (criterion > labelInfo.maxPositive) { - labelInfo.maxPositive = criterion; - } - } - } - - if (labelInfo.numNegativeBins > 0) { - labelInfo.numNegativeBins = - 
calculateBoundedFraction(labelInfo.numNegativeBins, binRatio_, minBins_, maxBins_); - } - - if (labelInfo.numPositiveBins > 0) { - labelInfo.numPositiveBins = - calculateBoundedFraction(labelInfo.numPositiveBins, binRatio_, minBins_, maxBins_); - } - } - - return labelInfo; - } - - void createBins(LabelInfo labelInfo, const float64* criteria, uint32 numElements, Callback callback, - ZeroCallback zeroCallback) const override { - uint32 numNegativeBins = labelInfo.numNegativeBins; - float64 minNegative = labelInfo.minNegative; - float64 maxNegative = labelInfo.maxNegative; - uint32 numPositiveBins = labelInfo.numPositiveBins; - float64 minPositive = labelInfo.minPositive; - float64 maxPositive = labelInfo.maxPositive; - - float64 spanPerNegativeBin = minNegative < 0 ? (maxNegative - minNegative) / numNegativeBins : 0; - float64 spanPerPositiveBin = maxPositive > 0 ? (maxPositive - minPositive) / numPositiveBins : 0; - - for (uint32 i = 0; i < numElements; i++) { - float64 criterion = criteria[i]; - - if (criterion < 0) { - uint32 binIndex = (uint32) std::floor((criterion - minNegative) / spanPerNegativeBin); - - if (binIndex >= numNegativeBins) { - binIndex = numNegativeBins - 1; - } - - callback(binIndex, i); - } else if (criterion > 0) { - uint32 binIndex = (uint32) std::floor((criterion - minPositive) / spanPerPositiveBin); - - if (binIndex >= numPositiveBins) { - binIndex = numPositiveBins - 1; - } - - callback(numNegativeBins + binIndex, i); - } else { - zeroCallback(i); - } - } - } - }; - - /** - * Allows to create instances of the class `EqualWidthLabelBinning` that assign labels to bins in a way such that - * each bin contains labels for which the predicted score is expected to belong to the same value range. 
- */ - class EqualWidthLabelBinningFactory final : public ILabelBinningFactory { - private: - - const float32 binRatio_; - - const uint32 minBins_; - - const uint32 maxBins_; - - public: - - /** - * @param binRatio A percentage that specifies how many bins should be used, e.g., if 100 labels are a - * available, a percentage of 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. - * Must be in (0, 1) - * @param minBins The minimum number of bins that should be used. Must be at least 2 - * @param maxBins The maximum number of bins that should be used. Must be at least `minBins` or 0, if the - * maximum number of bins should not be restricted - */ - EqualWidthLabelBinningFactory(float32 binRatio, uint32 minBins, uint32 maxBins) - : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - - std::unique_ptr create() const override { - return std::make_unique(binRatio_, minBins_, maxBins_); - } - }; - - EqualWidthLabelBinningConfig::EqualWidthLabelBinningConfig( - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr) - : binRatio_(0.04f), minBins_(1), maxBins_(0), l1RegularizationConfigPtr_(l1RegularizationConfigPtr), - l2RegularizationConfigPtr_(l2RegularizationConfigPtr) {} - - float32 EqualWidthLabelBinningConfig::getBinRatio() const { - return binRatio_; - } - - IEqualWidthLabelBinningConfig& EqualWidthLabelBinningConfig::setBinRatio(float32 binRatio) { - assertGreater("binRatio", binRatio, 0); - assertLess("binRatio", binRatio, 1); - binRatio_ = binRatio; - return *this; - } - - uint32 EqualWidthLabelBinningConfig::getMinBins() const { - return minBins_; - } - - IEqualWidthLabelBinningConfig& EqualWidthLabelBinningConfig::setMinBins(uint32 minBins) { - assertGreaterOrEqual("minBins", minBins, 1); - minBins_ = minBins; - return *this; - } - - uint32 EqualWidthLabelBinningConfig::getMaxBins() const { - return maxBins_; - } - - IEqualWidthLabelBinningConfig& EqualWidthLabelBinningConfig::setMaxBins(uint32 
maxBins) { - if (maxBins != 0) assertGreaterOrEqual("maxBins", maxBins, minBins_); - maxBins_ = maxBins; - return *this; - } - - std::unique_ptr - EqualWidthLabelBinningConfig::createLabelWiseCompleteRuleEvaluationFactory() const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - std::unique_ptr labelBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - return std::make_unique( - l1RegularizationWeight, l2RegularizationWeight, std::move(labelBinningFactoryPtr)); - } - - std::unique_ptr - EqualWidthLabelBinningConfig::createLabelWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, - uint32 minLabels, - uint32 maxLabels) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - std::unique_ptr labelBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - return std::make_unique( - labelRatio, minLabels, maxLabels, l1RegularizationWeight, l2RegularizationWeight, - std::move(labelBinningFactoryPtr)); - } - - std::unique_ptr - EqualWidthLabelBinningConfig::createLabelWiseDynamicPartialRuleEvaluationFactory(float32 threshold, - float32 exponent) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - std::unique_ptr labelBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - return std::make_unique( - threshold, exponent, l1RegularizationWeight, l2RegularizationWeight, std::move(labelBinningFactoryPtr)); - } - - std::unique_ptr - EqualWidthLabelBinningConfig::createExampleWiseCompleteRuleEvaluationFactory(const Blas& blas, - const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - 
std::unique_ptr labelBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - return std::make_unique( - l1RegularizationWeight, l2RegularizationWeight, std::move(labelBinningFactoryPtr), blas, lapack); - } - - std::unique_ptr - EqualWidthLabelBinningConfig::createExampleWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, const Blas& blas, const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - std::unique_ptr labelBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - return std::make_unique( - labelRatio, minLabels, maxLabels, l1RegularizationWeight, l2RegularizationWeight, - std::move(labelBinningFactoryPtr), blas, lapack); - } - - std::unique_ptr - EqualWidthLabelBinningConfig::createExampleWiseDynamicPartialRuleEvaluationFactory(float32 threshold, - float32 exponent, - const Blas& blas, - const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - std::unique_ptr labelBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - return std::make_unique( - threshold, exponent, l1RegularizationWeight, l2RegularizationWeight, std::move(labelBinningFactoryPtr), blas, - lapack); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/binning/label_binning_no.cpp b/cpp/subprojects/boosting/src/boosting/binning/label_binning_no.cpp deleted file mode 100644 index eaa63e9d..00000000 --- a/cpp/subprojects/boosting/src/boosting/binning/label_binning_no.cpp +++ /dev/null @@ -1,72 +0,0 @@ -#include "boosting/binning/label_binning_no.hpp" - -#include "boosting/rule_evaluation/rule_evaluation_example_wise_complete.hpp" -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.hpp" -#include 
"boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.hpp" -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.hpp" - -namespace boosting { - - NoLabelBinningConfig::NoLabelBinningConfig(const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr) - : l1RegularizationConfigPtr_(l1RegularizationConfigPtr), l2RegularizationConfigPtr_(l2RegularizationConfigPtr) { - - } - - std::unique_ptr - NoLabelBinningConfig::createLabelWiseCompleteRuleEvaluationFactory() const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - return std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - } - - std::unique_ptr - NoLabelBinningConfig::createLabelWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, uint32 minLabels, - uint32 maxLabels) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - return std::make_unique( - labelRatio, minLabels, maxLabels, l1RegularizationWeight, l2RegularizationWeight); - } - - std::unique_ptr - NoLabelBinningConfig::createLabelWiseDynamicPartialRuleEvaluationFactory(float32 threshold, - float32 exponent) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - return std::make_unique( - threshold, exponent, l1RegularizationWeight, l2RegularizationWeight); - } - - std::unique_ptr - NoLabelBinningConfig::createExampleWiseCompleteRuleEvaluationFactory(const Blas& blas, - const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 
l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - return std::make_unique(l1RegularizationWeight, - l2RegularizationWeight, blas, lapack); - } - - std::unique_ptr - NoLabelBinningConfig::createExampleWiseFixedPartialRuleEvaluationFactory(float32 labelRatio, uint32 minLabels, - uint32 maxLabels, const Blas& blas, - const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - return std::make_unique( - labelRatio, minLabels, maxLabels, l1RegularizationWeight, l2RegularizationWeight, blas, lapack); - } - - std::unique_ptr - NoLabelBinningConfig::createExampleWiseDynamicPartialRuleEvaluationFactory(float32 threshold, float32 exponent, - const Blas& blas, - const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - return std::make_unique( - threshold, exponent, l1RegularizationWeight, l2RegularizationWeight, blas, lapack); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/histogram_view_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/boosting/data/histogram_view_label_wise_sparse.cpp deleted file mode 100644 index 1a4b7adf..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/histogram_view_label_wise_sparse.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "boosting/data/histogram_view_label_wise_sparse.hpp" - -#include "common/data/arrays.hpp" -#include "statistic_vector_label_wise_sparse_common.hpp" - -namespace boosting { - - SparseLabelWiseHistogramConstView::SparseLabelWiseHistogramConstView(uint32 numRows, uint32 numCols, - Triple* statistics, float64* weights) - : numRows_(numRows), numCols_(numCols), statistics_(statistics), weights_(weights) {} - - SparseLabelWiseHistogramConstView::const_iterator SparseLabelWiseHistogramConstView::cbegin(uint32 row) const { - return 
&statistics_[row * numCols_]; - } - - SparseLabelWiseHistogramConstView::const_iterator SparseLabelWiseHistogramConstView::cend(uint32 row) const { - return &statistics_[(row + 1) * numCols_]; - } - - SparseLabelWiseHistogramConstView::weight_const_iterator SparseLabelWiseHistogramConstView::weights_cbegin() const { - return weights_; - } - - SparseLabelWiseHistogramConstView::weight_const_iterator SparseLabelWiseHistogramConstView::weights_cend() const { - return &weights_[numRows_]; - } - - uint32 SparseLabelWiseHistogramConstView::getNumRows() const { - return numRows_; - } - - uint32 SparseLabelWiseHistogramConstView::getNumCols() const { - return numCols_; - } - - SparseLabelWiseHistogramView::SparseLabelWiseHistogramView(uint32 numRows, uint32 numCols, - Triple* statistics, float64* weights) - : SparseLabelWiseHistogramConstView(numRows, numCols, statistics, weights) {} - - void SparseLabelWiseHistogramView::clear() { - setArrayToZeros(weights_, numRows_); - setArrayToZeros(statistics_, numRows_ * numCols_); - } - - void SparseLabelWiseHistogramView::addToRow(uint32 row, SparseLabelWiseStatisticConstView::const_iterator begin, - SparseLabelWiseStatisticConstView::const_iterator end, float64 weight) { - if (weight != 0) { - weights_[row] += weight; - addToSparseLabelWiseStatisticVector(&statistics_[row * numCols_], begin, end, weight); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/matrix_c_contiguous_numeric.cpp b/cpp/subprojects/boosting/src/boosting/data/matrix_c_contiguous_numeric.cpp deleted file mode 100644 index d95141a3..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/matrix_c_contiguous_numeric.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "boosting/data/matrix_c_contiguous_numeric.hpp" - -namespace boosting { - - template - NumericCContiguousMatrix::NumericCContiguousMatrix(uint32 numRows, uint32 numCols) - : CContiguousMatrix(numRows, numCols) {} - - template - NumericCContiguousMatrix::NumericCContiguousMatrix(uint32 
numRows, uint32 numCols, bool init) - : CContiguousMatrix(numRows, numCols, init) {} - - template - void NumericCContiguousMatrix::addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd) { - typename NumericCContiguousMatrix::value_iterator iterator = this->values_begin(row); - uint32 numCols = this->getNumCols(); - - for (uint32 i = 0; i < numCols; i++) { - iterator[i] += begin[i]; - } - } - - template - void NumericCContiguousMatrix::addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd) { - typename NumericCContiguousMatrix::value_iterator iterator = this->values_begin(row); - uint32 numCols = indicesEnd - indicesBegin; - - for (uint32 i = 0; i < numCols; i++) { - uint32 index = indicesBegin[i]; - iterator[index] += begin[i]; - } - } - - template - void NumericCContiguousMatrix::removeFromRowFromSubset(uint32 row, - typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd) { - typename NumericCContiguousMatrix::value_iterator iterator = this->values_begin(row); - uint32 numCols = this->getNumCols(); - - for (uint32 i = 0; i < numCols; i++) { - iterator[i] -= begin[i]; - } - } - - template - void NumericCContiguousMatrix::removeFromRowFromSubset(uint32 row, - typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd) { - typename NumericCContiguousMatrix::value_iterator iterator = this->values_begin(row); - uint32 numCols = indicesEnd - indicesBegin; - 
- for (uint32 i = 0; i < numCols; i++) { - uint32 index = indicesBegin[i]; - iterator[index] -= begin[i]; - } - } - - template class NumericCContiguousMatrix; - template class NumericCContiguousMatrix; - template class NumericCContiguousMatrix; - template class NumericCContiguousMatrix; - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/matrix_sparse_set_numeric.cpp b/cpp/subprojects/boosting/src/boosting/data/matrix_sparse_set_numeric.cpp deleted file mode 100644 index 08744014..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/matrix_sparse_set_numeric.cpp +++ /dev/null @@ -1,76 +0,0 @@ -#include "boosting/data/matrix_sparse_set_numeric.hpp" - -namespace boosting { - - template - static inline void addToRowFromSubsetInternally(typename NumericSparseSetMatrix::row row, - typename VectorConstView::const_iterator iterator, - IndexIterator indexIterator, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - IndexedValue& entry = row.emplace(index, 0); - entry.value += iterator[i]; - } - } - - template - static inline void removeFromRowFromSubsetInternally(typename NumericSparseSetMatrix::row row, - typename VectorConstView::const_iterator iterator, - IndexIterator indexIterator, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - IndexedValue& entry = row.emplace(index, 0); - entry.value -= iterator[i]; - } - } - - template - NumericSparseSetMatrix::NumericSparseSetMatrix(uint32 numRows, uint32 numCols) - : SparseSetMatrix(numRows, numCols) {} - - template - void NumericSparseSetMatrix::addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd) { - addToRowFromSubsetInternally((*this)[row], begin, indicesBegin, - this->getNumCols()); - } - - template - void 
NumericSparseSetMatrix::addToRowFromSubset(uint32 row, typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd) { - uint32 numElements = indicesEnd - indicesBegin; - addToRowFromSubsetInternally((*this)[row], begin, indicesBegin, - numElements); - } - - template - void NumericSparseSetMatrix::removeFromRowFromSubset(uint32 row, - typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd) { - removeFromRowFromSubsetInternally((*this)[row], begin, indicesBegin, - this->getNumCols()); - } - - template - void NumericSparseSetMatrix::removeFromRowFromSubset(uint32 row, - typename VectorConstView::const_iterator begin, - typename VectorConstView::const_iterator end, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd) { - uint32 numElements = indicesEnd - indicesBegin; - removeFromRowFromSubsetInternally((*this)[row], begin, indicesBegin, - numElements); - } - - template class NumericSparseSetMatrix; - template class NumericSparseSetMatrix; - template class NumericSparseSetMatrix; - template class NumericSparseSetMatrix; - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_example_wise_dense.cpp b/cpp/subprojects/boosting/src/boosting/data/statistic_vector_example_wise_dense.cpp deleted file mode 100644 index 02434dd9..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_example_wise_dense.cpp +++ /dev/null @@ -1,189 +0,0 @@ -#include "boosting/data/statistic_vector_example_wise_dense.hpp" - -#include "boosting/data/arrays.hpp" -#include "boosting/math/math.hpp" -#include "common/data/arrays.hpp" - -#include - -namespace boosting { - - DenseExampleWiseStatisticVector::DenseExampleWiseStatisticVector(uint32 
numGradients) - : DenseExampleWiseStatisticVector(numGradients, false) {} - - DenseExampleWiseStatisticVector::DenseExampleWiseStatisticVector(uint32 numGradients, bool init) - : numGradients_(numGradients), numHessians_(triangularNumber(numGradients)), - gradients_( - (float64*) (init ? calloc(numGradients, sizeof(float64)) : malloc(numGradients * sizeof(float64)))), - hessians_( - (float64*) (init ? calloc(numHessians_, sizeof(float64)) : malloc(numHessians_ * sizeof(float64)))) {} - - DenseExampleWiseStatisticVector::DenseExampleWiseStatisticVector(const DenseExampleWiseStatisticVector& vector) - : DenseExampleWiseStatisticVector(vector.numGradients_) { - copyArray(vector.gradients_, gradients_, numGradients_); - copyArray(vector.hessians_, hessians_, numHessians_); - } - - DenseExampleWiseStatisticVector::~DenseExampleWiseStatisticVector() { - free(gradients_); - free(hessians_); - } - - DenseExampleWiseStatisticVector::gradient_iterator DenseExampleWiseStatisticVector::gradients_begin() { - return gradients_; - } - - DenseExampleWiseStatisticVector::gradient_iterator DenseExampleWiseStatisticVector::gradients_end() { - return &gradients_[numGradients_]; - } - - DenseExampleWiseStatisticVector::gradient_const_iterator DenseExampleWiseStatisticVector::gradients_cbegin() const { - return gradients_; - } - - DenseExampleWiseStatisticVector::gradient_const_iterator DenseExampleWiseStatisticVector::gradients_cend() const { - return &gradients_[numGradients_]; - } - - DenseExampleWiseStatisticVector::hessian_iterator DenseExampleWiseStatisticVector::hessians_begin() { - return hessians_; - } - - DenseExampleWiseStatisticVector::hessian_iterator DenseExampleWiseStatisticVector::hessians_end() { - return &hessians_[numHessians_]; - } - - DenseExampleWiseStatisticVector::hessian_const_iterator DenseExampleWiseStatisticVector::hessians_cbegin() const { - return hessians_; - } - - DenseExampleWiseStatisticVector::hessian_const_iterator 
DenseExampleWiseStatisticVector::hessians_cend() const { - return &hessians_[numHessians_]; - } - - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator - DenseExampleWiseStatisticVector::hessians_diagonal_cbegin() const { - return DiagonalConstIterator(hessians_, 0); - } - - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator - DenseExampleWiseStatisticVector::hessians_diagonal_cend() const { - return DiagonalConstIterator(hessians_, numGradients_); - } - - uint32 DenseExampleWiseStatisticVector::getNumElements() const { - return numGradients_; - } - - void DenseExampleWiseStatisticVector::clear() { - setArrayToZeros(gradients_, numGradients_); - setArrayToZeros(hessians_, numHessians_); - } - - void DenseExampleWiseStatisticVector::add(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd) { - addToArray(gradients_, gradientsBegin, numGradients_); - addToArray(hessians_, hessiansBegin, numHessians_); - } - - void DenseExampleWiseStatisticVector::add(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, hessian_const_iterator hessiansEnd, - float64 weight) { - addToArray(gradients_, gradientsBegin, numGradients_, weight); - addToArray(hessians_, hessiansBegin, numHessians_, weight); - } - - void DenseExampleWiseStatisticVector::remove(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd) { - removeFromArray(gradients_, gradientsBegin, numGradients_); - removeFromArray(hessians_, hessiansBegin, numHessians_); - } - - void DenseExampleWiseStatisticVector::remove(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd, float64 weight) { - removeFromArray(gradients_, 
gradientsBegin, numGradients_, weight); - removeFromArray(hessians_, hessiansBegin, numHessians_, weight); - } - - void DenseExampleWiseStatisticVector::addToSubset(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd, - const CompleteIndexVector& indices) { - addToArray(gradients_, gradientsBegin, numGradients_); - addToArray(hessians_, hessiansBegin, numHessians_); - } - - void DenseExampleWiseStatisticVector::addToSubset(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd, - const PartialIndexVector& indices) { - PartialIndexVector::const_iterator indexIterator = indices.cbegin(); - addToArray(gradients_, gradientsBegin, indexIterator, numGradients_); - - for (uint32 i = 0; i < numGradients_; i++) { - uint32 index = indexIterator[i]; - addToArray(&hessians_[triangularNumber(i)], &hessiansBegin[triangularNumber(index)], indexIterator, i + 1); - } - } - - void DenseExampleWiseStatisticVector::addToSubset(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd, - const CompleteIndexVector& indices, float64 weight) { - addToArray(gradients_, gradientsBegin, numGradients_, weight); - addToArray(hessians_, hessiansBegin, numHessians_, weight); - } - - void DenseExampleWiseStatisticVector::addToSubset(gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd, - const PartialIndexVector& indices, float64 weight) { - PartialIndexVector::const_iterator indexIterator = indices.cbegin(); - addToArray(gradients_, gradientsBegin, indexIterator, numGradients_, weight); - - for (uint32 i = 0; i < numGradients_; i++) { - uint32 index = indexIterator[i]; - 
addToArray(&hessians_[triangularNumber(i)], &hessiansBegin[triangularNumber(index)], indexIterator, i + 1, - weight); - } - } - - void DenseExampleWiseStatisticVector::difference( - gradient_const_iterator firstGradientsBegin, gradient_const_iterator firstGradientsEnd, - hessian_const_iterator firstHessiansBegin, hessian_const_iterator firstHessiansEnd, - const CompleteIndexVector& firstIndices, gradient_const_iterator secondGradientsBegin, - gradient_const_iterator secondGradientsEnd, hessian_const_iterator secondHessiansBegin, - hessian_const_iterator secondHessiansEnd) { - setArrayToDifference(gradients_, firstGradientsBegin, secondGradientsBegin, numGradients_); - setArrayToDifference(hessians_, firstHessiansBegin, secondHessiansBegin, numHessians_); - } - - void DenseExampleWiseStatisticVector::difference( - gradient_const_iterator firstGradientsBegin, gradient_const_iterator firstGradientsEnd, - hessian_const_iterator firstHessiansBegin, hessian_const_iterator firstHessiansEnd, - const PartialIndexVector& firstIndices, gradient_const_iterator secondGradientsBegin, - gradient_const_iterator secondGradientsEnd, hessian_const_iterator secondHessiansBegin, - hessian_const_iterator secondHessiansEnd) { - PartialIndexVector::const_iterator indexIterator = firstIndices.cbegin(); - setArrayToDifference(gradients_, firstGradientsBegin, secondGradientsBegin, indexIterator, numGradients_); - - for (uint32 i = 0; i < numGradients_; i++) { - uint32 offset = triangularNumber(i); - uint32 index = indexIterator[i]; - setArrayToDifference(&hessians_[offset], &firstHessiansBegin[triangularNumber(index)], - &secondHessiansBegin[offset], indexIterator, i + 1); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_dense.cpp b/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_dense.cpp deleted file mode 100644 index 5667afdc..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_dense.cpp +++ /dev/null @@ -1,107 +0,0 @@ -#include "boosting/data/statistic_vector_label_wise_dense.hpp" - -#include "boosting/data/arrays.hpp" -#include "common/data/arrays.hpp" - -#include - -namespace boosting { - - DenseLabelWiseStatisticVector::DenseLabelWiseStatisticVector(uint32 numElements) - : DenseLabelWiseStatisticVector(numElements, false) {} - - DenseLabelWiseStatisticVector::DenseLabelWiseStatisticVector(uint32 numElements, bool init) - : numElements_(numElements), - statistics_((Tuple*) (init ? calloc(numElements, sizeof(Tuple)) - : malloc(numElements * sizeof(Tuple)))) {} - - DenseLabelWiseStatisticVector::DenseLabelWiseStatisticVector(const DenseLabelWiseStatisticVector& vector) - : DenseLabelWiseStatisticVector(vector.numElements_) { - copyArray(vector.statistics_, statistics_, numElements_); - } - - DenseLabelWiseStatisticVector::~DenseLabelWiseStatisticVector() { - free(statistics_); - } - - DenseLabelWiseStatisticVector::iterator DenseLabelWiseStatisticVector::begin() { - return statistics_; - } - - DenseLabelWiseStatisticVector::iterator DenseLabelWiseStatisticVector::end() { - return &statistics_[numElements_]; - } - - DenseLabelWiseStatisticVector::const_iterator DenseLabelWiseStatisticVector::cbegin() const { - return statistics_; - } - - DenseLabelWiseStatisticVector::const_iterator DenseLabelWiseStatisticVector::cend() const { - return &statistics_[numElements_]; - } - - uint32 DenseLabelWiseStatisticVector::getNumElements() const { - return numElements_; - } - - void DenseLabelWiseStatisticVector::clear() { - setArrayToZeros(statistics_, numElements_); - } - - void DenseLabelWiseStatisticVector::add(const DenseLabelWiseStatisticVector& vector) { - addToArray(statistics_, vector.statistics_, numElements_); - } - - void DenseLabelWiseStatisticVector::add(const DenseLabelWiseStatisticConstView& view, uint32 row) { - addToArray(statistics_, view.cbegin(row), 
numElements_); - } - - void DenseLabelWiseStatisticVector::add(const DenseLabelWiseStatisticConstView& view, uint32 row, float64 weight) { - addToArray(statistics_, view.cbegin(row), numElements_, weight); - } - - void DenseLabelWiseStatisticVector::remove(const DenseLabelWiseStatisticConstView& view, uint32 row) { - removeFromArray(statistics_, view.cbegin(row), numElements_); - } - - void DenseLabelWiseStatisticVector::remove(const DenseLabelWiseStatisticConstView& view, uint32 row, - float64 weight) { - removeFromArray(statistics_, view.cbegin(row), numElements_, weight); - } - - void DenseLabelWiseStatisticVector::addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices) { - addToArray(statistics_, view.cbegin(row), numElements_); - } - - void DenseLabelWiseStatisticVector::addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices) { - PartialIndexVector::const_iterator indexIterator = indices.cbegin(); - addToArray(statistics_, view.cbegin(row), indexIterator, numElements_); - } - - void DenseLabelWiseStatisticVector::addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight) { - addToArray(statistics_, view.cbegin(row), numElements_, weight); - } - - void DenseLabelWiseStatisticVector::addToSubset(const DenseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices, float64 weight) { - PartialIndexVector::const_iterator indexIterator = indices.cbegin(); - addToArray(statistics_, view.cbegin(row), indexIterator, numElements_, weight); - } - - void DenseLabelWiseStatisticVector::difference(const DenseLabelWiseStatisticVector& first, - const CompleteIndexVector& firstIndices, - const DenseLabelWiseStatisticVector& second) { - setArrayToDifference(statistics_, first.cbegin(), second.cbegin(), numElements_); - } - - void DenseLabelWiseStatisticVector::difference(const 
DenseLabelWiseStatisticVector& first, - const PartialIndexVector& firstIndices, - const DenseLabelWiseStatisticVector& second) { - PartialIndexVector::const_iterator indexIterator = firstIndices.cbegin(); - setArrayToDifference(statistics_, first.cbegin(), second.cbegin(), indexIterator, numElements_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse.cpp deleted file mode 100644 index 9a22cea1..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse.cpp +++ /dev/null @@ -1,246 +0,0 @@ -#include "boosting/data/statistic_vector_label_wise_sparse.hpp" - -#include "boosting/data/arrays.hpp" -#include "common/data/arrays.hpp" -#include "statistic_vector_label_wise_sparse_common.hpp" - -#include - -namespace boosting { - - SparseLabelWiseStatisticVector::ConstIterator::ConstIterator(const Triple* iterator, float64 sumOfWeights) - : iterator_(iterator), sumOfWeights_(sumOfWeights) {} - - SparseLabelWiseStatisticVector::ConstIterator::value_type SparseLabelWiseStatisticVector::ConstIterator::operator[]( - uint32 index) const { - const Triple& triple = iterator_[index]; - float64 gradient = triple.first; - float64 hessian = triple.second + (sumOfWeights_ - triple.third); - return Tuple(gradient, hessian); - } - - SparseLabelWiseStatisticVector::ConstIterator::value_type SparseLabelWiseStatisticVector::ConstIterator::operator*() - const { - const Triple& triple = *iterator_; - float64 gradient = triple.first; - float64 hessian = triple.second + (sumOfWeights_ - triple.third); - return Tuple(gradient, hessian); - } - - SparseLabelWiseStatisticVector::ConstIterator& SparseLabelWiseStatisticVector::ConstIterator::operator++() { - ++iterator_; - return *this; - } - - SparseLabelWiseStatisticVector::ConstIterator& SparseLabelWiseStatisticVector::ConstIterator::operator++(int n) { - iterator_++; - return *this; 
- } - - SparseLabelWiseStatisticVector::ConstIterator& SparseLabelWiseStatisticVector::ConstIterator::operator--() { - --iterator_; - return *this; - } - - SparseLabelWiseStatisticVector::ConstIterator& SparseLabelWiseStatisticVector::ConstIterator::operator--(int n) { - iterator_--; - return *this; - } - - bool SparseLabelWiseStatisticVector::ConstIterator::operator!=(const ConstIterator& rhs) const { - return iterator_ != rhs.iterator_; - } - - bool SparseLabelWiseStatisticVector::ConstIterator::operator==(const ConstIterator& rhs) const { - return iterator_ == rhs.iterator_; - } - - SparseLabelWiseStatisticVector::ConstIterator::difference_type - SparseLabelWiseStatisticVector::ConstIterator::operator-(const ConstIterator& rhs) const { - return iterator_ - rhs.iterator_; - } - - SparseLabelWiseStatisticVector::SparseLabelWiseStatisticVector(uint32 numElements) - : SparseLabelWiseStatisticVector(numElements, false) {} - - SparseLabelWiseStatisticVector::SparseLabelWiseStatisticVector(uint32 numElements, bool init) - : numElements_(numElements), - statistics_((Triple*) (init ? 
calloc(numElements, sizeof(Triple)) - : malloc(numElements * sizeof(Triple)))), - sumOfWeights_(0) {} - - SparseLabelWiseStatisticVector::SparseLabelWiseStatisticVector(const SparseLabelWiseStatisticVector& vector) - : SparseLabelWiseStatisticVector(vector.numElements_) { - copyArray(vector.statistics_, statistics_, numElements_); - sumOfWeights_ = vector.sumOfWeights_; - } - - SparseLabelWiseStatisticVector::~SparseLabelWiseStatisticVector() { - free(statistics_); - } - - SparseLabelWiseStatisticVector::const_iterator SparseLabelWiseStatisticVector::cbegin() const { - return ConstIterator(statistics_, sumOfWeights_); - } - - SparseLabelWiseStatisticVector::const_iterator SparseLabelWiseStatisticVector::cend() const { - return ConstIterator(&statistics_[numElements_], sumOfWeights_); - } - - uint32 SparseLabelWiseStatisticVector::getNumElements() const { - return numElements_; - } - - void SparseLabelWiseStatisticVector::clear() { - sumOfWeights_ = 0; - setArrayToZeros(statistics_, numElements_); - } - - void SparseLabelWiseStatisticVector::add(const SparseLabelWiseStatisticVector& vector) { - sumOfWeights_ += vector.sumOfWeights_; - addToArray(statistics_, vector.statistics_, numElements_); - } - - void SparseLabelWiseStatisticVector::add(const SparseLabelWiseStatisticConstView& view, uint32 row) { - sumOfWeights_ += 1; - addToSparseLabelWiseStatisticVector(statistics_, view.cbegin(row), view.cend(row)); - } - - void SparseLabelWiseStatisticVector::add(const SparseLabelWiseStatisticConstView& view, uint32 row, - float64 weight) { - if (weight != 0) { - sumOfWeights_ += weight; - addToSparseLabelWiseStatisticVector(statistics_, view.cbegin(row), view.cend(row), weight); - } - } - - void SparseLabelWiseStatisticVector::remove(const SparseLabelWiseStatisticConstView& view, uint32 row) { - sumOfWeights_ -= 1; - removeFromSparseLabelWiseStatisticVector(statistics_, view.cbegin(row), view.cend(row)); - } - - void SparseLabelWiseStatisticVector::remove(const 
SparseLabelWiseStatisticConstView& view, uint32 row, - float64 weight) { - if (weight != 0) { - sumOfWeights_ -= weight; - removeFromSparseLabelWiseStatisticVector(statistics_, view.cbegin(row), view.cend(row), weight); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices) { - sumOfWeights_ += 1; - addToSparseLabelWiseStatisticVector(statistics_, view.cbegin(row), view.cend(row)); - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices) { - sumOfWeights_ += 1; - SparseLabelWiseStatisticConstView::const_row viewRow = view[row]; - PartialIndexVector::const_iterator indexIterator = indices.cbegin(); - uint32 numElements = indices.getNumElements(); - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - const IndexedValue>* entry = viewRow[index]; - - if (entry) { - const Tuple& tuple = entry->value; - Triple& triple = statistics_[i]; - triple.first += (tuple.first); - triple.second += (tuple.second); - triple.third += 1; - } - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight) { - if (weight != 0) { - sumOfWeights_ += weight; - addToSparseLabelWiseStatisticVector(statistics_, view.cbegin(row), view.cend(row), weight); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseStatisticConstView& view, uint32 row, - const PartialIndexVector& indices, float64 weight) { - if (weight != 0) { - sumOfWeights_ += weight; - SparseLabelWiseStatisticConstView::const_row viewRow = view[row]; - PartialIndexVector::const_iterator indexIterator = indices.cbegin(); - uint32 numElements = indices.getNumElements(); - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indexIterator[i]; - const IndexedValue>* 
entry = viewRow[index]; - - if (entry) { - const Tuple& tuple = entry->value; - Triple& triple = statistics_[i]; - triple.first += (tuple.first * weight); - triple.second += (tuple.second * weight); - triple.third += weight; - } - } - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const CompleteIndexVector& indices) { - SparseLabelWiseHistogramConstView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row]; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToArray(statistics_, view.cbegin(row), numElements_); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const PartialIndexVector& indices) { - SparseLabelWiseHistogramConstView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row]; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToArray(statistics_, view.cbegin(row), indices.cbegin(), indices.getNumElements()); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const CompleteIndexVector& indices, float64 weight) { - SparseLabelWiseHistogramConstView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row] * weight; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToArray(statistics_, view.cbegin(row), numElements_, weight); - } - } - - void SparseLabelWiseStatisticVector::addToSubset(const SparseLabelWiseHistogramConstView& view, uint32 row, - const PartialIndexVector& indices, float64 weight) { - SparseLabelWiseHistogramConstView::weight_const_iterator weightIterator = view.weights_cbegin(); - float64 binWeight = weightIterator[row] * weight; - - if (binWeight != 0) { - sumOfWeights_ += binWeight; - addToArray(statistics_, view.cbegin(row), indices.cbegin(), 
indices.getNumElements(), weight); - } - } - - void SparseLabelWiseStatisticVector::difference(const SparseLabelWiseStatisticVector& first, - const CompleteIndexVector& firstIndices, - const SparseLabelWiseStatisticVector& second) { - sumOfWeights_ = first.sumOfWeights_ - second.sumOfWeights_; - setArrayToDifference(statistics_, first.statistics_, second.statistics_, numElements_); - } - - void SparseLabelWiseStatisticVector::difference(const SparseLabelWiseStatisticVector& first, - const PartialIndexVector& firstIndices, - const SparseLabelWiseStatisticVector& second) { - sumOfWeights_ = first.sumOfWeights_ - second.sumOfWeights_; - setArrayToDifference(statistics_, first.statistics_, second.statistics_, firstIndices.cbegin(), numElements_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse_common.hpp b/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse_common.hpp deleted file mode 100644 index d2c42ac1..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/statistic_vector_label_wise_sparse_common.hpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_view_label_wise_sparse.hpp" - -namespace boosting { - - /** - * Adds the statistics that are stored in a single row of a `SparseLabelWiseStatisticConstView` to a sparse vector. 
- * - * @param statistics A pointer to an array the statistics should be added to - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the beginning of the statistics to - * be added - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the end of the statistics to be - * added - */ - static inline void addToSparseLabelWiseStatisticVector(Triple* statistics, - SparseLabelWiseStatisticConstView::const_iterator begin, - SparseLabelWiseStatisticConstView::const_iterator end) { - uint32 numElements = end - begin; - - for (uint32 i = 0; i < numElements; i++) { - const IndexedValue>& entry = begin[i]; - const Tuple& tuple = entry.value; - Triple& triple = statistics[entry.index]; - triple.first += tuple.first; - triple.second += tuple.second; - triple.third += 1; - } - } - - /** - * Adds the statistics that are stored in a single row of a `SparseLabelWiseStatisticConstView` to a sparse vector. - * The statistics are multiplied by a specific weight. - * - * @param statistics A pointer to an array the statistics should be added to - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the beginning of the statistics to - * be added - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the end of the statistics to be - * added - * @param weight The weight, the statistics should be multiplied by - */ - static inline void addToSparseLabelWiseStatisticVector(Triple* statistics, - SparseLabelWiseStatisticConstView::const_iterator begin, - SparseLabelWiseStatisticConstView::const_iterator end, - float64 weight) { - uint32 numElements = end - begin; - - for (uint32 i = 0; i < numElements; i++) { - const IndexedValue>& entry = begin[i]; - const Tuple& tuple = entry.value; - Triple& triple = statistics[entry.index]; - triple.first += (tuple.first * weight); - triple.second += (tuple.second * weight); - triple.third += weight; - } - } - - /** - * Removes the statistics that are stored in a single row 
of a `SparseLabelWiseStatisticConstView` from a sparse - * vector. - * - * @param statistics A pointer to an array the statistics should be removed from - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the beginning of the statistics to - * be removed - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the end of the statistics to be - * removed - */ - static inline void removeFromSparseLabelWiseStatisticVector(Triple* statistics, - SparseLabelWiseStatisticConstView::const_iterator begin, - SparseLabelWiseStatisticConstView::const_iterator end) { - uint32 numElements = end - begin; - - for (uint32 i = 0; i < numElements; i++) { - const IndexedValue>& entry = begin[i]; - const Tuple& tuple = entry.value; - Triple& triple = statistics[entry.index]; - triple.first -= tuple.first; - triple.second -= tuple.second; - triple.third -= 1; - } - } - - /** - * Removes the statistics that are stored in a single row of a `SparseLabelWiseStatisticConstView` from a sparse - * vector. The statistics are multiplied by a specific weight. 
- * - * @param statistics A pointer to an array the statistics should be remove from - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the beginning of the statistics to - * be removed - * @param begin A `SparseLabelWiseStatisticsConstView::const_iterator` to the end of the statistics to be - * removed - * @param weight The weight, the statistics should be multiplied by - */ - static inline void removeFromSparseLabelWiseStatisticVector(Triple* statistics, - SparseLabelWiseStatisticConstView::const_iterator begin, - SparseLabelWiseStatisticConstView::const_iterator end, - float64 weight) { - uint32 numElements = end - begin; - - for (uint32 i = 0; i < numElements; i++) { - const IndexedValue>& entry = begin[i]; - const Tuple& tuple = entry.value; - Triple& triple = statistics[entry.index]; - triple.first -= (tuple.first * weight); - triple.second -= (tuple.second * weight); - triple.third -= weight; - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_view_example_wise_dense.cpp b/cpp/subprojects/boosting/src/boosting/data/statistic_view_example_wise_dense.cpp deleted file mode 100644 index 42513329..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/statistic_view_example_wise_dense.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include "boosting/data/statistic_view_example_wise_dense.hpp" - -#include "boosting/data/arrays.hpp" -#include "common/data/arrays.hpp" - -namespace boosting { - - DenseExampleWiseStatisticConstView::DenseExampleWiseStatisticConstView(uint32 numRows, uint32 numGradients, - uint32 numHessians, float64* gradients, - float64* hessians) - : numRows_(numRows), numGradients_(numGradients), numHessians_(numHessians), gradients_(gradients), - hessians_(hessians) {} - - DenseExampleWiseStatisticConstView::gradient_const_iterator DenseExampleWiseStatisticConstView::gradients_cbegin( - uint32 row) const { - return &gradients_[row * numGradients_]; - } - - 
DenseExampleWiseStatisticConstView::gradient_const_iterator DenseExampleWiseStatisticConstView::gradients_cend( - uint32 row) const { - return &gradients_[(row + 1) * numGradients_]; - } - - DenseExampleWiseStatisticConstView::hessian_const_iterator DenseExampleWiseStatisticConstView::hessians_cbegin( - uint32 row) const { - return &hessians_[row * numHessians_]; - } - - DenseExampleWiseStatisticConstView::hessian_const_iterator DenseExampleWiseStatisticConstView::hessians_cend( - uint32 row) const { - return &hessians_[(row + 1) * numHessians_]; - } - - DenseExampleWiseStatisticConstView::hessian_diagonal_const_iterator - DenseExampleWiseStatisticConstView::hessians_diagonal_cbegin(uint32 row) const { - return DiagonalConstIterator(&hessians_[row * numHessians_], 0); - } - - DenseExampleWiseStatisticConstView::hessian_diagonal_const_iterator - DenseExampleWiseStatisticConstView::hessians_diagonal_cend(uint32 row) const { - return DiagonalConstIterator(&hessians_[row * numHessians_], numGradients_); - } - - uint32 DenseExampleWiseStatisticConstView::getNumRows() const { - return numRows_; - } - - uint32 DenseExampleWiseStatisticConstView::getNumCols() const { - return numGradients_; - } - - DenseExampleWiseStatisticView::DenseExampleWiseStatisticView(uint32 numRows, uint32 numGradients, - uint32 numHessians, float64* gradients, - float64* hessians) - : DenseExampleWiseStatisticConstView(numRows, numGradients, numHessians, gradients, hessians) {} - - DenseExampleWiseStatisticView::gradient_iterator DenseExampleWiseStatisticView::gradients_begin(uint32 row) { - return &gradients_[row * numGradients_]; - } - - DenseExampleWiseStatisticView::gradient_iterator DenseExampleWiseStatisticView::gradients_end(uint32 row) { - return &gradients_[(row + 1) * numGradients_]; - } - - DenseExampleWiseStatisticView::hessian_iterator DenseExampleWiseStatisticView::hessians_begin(uint32 row) { - return &hessians_[row * numHessians_]; - } - - 
DenseExampleWiseStatisticView::hessian_iterator DenseExampleWiseStatisticView::hessians_end(uint32 row) { - return &hessians_[(row + 1) * numHessians_]; - } - - void DenseExampleWiseStatisticView::clear() { - setArrayToZeros(gradients_, numRows_ * numGradients_); - setArrayToZeros(hessians_, numRows_ * numHessians_); - } - - void DenseExampleWiseStatisticView::addToRow(uint32 row, gradient_const_iterator gradientsBegin, - gradient_const_iterator gradientsEnd, - hessian_const_iterator hessiansBegin, - hessian_const_iterator hessiansEnd, float64 weight) { - addToArray(&gradients_[row * numGradients_], gradientsBegin, numGradients_, weight); - addToArray(&hessians_[row * numHessians_], hessiansBegin, numHessians_, weight); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_dense.cpp b/cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_dense.cpp deleted file mode 100644 index 6f393e62..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_dense.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "boosting/data/statistic_view_label_wise_dense.hpp" - -#include "boosting/data/arrays.hpp" -#include "common/data/arrays.hpp" - -namespace boosting { - - DenseLabelWiseStatisticConstView::DenseLabelWiseStatisticConstView(uint32 numRows, uint32 numCols, - Tuple* statistics) - : numRows_(numRows), numCols_(numCols), statistics_(statistics) {} - - DenseLabelWiseStatisticConstView::const_iterator DenseLabelWiseStatisticConstView::cbegin(uint32 row) const { - return &statistics_[row * numCols_]; - } - - DenseLabelWiseStatisticConstView::const_iterator DenseLabelWiseStatisticConstView::cend(uint32 row) const { - return &statistics_[(row + 1) * numCols_]; - } - - uint32 DenseLabelWiseStatisticConstView::getNumRows() const { - return numRows_; - } - - uint32 DenseLabelWiseStatisticConstView::getNumCols() const { - return numCols_; - } - - DenseLabelWiseStatisticView::DenseLabelWiseStatisticView(uint32 
numRows, uint32 numCols, Tuple* statistics) - : DenseLabelWiseStatisticConstView(numRows, numCols, statistics) {} - - DenseLabelWiseStatisticView::iterator DenseLabelWiseStatisticView::begin(uint32 row) { - return &statistics_[row * numCols_]; - } - - DenseLabelWiseStatisticView::iterator DenseLabelWiseStatisticView::end(uint32 row) { - return &statistics_[(row + 1) * numCols_]; - } - - void DenseLabelWiseStatisticView::clear() { - setArrayToZeros(statistics_, numRows_ * numCols_); - } - - void DenseLabelWiseStatisticView::addToRow(uint32 row, const_iterator begin, const_iterator end, float64 weight) { - uint32 offset = row * numCols_; - addToArray(&statistics_[offset], begin, numCols_, weight); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_sparse.cpp deleted file mode 100644 index b1281baf..00000000 --- a/cpp/subprojects/boosting/src/boosting/data/statistic_view_label_wise_sparse.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "boosting/data/statistic_view_label_wise_sparse.hpp" - -namespace boosting { - - SparseLabelWiseStatisticConstView::SparseLabelWiseStatisticConstView(uint32 numCols, - SparseSetMatrix>* statistics) - : numCols_(numCols), statistics_(statistics) {} - - SparseLabelWiseStatisticConstView::const_iterator SparseLabelWiseStatisticConstView::cbegin(uint32 row) const { - return statistics_->cbegin(row); - } - - SparseLabelWiseStatisticConstView::const_iterator SparseLabelWiseStatisticConstView::cend(uint32 row) const { - return statistics_->cend(row); - } - - SparseLabelWiseStatisticConstView::const_row SparseLabelWiseStatisticConstView::operator[](uint32 row) const { - return ((const SparseSetMatrix>&) *statistics_)[row]; - } - - uint32 SparseLabelWiseStatisticConstView::getNumRows() const { - return statistics_->getNumRows(); - } - - uint32 SparseLabelWiseStatisticConstView::getNumCols() const { - return numCols_; - } - - 
SparseLabelWiseStatisticView::SparseLabelWiseStatisticView(uint32 numCols, - SparseSetMatrix>* statistics) - : SparseLabelWiseStatisticConstView(numCols, statistics) {} - - SparseLabelWiseStatisticView::row SparseLabelWiseStatisticView::operator[](uint32 row) { - return (*statistics_)[row]; - } - - void SparseLabelWiseStatisticView::clear() { - statistics_->clear(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/iterator/diagonal_iterator.cpp b/cpp/subprojects/boosting/src/boosting/iterator/diagonal_iterator.cpp deleted file mode 100644 index 6a0560c7..00000000 --- a/cpp/subprojects/boosting/src/boosting/iterator/diagonal_iterator.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "boosting/iterator/diagonal_iterator.hpp" - -#include "boosting/math/math.hpp" - -namespace boosting { - - template - DiagonalConstIterator::DiagonalConstIterator(const T* ptr, uint32 index) : ptr_(ptr), index_(index) {} - - template - typename DiagonalConstIterator::reference DiagonalConstIterator::operator[](uint32 index) const { - return ptr_[triangularNumber(index + 1) - 1]; - } - - template - typename DiagonalConstIterator::reference DiagonalConstIterator::operator*() const { - return ptr_[triangularNumber(index_ + 1) - 1]; - } - - template - DiagonalConstIterator& DiagonalConstIterator::operator++() { - ++index_; - return *this; - } - - template - DiagonalConstIterator& DiagonalConstIterator::operator++(int n) { - index_++; - return *this; - } - - template - DiagonalConstIterator& DiagonalConstIterator::operator--() { - --index_; - return *this; - } - - template - DiagonalConstIterator& DiagonalConstIterator::operator--(int n) { - index_--; - return *this; - } - - template - bool DiagonalConstIterator::operator!=(const DiagonalConstIterator& rhs) const { - return index_ != rhs.index_; - } - - template - bool DiagonalConstIterator::operator==(const DiagonalConstIterator& rhs) const { - return index_ == rhs.index_; - } - - template - typename 
DiagonalConstIterator::difference_type DiagonalConstIterator::operator-( - const DiagonalConstIterator& rhs) const { - return (difference_type) index_ - (difference_type) rhs.index_; - } - - template class DiagonalConstIterator; - template class DiagonalConstIterator; - template class DiagonalConstIterator; - template class DiagonalConstIterator; - -} diff --git a/cpp/subprojects/boosting/src/boosting/learner.cpp b/cpp/subprojects/boosting/src/boosting/learner.cpp deleted file mode 100644 index ef8ee222..00000000 --- a/cpp/subprojects/boosting/src/boosting/learner.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "boosting/learner.hpp" - -#include "boosting/model/rule_list_builder.hpp" -#include "boosting/rule_evaluation/rule_compare_function.hpp" - -namespace boosting { - - AbstractBoostingRuleLearner::Config::Config() - : AbstractRuleLearner::Config(BOOSTED_RULE_COMPARE_FUNCTION), - headConfigPtr_(std::make_unique(labelBinningConfigPtr_, parallelStatisticUpdateConfigPtr_, - l1RegularizationConfigPtr_, l2RegularizationConfigPtr_)), - statisticsConfigPtr_(std::make_unique(lossConfigPtr_)), - lossConfigPtr_(std::make_unique(headConfigPtr_)), - l1RegularizationConfigPtr_(std::make_unique()), - l2RegularizationConfigPtr_(std::make_unique()), - labelBinningConfigPtr_( - std::make_unique(l1RegularizationConfigPtr_, l2RegularizationConfigPtr_)) {} - - std::unique_ptr& AbstractBoostingRuleLearner::Config::getHeadConfigPtr() { - return headConfigPtr_; - } - - std::unique_ptr& AbstractBoostingRuleLearner::Config::getStatisticsConfigPtr() { - return statisticsConfigPtr_; - } - - std::unique_ptr& AbstractBoostingRuleLearner::Config::getL1RegularizationConfigPtr() { - return l1RegularizationConfigPtr_; - } - - std::unique_ptr& AbstractBoostingRuleLearner::Config::getL2RegularizationConfigPtr() { - return l2RegularizationConfigPtr_; - } - - std::unique_ptr& AbstractBoostingRuleLearner::Config::getLossConfigPtr() { - return lossConfigPtr_; - } - - std::unique_ptr& 
AbstractBoostingRuleLearner::Config::getLabelBinningConfigPtr() { - return labelBinningConfigPtr_; - } - - AbstractBoostingRuleLearner::AbstractBoostingRuleLearner(IBoostingRuleLearner::IConfig& config, - Blas::DdotFunction ddotFunction, - Blas::DspmvFunction dspmvFunction, - Lapack::DsysvFunction dsysvFunction) - : AbstractRuleLearner(config), config_(config), blas_(Blas(ddotFunction, dspmvFunction)), - lapack_(Lapack(dsysvFunction)) {} - - std::unique_ptr AbstractBoostingRuleLearner::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix) const { - return config_.getStatisticsConfigPtr()->createStatisticsProviderFactory(featureMatrix, labelMatrix, blas_, - lapack_); - } - - std::unique_ptr AbstractBoostingRuleLearner::createModelBuilderFactory() const { - return std::make_unique(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/learner_boomer.cpp b/cpp/subprojects/boosting/src/boosting/learner_boomer.cpp deleted file mode 100644 index 651d6f5a..00000000 --- a/cpp/subprojects/boosting/src/boosting/learner_boomer.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "boosting/learner_boomer.hpp" - -namespace boosting { - - Boomer::Config::Config() { - this->useSequentialRuleModelAssemblage(); - this->useGreedyTopDownRuleInduction(); - this->useDefaultRule(); - this->useNoLabelSampling(); - this->useNoInstanceSampling(); - this->useFeatureSamplingWithoutReplacement(); - this->useParallelPrediction(); - this->useAutomaticDefaultRule(); - this->useAutomaticPartitionSampling(); - this->useAutomaticFeatureBinning(); - this->useSizeStoppingCriterion(); - this->useNoTimeStoppingCriterion(); - this->useNoRulePruning(); - this->useNoGlobalPruning(); - this->useNoSequentialPostOptimization(); - this->useConstantShrinkagePostProcessor(); - this->useAutomaticParallelRuleRefinement(); - this->useAutomaticParallelStatisticUpdate(); - this->useAutomaticHeads(); - this->useAutomaticStatistics(); - 
this->useLabelWiseLogisticLoss(); - this->useNoL1Regularization(); - this->useL2Regularization(); - this->useAutomaticLabelBinning(); - this->useAutomaticBinaryPredictor(); - this->useLabelWiseScorePredictor(); - this->useAutomaticProbabilityPredictor(); - } - - ISizeStoppingCriterionConfig& Boomer::Config::useSizeStoppingCriterion() { - ISizeStoppingCriterionConfig& ref = ISizeStoppingCriterionMixin::useSizeStoppingCriterion(); - ref.setMaxRules(1000); - return ref; - } - - Boomer::Boomer(std::unique_ptr configPtr, Blas::DdotFunction ddotFunction, - Blas::DspmvFunction dspmvFunction, Lapack::DsysvFunction dsysvFunction) - : AbstractBoostingRuleLearner(*configPtr, ddotFunction, dspmvFunction, dsysvFunction), - configPtr_(std::move(configPtr)) {} - - std::unique_ptr createBoomerConfig() { - return std::make_unique(); - } - - std::unique_ptr createBoomer(std::unique_ptr configPtr, Blas::DdotFunction ddotFunction, - Blas::DspmvFunction dspmvFunction, Lapack::DsysvFunction dsysvFunction) { - return std::make_unique(std::move(configPtr), ddotFunction, dspmvFunction, dsysvFunction); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_logistic.cpp b/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_logistic.cpp deleted file mode 100644 index 6436944d..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_logistic.cpp +++ /dev/null @@ -1,374 +0,0 @@ -#include "boosting/losses/loss_example_wise_logistic.hpp" - -#include "boosting/prediction/probability_function_chain_rule.hpp" -#include "boosting/prediction/probability_function_logistic.hpp" -#include "common/iterator/binary_forward_iterator.hpp" -#include "common/math/math.hpp" - -namespace boosting { - - static inline void updateGradientAndHessian(float64 invertedExpectedScore, float64 x, float64 max, float64 sumExp, - float64& gradient, float64& hessian) { - // Calculate the gradient that corresponds to the current label. 
The gradient calculates as - // `-expectedScore_c * exp(x_c) / (1 + exp(x_1) + exp(x_2) + ...)`, which can be rewritten as - // `-expectedScore_c * (exp(x_c - max) / sumExp)` - float64 xExp = std::exp(x - max); - float64 tmp = divideOrZero(xExp, sumExp); - gradient = invertedExpectedScore * tmp; - - // Calculate the Hessian on the diagonal of the Hessian matrix that corresponds to the current label. Such - // Hessian calculates as `exp(x_c) * (1 + exp(x_1) + exp(x_2) + ...) / (1 + exp(x_1) + exp(x_2) + ...)^2`, - // or as `(exp(x_c - max) / sumExp) * (1 - exp(x_c - max) / sumExp)` - hessian = tmp * (1 - tmp); - } - - template - static inline void updateLabelWiseStatisticsInternally(VectorConstView::const_iterator scoreIterator, - LabelIterator labelIterator, - DenseLabelWiseStatisticView::iterator statisticIterator, - uint32 numLabels) { - // This implementation uses the so-called "exp-normalize-trick" to increase numerical stability (see, e.g., - // https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). It is based on rewriting a fraction - // of the form `exp(x_1) / (exp(x_1) + exp(x_2) + ...)` as - // `exp(x_1 - max) / (exp(x_1 - max) + exp(x_2 - max) + ...)`, where `max = max(x_1, x_2, ...)`. To be able to - // exploit this equivalence for the calculation of gradients and Hessians, they are calculated as products of - // fractions of the above form. - LabelIterator labelIterator2 = labelIterator; - - // For each label `c`, calculate `x = -expectedScore_c * predictedScore_c` and find the largest and second - // largest values (that must be greater than 0, because `exp(1) = 0`) among all of them... - float64 max = 0; // The largest value - - for (uint32 c = 0; c < numLabels; c++) { - float64 predictedScore = scoreIterator[c]; - bool trueLabel = *labelIterator; - float64 x = trueLabel ? 
-predictedScore : predictedScore; - statisticIterator[c].first = x; // Temporarily store `x` in the array of statistics - - if (x > max) { - max = x; - } - - labelIterator++; - } - - // Calculate `sumExp = exp(0 - max) + exp(x_1 - max) + exp(x_2 - max) + ...` - float64 sumExp = std::exp(0.0 - max); - - for (uint32 c = 0; c < numLabels; c++) { - float64 x = statisticIterator[c].first; - sumExp += std::exp(x - max); - } - - // Calculate the gradients and Hessians... - for (uint32 c = 0; c < numLabels; c++) { - float64 predictedScore = scoreIterator[c]; - bool trueLabel = *labelIterator2; - float64 invertedExpectedScore = trueLabel ? -1 : 1; - float64 x = predictedScore * invertedExpectedScore; - Tuple& tuple = statisticIterator[c]; - updateGradientAndHessian(invertedExpectedScore, x, max, sumExp, tuple.first, tuple.second); - labelIterator2++; - } - } - - template - static inline void updateExampleWiseStatisticsInternally( - VectorConstView::const_iterator scoreIterator, LabelIterator labelIterator, - DenseExampleWiseStatisticView::gradient_iterator gradientIterator, - DenseExampleWiseStatisticView::hessian_iterator hessianIterator, uint32 numLabels) { - // This implementation uses the so-called "exp-normalize-trick" to increase numerical stability (see, e.g., - // https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). It is based on rewriting a fraction - // of the form `exp(x_1) / (exp(x_1) + exp(x_2) + ...)` as - // `exp(x_1 - max) / (exp(x_1 - max) + exp(x_2 - max) + ...)`, where `max = max(x_1, x_2, ...)`. To be able to - // exploit this equivalence for the calculation of gradients and Hessians, they are calculated as products of - // fractions of the above form. 
- LabelIterator labelIterator2 = labelIterator; - LabelIterator labelIterator3 = labelIterator; - - // For each label `c`, calculate `x = -expectedScore_c * predictedScore_c` and find the largest and second - // largest values (that must be greater than 0, because `exp(1) = 0`) among all of them... - float64 max = 0; // The largest value - float64 max2 = 0; // The second largest value - - for (uint32 c = 0; c < numLabels; c++) { - float64 predictedScore = scoreIterator[c]; - bool trueLabel = *labelIterator; - float64 x = trueLabel ? -predictedScore : predictedScore; - gradientIterator[c] = x; // Temporarily store `x` in the array of gradients - - if (x > max) { - max2 = max; - max = x; - } else if (x > max2) { - max2 = x; - } - - labelIterator++; - } - - // In the following, the largest value the exponential function may be applied to is `max + max2`, which happens - // when Hessians that belong to the upper triangle of the Hessian matrix are calculated... - max2 += max; - - // Calculate `sumExp = exp(0 - max) + exp(x_1 - max) + exp(x_2 - max) + ...` - float64 sumExp = std::exp(0.0 - max); - float64 zeroExp = std::exp(0.0 - max2); - float64 sumExp2 = zeroExp; - - for (uint32 c = 0; c < numLabels; c++) { - float64 x = gradientIterator[c]; - sumExp += std::exp(x - max); - sumExp2 += std::exp(x - max2); - } - - // Calculate `zeroExp / sumExp2` (it is needed multiple times for calculating Hessians that belong to the upper - // triangle of the Hessian matrix)... - zeroExp = divideOrZero(zeroExp, sumExp2); - - // Calculate the gradients and Hessians... - for (uint32 c = 0; c < numLabels; c++) { - float64 predictedScore = scoreIterator[c]; - bool trueLabel = *labelIterator2; - float64 invertedExpectedScore = trueLabel ? -1 : 1; - float64 x = predictedScore * invertedExpectedScore; - - // Calculate the Hessians that belong to the part of the Hessian matrix' upper triangle that corresponds to - // the current label. 
Such Hessian calculates as - // `-expectedScore_c * expectedScore_r * exp(x_c + x_r) / (1 + exp(x_1) + exp(x_2) + ...)^2`, or as - // `-expectedScore_c * expectedScore_r * (exp(x_c + x_r - max) / sumExp) * (exp(0 - max) / sumExp)` - LabelIterator labelIterator4 = labelIterator3; - - for (uint32 r = 0; r < c; r++) { - float64 predictedScore2 = scoreIterator[r]; - bool trueLabel2 = *labelIterator4; - float64 expectedScore2 = trueLabel2 ? 1 : -1; - float64 x2 = predictedScore2 * -expectedScore2; - *hessianIterator = invertedExpectedScore * expectedScore2 - * divideOrZero(std::exp(x + x2 - max2), sumExp2) * zeroExp; - hessianIterator++; - labelIterator4++; - } - - updateGradientAndHessian(invertedExpectedScore, x, max, sumExp, gradientIterator[c], *hessianIterator); - hessianIterator++; - labelIterator2++; - } - } - - template - static inline float64 evaluateInternally(VectorConstView::const_iterator scoreIterator, - LabelIterator labelIterator, uint32 numLabels) { - // The example-wise logistic loss calculates as - // `log(1 + exp(-expectedScore_1 * predictedScore_1) + ... + exp(-expectedScore_2 * predictedScore_2) + ...)`. - // In the following, we exploit the identity - // `log(exp(x_1) + exp(x_2) + ...) = max + log(exp(x_1 - max) + exp(x_2 - max) + ...)`, where - // `max = max(x_1, x_2, ...)`, to increase numerical stability (see, e.g., section "Log-sum-exp for computing - // the log-distribution" in https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). - LabelIterator labelIterator2 = labelIterator; - float64 max = 0; - - // For each label `i`, calculate `x = -expectedScore_i * predictedScore_i` and find the largest value (that must - // be greater than 0, because `exp(1) = 0`) among all of them... - for (uint32 i = 0; i < numLabels; i++) { - bool trueLabel = *labelIterator; - float64 predictedScore = scoreIterator[i]; - float64 x = trueLabel ? 
-predictedScore : predictedScore; - - if (x > max) { - max = x; - } - - labelIterator++; - } - - // Calculate the example-wise loss as `max + log(exp(0 - max) + exp(x_1 - max) + ...)`... - float64 sumExp = std::exp(0 - max); - - for (uint32 i = 0; i < numLabels; i++) { - bool trueLabel = *labelIterator2; - float64 predictedScore = scoreIterator[i]; - float64 x = trueLabel ? -predictedScore : predictedScore; - sumExp += std::exp(x - max); - labelIterator2++; - } - - return max + std::log(sumExp); - } - - /** - * An implementation of the type `IExampleWiseLoss` that implements a multi-label variant of the logistic loss that - * is applied example-wise. - */ - class ExampleWiseLogisticLoss final : public IExampleWiseLoss { - public: - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - 
CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - void updateExampleWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const override { - updateExampleWiseStatisticsInternally( - scoreMatrix.values_cbegin(exampleIndex), labelMatrix.values_cbegin(exampleIndex), - statisticView.gradients_begin(exampleIndex), statisticView.hessians_begin(exampleIndex), - labelMatrix.getNumCols()); - } - - void updateExampleWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateExampleWiseStatisticsInternally( - scoreMatrix.values_cbegin(exampleIndex), labelIterator, statisticView.gradients_begin(exampleIndex), - 
statisticView.hessians_begin(exampleIndex), labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override { - return evaluateInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - return evaluateInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - labelMatrix.getNumCols()); - } - - /** - * @see `IDistanceMeasure::measureDistance` - */ - float64 measureDistance(uint32 labelVectorIndex, const LabelVector& labelVector, - VectorView::const_iterator scoresBegin, - VectorView::const_iterator scoresEnd) const override { - // The example-wise logistic loss calculates as - // `log(1 + exp(-expectedScore_1 * predictedScore_1) + ... + exp(-expectedScore_2 * predictedScore_2) - // + ...)`. In the following, we exploit the identity `log(exp(x_1) + exp(x_2) + ...) = - // max + log(exp(x_1 - max) + exp(x_2 - max) + ...)`, where `max = max(x_1, x_2, ...)`, to increase - // numerical stability (see, e.g., section "Log-sum-exp for computing the log-distribution" in - // https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). - uint32 numLabels = scoresEnd - scoresBegin; - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - float64 max = 0; - - // For each label `i`, calculate `x = -expectedScore_i * predictedScore_i` and find the largest value - // (that must be greater than 0, because `exp(1) = 0`) among all of them... 
- for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoresBegin[i]; - bool trueLabel = *labelIterator; - float64 x = trueLabel ? -predictedScore : predictedScore; - - if (x > max) { - max = x; - } - - labelIterator++; - } - - // Calculate the example-wise loss as `max + log(exp(0 - max) + exp(x_1 - max) + ...)`... - float64 sumExp = std::exp(0 - max); - labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoresBegin[i]; - bool trueLabel = *labelIterator; - float64 x = trueLabel ? -predictedScore : predictedScore; - sumExp += std::exp(x - max); - labelIterator++; - } - - return max + std::log(sumExp); - } - }; - - /** - * Allows to create instances of the type `IExampleWiseLoss` that implement a multi-label variant of the logistic - * loss that is applied example-wise. - */ - class ExampleWiseLogisticLossFactory final : public IExampleWiseLossFactory { - public: - - std::unique_ptr createExampleWiseLoss() const override { - return std::make_unique(); - } - }; - - ExampleWiseLogisticLossConfig::ExampleWiseLogisticLossConfig(const std::unique_ptr& headConfigPtr) - : headConfigPtr_(headConfigPtr) {} - - std::unique_ptr ExampleWiseLogisticLossConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, *this, blas, lapack); - } - - std::unique_ptr - ExampleWiseLogisticLossConfig::createMarginalProbabilityFunctionFactory() const { - return std::make_unique(); - } - - std::unique_ptr - ExampleWiseLogisticLossConfig::createJointProbabilityFunctionFactory() const { - return std::make_unique(this->createMarginalProbabilityFunctionFactory()); - } - - float64 ExampleWiseLogisticLossConfig::getDefaultPrediction() const { - return 0; - 
} - - std::unique_ptr ExampleWiseLogisticLossConfig::createExampleWiseLossFactory() const { - return std::make_unique(); - } - -} \ No newline at end of file diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_error.cpp b/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_error.cpp deleted file mode 100644 index c627d585..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_error.cpp +++ /dev/null @@ -1,276 +0,0 @@ -#include "boosting/losses/loss_example_wise_squared_error.hpp" - -#include "common/iterator/binary_forward_iterator.hpp" -#include "common/math/math.hpp" - -namespace boosting { - - template - static inline void updateLabelWiseStatisticsInternally(VectorConstView::const_iterator scoreIterator, - LabelIterator labelIterator, - DenseLabelWiseStatisticView::iterator statisticIterator, - uint32 numLabels) { - LabelIterator labelIterator2 = labelIterator; - - // For each label `i`, calculate `x_i = predictedScore_i^2 + (-2 * expectedScore_i * predictedScore_i) + 1` and - // sum up those values. The sum is used as a denominator when calculating the gradients and Hessians - // afterwards... - float64 denominator = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - float64 expectedScore = trueLabel ? 1 : -1; - float64 x = (predictedScore * predictedScore) + (-2 * expectedScore * predictedScore) + 1; - statisticIterator[i].first = x; // Temporarily store `x` in the array of gradients - denominator += x; - labelIterator++; - } - - // The denominator that is used for the calculation of gradients is `sqrt(x_1 + x_2 + ...)`... - float64 denominatorGradient = std::sqrt(denominator); - - // The denominator that is used for the calculation of Hessians is `(x_1 + x_2 + ...)^1.5`... - float64 denominatorHessian = std::pow(denominator, 1.5); - - // Calculate the gradients and Hessians... 
- for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator2; - float64 expectedScore = trueLabel ? 1 : -1; - Tuple& tuple = statisticIterator[i]; - float64 x = tuple.first; - - // Calculate the gradient as `(predictedScore_i - expectedScore_i) / sqrt(x_1 + x_2 + ...)`... - tuple.first = divideOrZero(predictedScore - expectedScore, denominatorGradient); - - // Calculate the Hessian on the diagonal of the Hessian matrix as - // `(x_1 + ... + x_i-1 + x_i+1 + ...) / (x_1 + x_2 + ...)^1.5`... - tuple.second = divideOrZero(denominator - x, denominatorHessian); - labelIterator2++; - } - } - - template - static inline void updateExampleWiseStatisticsInternally( - VectorConstView::const_iterator scoreIterator, LabelIterator labelIterator, - DenseExampleWiseStatisticView::gradient_iterator gradientIterator, - DenseExampleWiseStatisticView::hessian_iterator hessianIterator, uint32 numLabels) { - LabelIterator labelIterator2 = labelIterator; - LabelIterator labelIterator3 = labelIterator; - - // For each label `i`, calculate `x_i = predictedScore_i^2 + (-2 * expectedScore_i * predictedScore_i) + 1` and - // sum up those values. The sum is used as a denominator when calculating the gradients and Hessians - // afterwards... - float64 denominator = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - float64 expectedScore = trueLabel ? 1 : -1; - float64 x = (predictedScore * predictedScore) + (-2 * expectedScore * predictedScore) + 1; - gradientIterator[i] = x; // Temporarily store `x` in the array of gradients - denominator += x; - labelIterator++; - } - - // The denominator that is used for the calculation of gradients is `sqrt(x_1 + x_2 + ...)`... - float64 denominatorGradient = std::sqrt(denominator); - - // The denominator that is used for the calculation of Hessians is `(x_1 + x_2 + ...)^1.5`... 
- float64 denominatorHessian = std::pow(denominator, 1.5); - - // Calculate the gradients and Hessians... - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator2; - float64 expectedScore = trueLabel ? 1 : -1; - float64 x = gradientIterator[i]; - - // Calculate the Hessians that belong to the part of the Hessian matrix' upper triangle that corresponds to - // the current label. Such a hessian calculates as - // `-(predictedScore_i - expectedScore_i) * (predictedScore_j - expectedScore_j) / (x_1 + x_2 + ...)^1.5` - LabelIterator labelIterator4 = labelIterator3; - - for (uint32 j = 0; j < i; j++) { - float64 predictedScore2 = scoreIterator[j]; - bool trueLabel2 = *labelIterator4; - float64 expectedScore2 = trueLabel2 ? 1 : -1; - *hessianIterator = divideOrZero( - -(predictedScore - expectedScore) * (predictedScore2 - expectedScore2), denominatorHessian); - hessianIterator++; - labelIterator4++; - } - - // Calculate the gradient as `(predictedScore_i - expectedScore_i) / sqrt(x_1 + x_2 + ...)`... - gradientIterator[i] = divideOrZero(predictedScore - expectedScore, denominatorGradient); - - // Calculate the Hessian on the diagonal of the Hessian matrix as - // `(x_1 + ... + x_i-1 + x_i+1 + ...) / (x_1 + x_2 + ...)^1.5`... - *hessianIterator = divideOrZero(denominator - x, denominatorHessian); - hessianIterator++; - labelIterator2++; - } - } - - template - static inline float64 evaluateInternally(VectorConstView::const_iterator scoreIterator, - LabelIterator labelIterator, uint32 numLabels) { - // The example-wise squared error loss calculates as `sqrt((expectedScore_1 - predictedScore_1)^2 + ...)`. - float64 sumOfSquares = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - float64 expectedScore = trueLabel ? 
1 : -1; - float64 difference = (expectedScore - predictedScore); - sumOfSquares += (difference * difference); - labelIterator++; - } - - return std::sqrt(sumOfSquares); - } - - /** - * An implementation of the type `IExampleWiseLoss` that implements a multi-label variant of the squared error loss - * that is applied example-wise. - */ - class ExampleWiseSquaredErrorLoss final : public IExampleWiseLoss { - public: - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - 
statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - void updateExampleWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const override { - updateExampleWiseStatisticsInternally( - scoreMatrix.values_cbegin(exampleIndex), labelMatrix.values_cbegin(exampleIndex), - statisticView.gradients_begin(exampleIndex), statisticView.hessians_begin(exampleIndex), - labelMatrix.getNumCols()); - } - - void updateExampleWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateExampleWiseStatisticsInternally( - scoreMatrix.values_cbegin(exampleIndex), labelIterator, statisticView.gradients_begin(exampleIndex), - statisticView.hessians_begin(exampleIndex), labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override { - return evaluateInternally(scoreMatrix.values_cbegin(exampleIndex), - 
labelMatrix.values_cbegin(exampleIndex), labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - return evaluateInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - labelMatrix.getNumCols()); - } - - /** - * @see `IDistanceMeasure::measureDistance` - */ - float64 measureDistance(uint32 labelVectorIndex, const LabelVector& labelVector, - VectorView::const_iterator scoresBegin, - VectorView::const_iterator scoresEnd) const override { - uint32 numLabels = scoresEnd - scoresBegin; - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - return evaluateInternally(scoresBegin, labelIterator, numLabels); - } - }; - - /** - * Allows to create instances of the type `IExampleWiseLoss` that implement a multi-label variant of the squared - * error loss that is applied example-wise. 
- */ - class ExampleWiseSquaredErrorLossFactory final : public IExampleWiseLossFactory { - public: - - std::unique_ptr createExampleWiseLoss() const override { - return std::make_unique(); - } - }; - - ExampleWiseSquaredErrorLossConfig::ExampleWiseSquaredErrorLossConfig( - const std::unique_ptr& headConfigPtr) - : headConfigPtr_(headConfigPtr) {} - - std::unique_ptr ExampleWiseSquaredErrorLossConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, *this, blas, lapack); - } - - std::unique_ptr - ExampleWiseSquaredErrorLossConfig::createMarginalProbabilityFunctionFactory() const { - return nullptr; - } - - std::unique_ptr - ExampleWiseSquaredErrorLossConfig::createJointProbabilityFunctionFactory() const { - return nullptr; - } - - float64 ExampleWiseSquaredErrorLossConfig::getDefaultPrediction() const { - return 0.0; - } - - std::unique_ptr ExampleWiseSquaredErrorLossConfig::createExampleWiseLossFactory() const { - return std::make_unique(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_hinge.cpp b/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_hinge.cpp deleted file mode 100644 index 503395e0..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_example_wise_squared_hinge.cpp +++ /dev/null @@ -1,369 +0,0 @@ -#include "boosting/losses/loss_example_wise_squared_hinge.hpp" - -#include "common/iterator/binary_forward_iterator.hpp" -#include "common/math/math.hpp" - -namespace boosting { - - template - static inline void updateLabelWiseStatisticsInternally(VectorConstView::const_iterator scoreIterator, - LabelIterator labelIterator, - DenseLabelWiseStatisticView::iterator statisticIterator, - uint32 numLabels) { - LabelIterator labelIterator2 = labelIterator; - - // 
For each label `i`, calculate `x_i = predictedScore_i^2 - 2 * predictedScore_i + 1` if trueLabel_i = 1 and - // `predictedScore_i < 1` or `x_i = predictedScore^2` if `trueLabel_i = 0` and `predictedScore_i > 0` - // or `x_i = 0` otherwise. The of those values is used as a denominator when calculating the gradients and - // Hessians afterwards... - float64 denominator = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - float64 x; - - if (trueLabel) { - if (predictedScore < 1) { - x = (predictedScore * predictedScore) - (2 * predictedScore) + 1; - } else { - x = 0; - } - } else { - if (predictedScore > 0) { - x = (predictedScore * predictedScore); - } else { - x = 0; - } - } - - statisticIterator[i].first = x; // Temporarily store `x` in the array of gradients - denominator += x; - labelIterator++; - } - - // The denominator that is used for the calculation of gradients is `sqrt(x_1 + x_2 + ...)`... - float64 denominatorGradient = std::sqrt(denominator); - - // The denominator that is used for the calculation of Hessians is `(x_1 + x_2 + ...)^1.5`... - float64 denominatorHessian = std::pow(denominator, 1.5); - - // Calculate the gradients and Hessians... 
- for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator2; - Tuple& tuple = statisticIterator[i]; - float64 gradient; - float64 hessian; - - if (trueLabel) { - if (predictedScore < 1) { - gradient = divideOrZero(predictedScore - 1, denominatorGradient); - hessian = divideOrZero(denominator - tuple.first, denominatorHessian); - } else { - gradient = 0; - hessian = 1; - } - } else { - if (predictedScore > 0) { - gradient = divideOrZero(predictedScore, denominatorGradient); - hessian = divideOrZero(denominator - tuple.first, denominatorHessian); - } else { - gradient = 0; - hessian = 1; - } - } - - tuple.first = gradient; - tuple.second = hessian; - labelIterator2++; - } - } - - template - static inline void updateExampleWiseStatisticsInternally( - VectorConstView::const_iterator scoreIterator, LabelIterator labelIterator, - DenseExampleWiseStatisticView::gradient_iterator gradientIterator, - DenseExampleWiseStatisticView::hessian_iterator hessianIterator, uint32 numLabels) { - LabelIterator labelIterator2 = labelIterator; - LabelIterator labelIterator3 = labelIterator; - - // For each label `i`, calculate `x_i = predictedScore_i^2 - 2 * predictedScore_i + 1` if trueLabel_i = 1 and - // `predictedScore_i < 1` or `x_i = predictedScore^2` if `trueLabel_i = 0` and `predictedScore_i > 0` - // or `x_i = 0` otherwise. The of those values is used as a denominator when calculating the gradients and - // Hessians afterwards... 
- float64 denominator = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - float64 x; - - if (trueLabel) { - if (predictedScore < 1) { - x = (predictedScore * predictedScore) - (2 * predictedScore) + 1; - } else { - x = 0; - } - } else { - if (predictedScore > 0) { - x = (predictedScore * predictedScore); - } else { - x = 0; - } - } - - gradientIterator[i] = x; // Temporarily store `x` in the array of gradients - denominator += x; - labelIterator++; - } - - // The denominator that is used for the calculation of gradients is `sqrt(x_1 + x_2 + ...)`... - float64 denominatorGradient = std::sqrt(denominator); - - // The denominator that is used for the calculation of Hessians is `(x_1 + x_2 + ...)^1.5`... - float64 denominatorHessian = std::pow(denominator, 1.5); - - // Calculate the gradients and Hessians... - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator2; - float64 gradient; - float64 hessian; - - if (trueLabel) { - if (predictedScore < 1) { - gradient = divideOrZero(predictedScore - 1, denominatorGradient); - hessian = divideOrZero(denominator - gradientIterator[i], denominatorHessian); - } else { - gradient = 0; - hessian = 1; - } - } else { - if (predictedScore > 0) { - gradient = divideOrZero(predictedScore, denominatorGradient); - hessian = divideOrZero(denominator - gradientIterator[i], denominatorHessian); - } else { - gradient = 0; - hessian = 1; - } - } - - LabelIterator labelIterator4 = labelIterator3; - - for (uint32 j = 0; j < i; j++) { - float64 hessianTriangle; - - if (gradient != 0) { - bool trueLabel2 = *labelIterator4; - float64 predictedScore2 = scoreIterator[j]; - float64 numerator; - - if (trueLabel2) { - if (predictedScore2 < 1) { - numerator = predictedScore2 - 1; - } else { - numerator = 0; - } - } else { - if (predictedScore2 > 0) { - numerator = predictedScore2; - } else { - numerator = 0; - 
} - } - - if (trueLabel) { - numerator *= -(predictedScore - 1); - } else { - numerator *= -predictedScore; - } - - hessianTriangle = divideOrZero(numerator, denominatorHessian); - } else { - hessianTriangle = 0; - } - - *hessianIterator = hessianTriangle; - hessianIterator++; - labelIterator4++; - } - - gradientIterator[i] = gradient; - *hessianIterator = hessian; - hessianIterator++; - labelIterator2++; - } - } - - template - static inline float64 evaluateInternally(VectorConstView::const_iterator scoreIterator, - LabelIterator labelIterator, uint32 numLabels) { - // The example-wise squared hinge loss calculates as `sqrt((L_1 + ...)` with - // `L_i = max(1 - predictedScore_i, 0)^2` if `trueLabel_i = 1` or `L_i = max(predictedScore_i, 0)^2` if - // `trueLabel_i = 0`. - float64 sumOfSquares = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - - if (trueLabel) { - if (predictedScore < 1) { - sumOfSquares += ((1 - predictedScore) * (1 - predictedScore)); - } - } else { - if (predictedScore > 0) { - sumOfSquares += (predictedScore * predictedScore); - } - } - - labelIterator++; - } - - return std::sqrt(sumOfSquares); - } - - /** - * An implementation of the type `IExampleWiseLoss` that implements a multi-label variant of the squared hinge loss - * that is applied example-wise. 
- */ - class ExampleWiseSquaredHingeLoss final : public IExampleWiseLoss { - public: - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, - const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - virtual void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - 
DenseLabelWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - statisticView.begin(exampleIndex), labelMatrix.getNumCols()); - } - - void updateExampleWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const override { - updateExampleWiseStatisticsInternally( - scoreMatrix.values_cbegin(exampleIndex), labelMatrix.values_cbegin(exampleIndex), - statisticView.gradients_begin(exampleIndex), statisticView.hessians_begin(exampleIndex), - labelMatrix.getNumCols()); - } - - void updateExampleWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - DenseExampleWiseStatisticView& statisticView) const override { - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - updateExampleWiseStatisticsInternally( - scoreMatrix.values_cbegin(exampleIndex), labelIterator, statisticView.gradients_begin(exampleIndex), - statisticView.hessians_begin(exampleIndex), labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override { - return evaluateInternally(scoreMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cbegin(exampleIndex), labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override { - auto labelIterator = 
make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - return evaluateInternally(scoreMatrix.values_cbegin(exampleIndex), labelIterator, - labelMatrix.getNumCols()); - } - - /** - * @see `IDistanceMeasure::measureDistance` - */ - float64 measureDistance(uint32 labelVectorIndex, const LabelVector& labelVector, - VectorView::const_iterator scoresBegin, - VectorView::const_iterator scoresEnd) const override { - uint32 numLabels = scoresEnd - scoresBegin; - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - return evaluateInternally(scoresBegin, labelIterator, numLabels); - } - }; - - /** - * Allows to create instances of the type `IExampleWiseLoss` that implement a multi-label variant of the squared - * hinge loss that is applied example-wise. - */ - class ExampleWiseSquaredHingeLossFactory final : public IExampleWiseLossFactory { - public: - - std::unique_ptr createExampleWiseLoss() const override { - return std::make_unique(); - } - }; - - ExampleWiseSquaredHingeLossConfig::ExampleWiseSquaredHingeLossConfig( - const std::unique_ptr& headConfigPtr) - : headConfigPtr_(headConfigPtr) {} - - std::unique_ptr ExampleWiseSquaredHingeLossConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, *this, blas, lapack); - } - - std::unique_ptr - ExampleWiseSquaredHingeLossConfig::createMarginalProbabilityFunctionFactory() const { - return nullptr; - } - - std::unique_ptr - ExampleWiseSquaredHingeLossConfig::createJointProbabilityFunctionFactory() const { - return nullptr; - } - - float64 ExampleWiseSquaredHingeLossConfig::getDefaultPrediction() const { - return 0.5; - } - - std::unique_ptr 
ExampleWiseSquaredHingeLossConfig::createExampleWiseLossFactory() const { - return std::make_unique(); - } - -} \ No newline at end of file diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_common.hpp b/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_common.hpp deleted file mode 100644 index 7f69fa80..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_common.hpp +++ /dev/null @@ -1,198 +0,0 @@ -#include "boosting/losses/loss_label_wise.hpp" -#include "common/iterator/binary_forward_iterator.hpp" -#include "common/math/math.hpp" - -#include - -namespace boosting { - - /** - * An implementation of the type `ILabelWiseLoss` that relies on an "update function" and an "evaluation function" - * for updating the gradients and Hessians and evaluation the predictions for an individual label, respectively. - */ - class LabelWiseLoss : virtual public ILabelWiseLoss { - public: - - /** - * A function that allows to update the gradient and Hessian for a single example and label. The function - * accepts the true label, the predicted score, as well as pointers to the gradient and Hessian to be - * updated, as arguments. - */ - typedef void (*UpdateFunction)(bool trueLabel, float64 predictedScore, float64* gradient, float64* hessian); - - /** - * A function that allows to calculate a numerical score that assesses the quality of the prediction for a - * single example and label. The function accepts the true label and the predicted score as arguments and - * returns a numerical score. - */ - typedef float64 (*EvaluateFunction)(bool trueLabel, float64 predictedScore); - - /** - * The "update function" that is used for updating gradients and Hessians. - */ - const UpdateFunction updateFunction_; - - /** - * The "evaluation function" that is used for evaluating predictions. 
- */ - const EvaluateFunction evaluateFunction_; - - /** - * @param updateFunction The "update function" to be used for updating gradients and Hessians - * @param evaluateFunction The "evaluation function" to be used for evaluating predictions - */ - LabelWiseLoss(UpdateFunction updateFunction, EvaluateFunction evaluateFunction) - : updateFunction_(updateFunction), evaluateFunction_(evaluateFunction) {} - - void updateLabelWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override final { - DenseLabelWiseStatisticView::iterator statisticIterator = statisticView.begin(exampleIndex); - CContiguousConstView::value_const_iterator scoreIterator = - scoreMatrix.values_cbegin(exampleIndex); - CContiguousConstView::value_const_iterator labelIterator = - labelMatrix.values_cbegin(exampleIndex); - uint32 numLabels = labelMatrix.getNumCols(); - - for (uint32 i = 0; i < numLabels; i++) { - bool trueLabel = labelIterator[i]; - float64 predictedScore = scoreIterator[i]; - Tuple& tuple = statisticIterator[i]; - (*updateFunction_)(trueLabel, predictedScore, &(tuple.first), &(tuple.second)); - } - } - - void updateLabelWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override final { - DenseLabelWiseStatisticView::iterator statisticIterator = statisticView.begin(exampleIndex); - CContiguousConstView::value_const_iterator scoreIterator = - scoreMatrix.values_cbegin(exampleIndex); - CContiguousConstView::value_const_iterator labelIterator = - labelMatrix.values_cbegin(exampleIndex); - uint32 numLabels = labelIndicesEnd - 
labelIndicesBegin; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 labelIndex = labelIndicesBegin[i]; - bool trueLabel = labelIterator[labelIndex]; - float64 predictedScore = scoreIterator[labelIndex]; - Tuple& tuple = statisticIterator[labelIndex]; - (*updateFunction_)(trueLabel, predictedScore, &(tuple.first), &(tuple.second)); - } - } - - void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override final { - DenseLabelWiseStatisticView::iterator statisticIterator = statisticView.begin(exampleIndex); - CContiguousConstView::value_const_iterator scoreIterator = - scoreMatrix.values_cbegin(exampleIndex); - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - uint32 numLabels = labelMatrix.getNumCols(); - - for (uint32 i = 0; i < numLabels; i++) { - bool trueLabel = *labelIterator; - float64 predictedScore = scoreIterator[i]; - Tuple& tuple = statisticIterator[i]; - (*updateFunction_)(trueLabel, predictedScore, &(tuple.first), &(tuple.second)); - labelIterator++; - } - } - - void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - DenseLabelWiseStatisticView& statisticView) const override final { - DenseLabelWiseStatisticView::iterator statisticIterator = statisticView.begin(exampleIndex); - CContiguousConstView::value_const_iterator scoreIterator = - scoreMatrix.values_cbegin(exampleIndex); - BinaryCsrConstView::index_const_iterator indexIterator = labelMatrix.indices_cbegin(exampleIndex); - BinaryCsrConstView::index_const_iterator indicesEnd = 
labelMatrix.indices_cend(exampleIndex); - uint32 numLabels = labelIndicesEnd - labelIndicesBegin; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 labelIndex = labelIndicesBegin[i]; - indexIterator = std::lower_bound(indexIterator, indicesEnd, labelIndex); - bool trueLabel = indexIterator != indicesEnd && *indexIterator == labelIndex; - float64 predictedScore = scoreIterator[labelIndex]; - Tuple& tuple = statisticIterator[labelIndex]; - (*updateFunction_)(trueLabel, predictedScore, &(tuple.first), &(tuple.second)); - } - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override final { - CContiguousConstView::value_const_iterator scoreIterator = - scoreMatrix.values_cbegin(exampleIndex); - CContiguousConstView::value_const_iterator labelIterator = - labelMatrix.values_cbegin(exampleIndex); - uint32 numLabels = labelMatrix.getNumCols(); - float64 mean = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = labelIterator[i]; - float64 score = (*evaluateFunction_)(trueLabel, predictedScore); - mean = iterativeArithmeticMean(i + 1, score, mean); - } - - return mean; - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const override final { - CContiguousConstView::value_const_iterator scoreIterator = - scoreMatrix.values_cbegin(exampleIndex); - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - uint32 numLabels = labelMatrix.getNumCols(); - float64 mean = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoreIterator[i]; - bool trueLabel = *labelIterator; - float64 score = (*evaluateFunction_)(trueLabel, predictedScore); - mean = 
iterativeArithmeticMean(i + 1, score, mean); - labelIterator++; - } - - return mean; - } - - /** - * @see `IDistanceMeasure::measureDistance` - */ - float64 measureDistance(uint32 labelVectorIndex, const LabelVector& labelVector, - VectorView::const_iterator scoresBegin, - VectorView::const_iterator scoresEnd) const override final { - uint32 numLabels = scoresEnd - scoresBegin; - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - float64 mean = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 predictedScore = scoresBegin[i]; - bool trueLabel = *labelIterator; - float64 score = (*evaluateFunction_)(trueLabel, predictedScore); - mean = iterativeArithmeticMean(i + 1, score, mean); - labelIterator++; - } - - return mean; - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_logistic.cpp b/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_logistic.cpp deleted file mode 100644 index 2afae029..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_logistic.cpp +++ /dev/null @@ -1,106 +0,0 @@ -#include "boosting/losses/loss_label_wise_logistic.hpp" - -#include "boosting/math/math.hpp" -#include "boosting/prediction/probability_function_chain_rule.hpp" -#include "boosting/prediction/probability_function_logistic.hpp" -#include "loss_label_wise_common.hpp" - -namespace boosting { - - /** - * Calculates and returns the function `1 / (1 + exp(-x))^2 = exp(x)^2 / (1 + exp(x))^2`, given a specific value - * `x`. - * - * This implementation exploits the identity `1 / (1 + exp(-x)) = exp(x) / (1 + exp(x))` to increase numerical - * stability (see, e.g., section "Numerically stable sigmoid function" in - * https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). 
- * - * @param x The value `x` - * @return The value that has been calculated - */ - static inline constexpr float64 squaredLogisticFunction(float64 x) { - if (x >= 0) { - float64 exponential = std::exp(-x); // Evaluates to 0 for large x, resulting in 1 ultimately - return 1 / ((exponential + 1) * (exponential + 1)); - } else { - float64 exponential = std::exp(x); // Evaluates to 0 for large x, resulting in 0 ultimately - return (exponential * exponential) / ((exponential + 1) * (exponential + 1)); - } - } - - /** - * Calculates and returns the function `log(1 + exp(x)) = log(exp(0) + exp(x))`, given a specific value `x`. - * - * This function exploits the identity `log(exp(0) + exp(x)) = b + log(exp(0 - b) + exp(x - b))`, where - * `b = max(0, x)`, to increase numerical stability (see, e.g., section "Log-sum-exp for computing the - * log-distribution" in https://timvieira.github.io/blog/post/2014/02/11/exp-normalize-trick/). - * - * @param x The value `x` - * @return The value that has been calculated - */ - static inline constexpr float64 logSumExp(float64 x) { - if (x > 0) { - return x + std::log(std::exp(0 - x) + 1); - } else { - return std::log(1 + std::exp(x)); - } - } - - static inline void updateGradientAndHessian(bool trueLabel, float64 predictedScore, float64* gradient, - float64* hessian) { - // The gradient computes as `-expectedScore / (1 + exp(expectedScore * predictedScore))`, or as - // `1 / (1 + exp(-predictedScore)) - 1` if `trueLabel == true`, `1 / (1 + exp(-predictedScore))`, otherwise... - float64 logistic = logisticFunction(predictedScore); - *gradient = trueLabel ? 
logistic - 1.0 : logistic; - - // The Hessian computes as `exp(expectedScore * predictedScore) / (1 + exp(expectedScore * predictedScore))^2`, - // or as `1 / (1 + exp(expectedScore * predictedScore)) - 1 / (1 + exp(expectedScore * predictedScore))^2` - *hessian = logistic - squaredLogisticFunction(predictedScore); - } - - static inline float64 evaluatePrediction(bool trueLabel, float64 predictedScore) { - // The logistic loss calculates as `log(1 + exp(-expectedScore * predictedScore))`... - float64 x = trueLabel ? -predictedScore : predictedScore; - return logSumExp(x); - } - - /** - * Allows to create instances of the type `ILabelWiseLoss` that implement a multi-label variant of the logistic loss - * that is applied label-wise. - */ - class LabelWiseLogisticLossFactory final : public ILabelWiseLossFactory { - public: - - std::unique_ptr createLabelWiseLoss() const override { - return std::make_unique(&updateGradientAndHessian, &evaluatePrediction); - } - }; - - LabelWiseLogisticLossConfig::LabelWiseLogisticLossConfig(const std::unique_ptr& headConfigPtr) - : headConfigPtr_(headConfigPtr) {} - - std::unique_ptr LabelWiseLogisticLossConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, *this); - } - - std::unique_ptr - LabelWiseLogisticLossConfig::createMarginalProbabilityFunctionFactory() const { - return std::make_unique(); - } - - std::unique_ptr - LabelWiseLogisticLossConfig::createJointProbabilityFunctionFactory() const { - return std::make_unique(this->createMarginalProbabilityFunctionFactory()); - } - - float64 LabelWiseLogisticLossConfig::getDefaultPrediction() const { - return 0; - } - - std::unique_ptr LabelWiseLogisticLossConfig::createLabelWiseLossFactory() const { - return std::make_unique(); - } - -} diff --git 
a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_sparse_common.hpp b/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_sparse_common.hpp deleted file mode 100644 index 459b2bcb..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_sparse_common.hpp +++ /dev/null @@ -1,275 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "boosting/losses/loss_label_wise_sparse.hpp" -#include "common/iterator/non_zero_index_forward_iterator.hpp" -#include "loss_label_wise_common.hpp" - -#include - -namespace boosting { - - static const uint32 LIMIT = std::numeric_limits::max(); - - template - static inline uint32 fetchNextStatistic(IndexIterator& indexIterator, IndexIterator indicesEnd, - SparseSetMatrix::const_iterator& scoreIterator, - SparseSetMatrix::const_iterator scoresEnd, Tuple& tuple, - LabelWiseLoss::UpdateFunction updateFunction) { - uint32 labelIndex = indexIterator == indicesEnd ? LIMIT : *indexIterator; - uint32 scoreIndex = scoreIterator == scoresEnd ? 
LIMIT : (*scoreIterator).index; - - if (scoreIndex < labelIndex) { - (*updateFunction)(false, (*scoreIterator).value, &tuple.first, &tuple.second); - scoreIterator++; - return scoreIndex; - } else if (labelIndex < scoreIndex) { - (*updateFunction)(true, 0, &tuple.first, &tuple.second); - indexIterator++; - return labelIndex; - } else if (labelIndex < LIMIT) { - (*updateFunction)(true, (*scoreIterator).value, &tuple.first, &tuple.second); - scoreIterator++; - indexIterator++; - return labelIndex; - } - - return LIMIT; - } - - template - static inline uint32 fetchNextNonZeroStatistic(IndexIterator& indexIterator, IndexIterator indicesEnd, - SparseSetMatrix::const_iterator& scoreIterator, - SparseSetMatrix::const_iterator scoresEnd, - Tuple& tuple, - LabelWiseLoss::UpdateFunction updateFunction) { - uint32 index = fetchNextStatistic(indexIterator, indicesEnd, scoreIterator, scoresEnd, tuple, updateFunction); - - while (tuple.first == 0 && index < LIMIT) { - index = fetchNextStatistic(indexIterator, indicesEnd, scoreIterator, scoresEnd, tuple, updateFunction); - } - - return index; - } - - template - static inline void updateLabelWiseStatisticsInternally(IndexIterator indicesBegin, IndexIterator indicesEnd, - SparseSetMatrix::const_iterator scoresBegin, - SparseSetMatrix::const_iterator scoresEnd, - SparseLabelWiseStatisticView::row row, - LabelWiseLoss::UpdateFunction updateFunction) { - row.clear(); - Tuple tuple; - uint32 index; - - while ( - (index = fetchNextNonZeroStatistic(indicesBegin, indicesEnd, scoresBegin, scoresEnd, tuple, updateFunction)) - < LIMIT) { - IndexedValue>& entry = row.emplace(index); - entry.value = tuple; - } - } - - template - static inline uint32 fetchNextEvaluation(IndexIterator& indexIterator, IndexIterator indicesEnd, - SparseSetMatrix::const_iterator& scoreIterator, - SparseSetMatrix::const_iterator scoresEnd, float64& score, - LabelWiseLoss::EvaluateFunction evaluateFunction) { - uint32 labelIndex = indexIterator == indicesEnd ? 
LIMIT : *indexIterator; - uint32 scoreIndex = scoreIterator == scoresEnd ? LIMIT : (*scoreIterator).index; - - if (scoreIndex < labelIndex) { - score = (*evaluateFunction)(false, (*scoreIterator).value); - scoreIterator++; - return scoreIndex; - } else if (labelIndex < scoreIndex) { - score = (*evaluateFunction)(true, 0); - indexIterator++; - return labelIndex; - } else if (labelIndex < LIMIT) { - score = (*evaluateFunction)(true, (*scoreIterator).value); - scoreIterator++; - indexIterator++; - return labelIndex; - } - - return LIMIT; - } - - template - static inline uint32 fetchNextNonZeroEvaluation(IndexIterator& indexIterator, IndexIterator indicesEnd, - SparseSetMatrix::const_iterator& scoreIterator, - SparseSetMatrix::const_iterator scoresEnd, float64& score, - LabelWiseLoss::EvaluateFunction evaluateFunction) { - uint32 index = - fetchNextEvaluation(indexIterator, indicesEnd, scoreIterator, scoresEnd, score, evaluateFunction); - - while (score == 0 && index < LIMIT) { - index = fetchNextEvaluation(indexIterator, indicesEnd, scoreIterator, scoresEnd, score, evaluateFunction); - } - - return index; - } - - template - static inline float64 evaluateInternally(IndexIterator indicesBegin, IndexIterator indicesEnd, - SparseSetMatrix::const_iterator scoresBegin, - SparseSetMatrix::const_iterator scoresEnd, - LabelWiseLoss::EvaluateFunction evaluateFunction, uint32 numLabels) { - float64 mean = 0; - float64 score = 0; - uint32 i = 0; - - while (fetchNextNonZeroEvaluation(indicesBegin, indicesEnd, scoresBegin, scoresEnd, score, evaluateFunction) - < LIMIT) { - mean = iterativeArithmeticMean(i + 1, score, mean); - i++; - } - - return mean * ((float64) i / (float64) numLabels); - } - - /** - * An implementation of the type `ISparseLabelWiseLoss` that relies on an "update function" and an - * "evaluation function" for updating the gradients and Hessians and evaluation the predictions for an individual - * label, respectively. 
- */ - class SparseLabelWiseLoss final : public LabelWiseLoss, - public ISparseLabelWiseLoss { - public: - - /** - * @param updateFunction The "update function" to be used for updating gradients and Hessians - * @param evaluateFunction The "evaluation function" to be used for evaluating predictions - */ - SparseLabelWiseLoss(UpdateFunction updateFunction, EvaluateFunction evaluateFunction) - : LabelWiseLoss(updateFunction, evaluateFunction) {} - - /** - * Keep "updateLabelWiseStatistics" functions from the parent class rather than hiding them. - */ - using LabelWiseLoss::updateLabelWiseStatistics; - - void updateLabelWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const override { - auto indicesBegin = make_non_zero_index_forward_iterator(labelMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - auto indicesEnd = make_non_zero_index_forward_iterator(labelMatrix.values_cend(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - updateLabelWiseStatisticsInternally(indicesBegin, indicesEnd, scoreMatrix.cbegin(exampleIndex), - scoreMatrix.cend(exampleIndex), statisticView[exampleIndex], - LabelWiseLoss::updateFunction_); - } - - void updateLabelWiseStatistics(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const override { - const SparseSetMatrix::const_row scoreMatrixRow = scoreMatrix[exampleIndex]; - CContiguousConstView::value_const_iterator labelIterator = - labelMatrix.values_cbegin(exampleIndex); - SparseLabelWiseStatisticView::row statisticViewRow = statisticView[exampleIndex]; - uint32 numElements = 
labelIndicesEnd - labelIndicesBegin; - Tuple tuple; - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = labelIndicesBegin[i]; - const IndexedValue* scoreMatrixEntry = scoreMatrixRow[index]; - float64 predictedScore = scoreMatrixEntry ? scoreMatrixEntry->value : 0; - bool trueLabel = labelIterator[index]; - (*LabelWiseLoss::updateFunction_)(trueLabel, predictedScore, &tuple.first, &tuple.second); - - if (tuple.first != 0) { - IndexedValue>& statisticViewEntry = statisticViewRow.emplace(index); - statisticViewEntry.value = tuple; - } else { - statisticViewRow.erase(index); - } - } - } - - void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - CompleteIndexVector::const_iterator labelIndicesBegin, - CompleteIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const override { - updateLabelWiseStatisticsInternally(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex), - scoreMatrix.cbegin(exampleIndex), scoreMatrix.cend(exampleIndex), - statisticView[exampleIndex], LabelWiseLoss::updateFunction_); - } - - void updateLabelWiseStatistics(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix, - PartialIndexVector::const_iterator labelIndicesBegin, - PartialIndexVector::const_iterator labelIndicesEnd, - SparseLabelWiseStatisticView& statisticView) const override { - const SparseSetMatrix::const_row scoreMatrixRow = scoreMatrix[exampleIndex]; - BinaryCsrConstView::index_const_iterator indexIterator = labelMatrix.indices_cbegin(exampleIndex); - BinaryCsrConstView::index_const_iterator indicesEnd = labelMatrix.indices_cend(exampleIndex); - SparseLabelWiseStatisticView::row statisticViewRow = statisticView[exampleIndex]; - uint32 numElements = labelIndicesEnd - labelIndicesBegin; - Tuple tuple; - - for (uint32 i = 0; i < numElements; i++) { - uint32 index = labelIndicesBegin[i]; - 
indexIterator = std::lower_bound(indexIterator, indicesEnd, index); - bool trueLabel = indexIterator != indicesEnd && *indexIterator == index; - const IndexedValue* scoreMatrixEntry = scoreMatrixRow[index]; - float64 predictedScore = scoreMatrixEntry ? scoreMatrixEntry->value : 0; - (*LabelWiseLoss::updateFunction_)(trueLabel, predictedScore, &tuple.first, &tuple.second); - - if (tuple.first != 0) { - IndexedValue>& statisticViewEntry = statisticViewRow.emplace(index); - statisticViewEntry.value = tuple; - } else { - statisticViewRow.erase(index); - } - } - } - - // Keep "evaluate" functions from the parent class rather than hiding them - using LabelWiseLoss::evaluate; - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix) const override { - auto indicesBegin = make_non_zero_index_forward_iterator(labelMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - auto indicesEnd = make_non_zero_index_forward_iterator(labelMatrix.values_cend(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - return evaluateInternally(indicesBegin, indicesEnd, scoreMatrix.cbegin(exampleIndex), - scoreMatrix.cend(exampleIndex), LabelWiseLoss::evaluateFunction_, - labelMatrix.getNumCols()); - } - - /** - * @see `IEvaluationMeasure::evaluate` - */ - float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix) const override { - return evaluateInternally(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex), scoreMatrix.cbegin(exampleIndex), - scoreMatrix.cend(exampleIndex), LabelWiseLoss::evaluateFunction_, - labelMatrix.getNumCols()); - } - }; - -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_error.cpp 
b/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_error.cpp deleted file mode 100644 index 3872fa9b..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_error.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "boosting/losses/loss_label_wise_squared_error.hpp" - -#include "loss_label_wise_common.hpp" - -namespace boosting { - - static inline void updateGradientAndHessian(bool trueLabel, float64 predictedScore, float64* gradient, - float64* hessian) { - float64 expectedScore = trueLabel ? 1 : -1; - *gradient = (predictedScore - expectedScore); - *hessian = 1; - } - - static inline float64 evaluatePrediction(bool trueLabel, float64 predictedScore) { - float64 expectedScore = trueLabel ? 1 : -1; - float64 difference = (expectedScore - predictedScore); - return difference * difference; - } - - /** - * Allows to create instances of the type `ILabelWiseLoss` that implement a multi-label variant of the squared error - * loss that is applied label-wise. 
- */ - class LabelWiseSquaredErrorLossFactory final : public ILabelWiseLossFactory { - public: - - std::unique_ptr createLabelWiseLoss() const override { - return std::make_unique(&updateGradientAndHessian, &evaluatePrediction); - } - }; - - LabelWiseSquaredErrorLossConfig::LabelWiseSquaredErrorLossConfig(const std::unique_ptr& headConfigPtr) - : headConfigPtr_(headConfigPtr) {} - - std::unique_ptr LabelWiseSquaredErrorLossConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, *this); - } - - std::unique_ptr - LabelWiseSquaredErrorLossConfig::createMarginalProbabilityFunctionFactory() const { - return nullptr; - } - - std::unique_ptr - LabelWiseSquaredErrorLossConfig::createJointProbabilityFunctionFactory() const { - return nullptr; - } - - float64 LabelWiseSquaredErrorLossConfig::getDefaultPrediction() const { - return 0; - } - - std::unique_ptr LabelWiseSquaredErrorLossConfig::createLabelWiseLossFactory() const { - return std::make_unique(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_hinge.cpp b/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_hinge.cpp deleted file mode 100644 index 306a739e..00000000 --- a/cpp/subprojects/boosting/src/boosting/losses/loss_label_wise_squared_hinge.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "boosting/losses/loss_label_wise_squared_hinge.hpp" - -#include "loss_label_wise_sparse_common.hpp" - -namespace boosting { - - static inline void updateGradientAndHessian(bool trueLabel, float64 predictedScore, float64* gradient, - float64* hessian) { - if (trueLabel) { - if (predictedScore < 1) { - *gradient = (predictedScore - 1); - } else { - *gradient = 0; - } - } else { - if (predictedScore > 0) { - *gradient = predictedScore; - } else { - 
*gradient = 0; - } - } - - *hessian = 1; - } - - static inline float64 evaluatePrediction(bool trueLabel, float64 predictedScore) { - if (trueLabel) { - if (predictedScore < 1) { - return (1 - predictedScore) * (1 - predictedScore); - } else { - return 0; - } - } else { - if (predictedScore > 0) { - return predictedScore * predictedScore; - } else { - return 0; - } - } - } - - /** - * Allows to create instances of the type `ILabelWiseLoss` that implement a multi-label variant of the squared hinge - * loss that is applied label-wise. - */ - class LabelWiseSquaredHingeLossFactory final : public ISparseLabelWiseLossFactory { - public: - - std::unique_ptr createSparseLabelWiseLoss() const override { - return std::make_unique(&updateGradientAndHessian, &evaluatePrediction); - } - }; - - LabelWiseSquaredHingeLossConfig::LabelWiseSquaredHingeLossConfig(const std::unique_ptr& headConfigPtr) - : headConfigPtr_(headConfigPtr) {} - - std::unique_ptr LabelWiseSquaredHingeLossConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack, bool preferSparseStatistics) const { - if (preferSparseStatistics) { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, *this); - } else { - return headConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, - static_cast(*this)); - } - } - - std::unique_ptr - LabelWiseSquaredHingeLossConfig::createMarginalProbabilityFunctionFactory() const { - return nullptr; - } - - std::unique_ptr - LabelWiseSquaredHingeLossConfig::createJointProbabilityFunctionFactory() const { - return nullptr; - } - - float64 LabelWiseSquaredHingeLossConfig::getDefaultPrediction() const { - return 0.5; - } - - std::unique_ptr LabelWiseSquaredHingeLossConfig::createSparseLabelWiseLossFactory() - const { - return std::make_unique(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/math/blas.cpp 
b/cpp/subprojects/boosting/src/boosting/math/blas.cpp deleted file mode 100644 index de0cfa15..00000000 --- a/cpp/subprojects/boosting/src/boosting/math/blas.cpp +++ /dev/null @@ -1,28 +0,0 @@ -#include "boosting/math/blas.hpp" - -namespace boosting { - - Blas::Blas(DdotFunction ddotFunction, DspmvFunction dspmvFunction) - : ddotFunction_(ddotFunction), dspmvFunction_(dspmvFunction) {} - - float64 Blas::ddot(float64* x, float64* y, int n) const { - // Storage spacing between the elements of the arrays x and y - int inc = 1; - // Invoke the DDOT routine... - return ddotFunction_(&n, x, &inc, y, &inc); - } - - void Blas::dspmv(float64* a, float64* x, float64* output, int n) const { - // "U" if the upper-right triangle of A should be used, "L" if the lower-left triangle should be used - char* uplo = const_cast("U"); - // A scalar to be multiplied with the matrix A - double alpha = 1; - // The increment for the elements of x and y - int inc = 1; - // A scalar to be multiplied with vector y - double beta = 0; - // Invoke the DSPMV routine... 
- dspmvFunction_(uplo, &n, &alpha, a, x, &inc, &beta, output, &inc); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/math/lapack.cpp b/cpp/subprojects/boosting/src/boosting/math/lapack.cpp deleted file mode 100644 index b20d27c2..00000000 --- a/cpp/subprojects/boosting/src/boosting/math/lapack.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "boosting/math/lapack.hpp" - -#include -#include - -namespace boosting { - - Lapack::Lapack(DsysvFunction dsysvFunction) : dsysvFunction_(dsysvFunction) {} - - int Lapack::queryDsysvLworkParameter(float64* tmpArray1, float64* output, int n) const { - // "U" if the upper-right triangle of A should be used, "L" if the lower-left triangle should be used - char* uplo = const_cast("U"); - // The number of right-hand sides, i.e, the number of columns of the matrix B - int nrhs = 1; - // Set "lwork" parameter to -1, which indicates that the optimal value should be queried - int lwork = -1; - // Variable to hold the queried value - double worksize; - // Variable to hold the result of the solver. Will be 0 when terminated successfully, unlike 0 otherwise - int info; - - // Query the optimal value for the "lwork" parameter... - dsysvFunction_(uplo, &n, &nrhs, tmpArray1, &n, (int*) 0, output, &n, &worksize, &lwork, &info); - - if (info != 0) { - throw std::runtime_error( - std::string("DSYSV terminated with non-zero info code when querying the optimal lwork parameter: " - + std::to_string(info))); - } - - return (int) worksize; - } - - void Lapack::dsysv(float64* tmpArray1, int* tmpArray2, double* tmpArray3, float64* output, int n, int lwork) const { - // "U" if the upper-right triangle of A should be used, "L" if the lower-left triangle should be used - char* uplo = const_cast("U"); - // The number of right-hand sides, i.e, the number of columns of the matrix B - int nrhs = 1; - // Variable to hold the result of the solver. Will be 0 when terminated successfully, unlike 0 otherwise - int info; - - // Run the DSYSV solver... 
- dsysvFunction_(uplo, &n, &nrhs, tmpArray1, &n, tmpArray2, output, &n, tmpArray3, &lwork, &info); - - if (info != 0) { - throw std::runtime_error(std::string("DSYSV terminated with non-zero info code: " + std::to_string(info))); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/model/rule_list_builder.cpp b/cpp/subprojects/boosting/src/boosting/model/rule_list_builder.cpp deleted file mode 100644 index 3416f48c..00000000 --- a/cpp/subprojects/boosting/src/boosting/model/rule_list_builder.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "boosting/model/rule_list_builder.hpp" - -#include "common/model/rule_list.hpp" - -namespace boosting { - - /** - * Allows to build models that store several rules in the order they have been added. - */ - class RuleListBuilder final : public IModelBuilder { - private: - - std::unique_ptr modelPtr_; - - public: - - RuleListBuilder() : modelPtr_(std::make_unique(true)) {} - - /** - * @see `IModelBuilder::setDefaultRule` - */ - void setDefaultRule(std::unique_ptr& predictionPtr) override { - modelPtr_->addDefaultRule(predictionPtr->createHead()); - } - - /** - * @see `IModelBuilder::addRule` - */ - void addRule(std::unique_ptr& conditionListPtr, - std::unique_ptr& predictionPtr) override { - modelPtr_->addRule(conditionListPtr->createConjunctiveBody(), predictionPtr->createHead()); - } - - /** - * @see `IModelBuilder::setNumUsedRules` - */ - void setNumUsedRules(uint32 numUsedRules) override { - modelPtr_->setNumUsedRules(numUsedRules); - } - - /** - * @see `IModelBuilder::buildModel` - */ - std::unique_ptr buildModel() override { - return std::move(modelPtr_); - } - }; - - std::unique_ptr RuleListBuilderFactory::create() const { - return std::make_unique(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/multi_threading/parallel_rule_refinement_auto.cpp b/cpp/subprojects/boosting/src/boosting/multi_threading/parallel_rule_refinement_auto.cpp deleted file mode 100644 index 2925b8e5..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/multi_threading/parallel_rule_refinement_auto.cpp +++ /dev/null @@ -1,24 +0,0 @@ -#include "boosting/multi_threading/parallel_rule_refinement_auto.hpp" - -#include "common/util/threads.hpp" - -namespace boosting { - - AutoParallelRuleRefinementConfig::AutoParallelRuleRefinementConfig( - const std::unique_ptr& lossConfigPtr, const std::unique_ptr& headConfigPtr, - const std::unique_ptr& featureSamplingConfigPtr) - : lossConfigPtr_(lossConfigPtr), headConfigPtr_(headConfigPtr), - featureSamplingConfigPtr_(featureSamplingConfigPtr) {} - - uint32 AutoParallelRuleRefinementConfig::getNumThreads(const IFeatureMatrix& featureMatrix, - uint32 numLabels) const { - if (!lossConfigPtr_->isDecomposable() && !headConfigPtr_->isSingleLabel()) { - return 1; - } else if (featureMatrix.isSparse() && !featureSamplingConfigPtr_->isSamplingUsed()) { - return 1; - } else { - return getNumAvailableThreads(0); - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/multi_threading/parallel_statistic_update_auto.cpp b/cpp/subprojects/boosting/src/boosting/multi_threading/parallel_statistic_update_auto.cpp deleted file mode 100644 index 76f60a81..00000000 --- a/cpp/subprojects/boosting/src/boosting/multi_threading/parallel_statistic_update_auto.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "boosting/multi_threading/parallel_statistic_update_auto.hpp" - -#include "common/util/threads.hpp" - -namespace boosting { - - AutoParallelStatisticUpdateConfig::AutoParallelStatisticUpdateConfig( - const std::unique_ptr& lossConfigPtr) - : lossConfigPtr_(lossConfigPtr) {} - - uint32 AutoParallelStatisticUpdateConfig::getNumThreads(const IFeatureMatrix& featureMatrix, - uint32 numLabels) const { - if (!lossConfigPtr_->isDecomposable() && numLabels >= 20) { - return getNumAvailableThreads(0); - } else { - return 1; - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/post_processing/shrinkage_constant.cpp 
b/cpp/subprojects/boosting/src/boosting/post_processing/shrinkage_constant.cpp deleted file mode 100644 index 5fd0cf7a..00000000 --- a/cpp/subprojects/boosting/src/boosting/post_processing/shrinkage_constant.cpp +++ /dev/null @@ -1,76 +0,0 @@ -#include "boosting/post_processing/shrinkage_constant.hpp" - -#include "common/util/validation.hpp" - -namespace boosting { - - /** - * Post-processes the predictions of rules by shrinking their weights by a constant shrinkage parameter. - */ - class ConstantShrinkage final : public IPostProcessor { - private: - - const float64 shrinkage_; - - public: - - /** - * @param shrinkage The shrinkage parameter. Must be in (0, 1) - */ - ConstantShrinkage(float64 shrinkage) : shrinkage_(shrinkage) {} - - /** - * @see `IPostProcessor::postProcess` - */ - void postProcess(AbstractPrediction& prediction) const override { - uint32 numElements = prediction.getNumElements(); - AbstractPrediction::score_iterator iterator = prediction.scores_begin(); - - for (uint32 i = 0; i < numElements; i++) { - iterator[i] *= shrinkage_; - } - } - }; - - /** - * Allows to create instances of the type `IPostProcessor` that post-process the predictions of rules by shrinking - * their weights by a constant "shrinkage" parameter. - */ - class ConstantShrinkageFactory final : public IPostProcessorFactory { - private: - - const float64 shrinkage_; - - public: - - /** - * @param shrinkage The value of the "shrinkage" parameter. 
Must be in (0, 1) - */ - ConstantShrinkageFactory(float64 shrinkage) : shrinkage_(shrinkage) {} - - /** - * @see `IPostProcessorFactory::create` - */ - std::unique_ptr create() const override { - return std::make_unique(shrinkage_); - } - }; - - ConstantShrinkageConfig::ConstantShrinkageConfig() : shrinkage_(0.3) {} - - float64 ConstantShrinkageConfig::getShrinkage() const { - return shrinkage_; - } - - IConstantShrinkageConfig& ConstantShrinkageConfig::setShrinkage(float64 shrinkage) { - assertGreater("shrinkage", shrinkage, 0); - assertLess("shrinkage", shrinkage, 1); - shrinkage_ = shrinkage; - return *this; - } - - std::unique_ptr ConstantShrinkageConfig::createPostProcessorFactory() const { - return std::make_unique(shrinkage_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/discretization_function_probability.cpp b/cpp/subprojects/boosting/src/boosting/prediction/discretization_function_probability.cpp deleted file mode 100644 index 21763f5b..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/discretization_function_probability.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "boosting/prediction/discretization_function_probability.hpp" - -namespace boosting { - - /** - * An implementation of the type `IDiscretizationFunction` that allows to discretize regression scores by - * transforming them into marginal probabilities. 
- */ - class ProbabilityDiscretizationFunction final : public IDiscretizationFunction { - private: - - std::unique_ptr marginalProbabilityFunctionPtr_; - - public: - - /** - * @param marginalProbabilityFunctionPtr An unique pointer to an object of type - * `IMarginalProbabilityFunction` that should be used to transform - * regression scores into marginal probabilities - */ - ProbabilityDiscretizationFunction( - std::unique_ptr marginalProbabilityFunctionPtr) - : marginalProbabilityFunctionPtr_(std::move(marginalProbabilityFunctionPtr)) {} - - bool discretizeScore(uint32 labelIndex, float64 score) const override { - float64 marginalProbability = - marginalProbabilityFunctionPtr_->transformScoreIntoMarginalProbability(labelIndex, score); - return marginalProbability > 0.5; - } - }; - - ProbabilityDiscretizationFunctionFactory::ProbabilityDiscretizationFunctionFactory( - std::unique_ptr marginalProbabilityFunctionFactoryPtr) - : marginalProbabilityFunctionFactoryPtr_(std::move(marginalProbabilityFunctionFactoryPtr)) {} - - std::unique_ptr ProbabilityDiscretizationFunctionFactory::create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const { - std::unique_ptr marginalProbabilityFunctionPtr = - marginalProbabilityFunctionFactoryPtr_->create(marginalProbabilityCalibrationModel); - return std::make_unique(std::move(marginalProbabilityFunctionPtr)); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/discretization_function_score.cpp b/cpp/subprojects/boosting/src/boosting/prediction/discretization_function_score.cpp deleted file mode 100644 index a831c780..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/discretization_function_score.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "boosting/prediction/discretization_function_score.hpp" - -namespace boosting { - - /** - * An implementation of the type `IDiscretizationFunction` that allows to discretize regression scores by comparing - * them to a 
threshold. - */ - class ScoreDiscretizationFunction final : public IDiscretizationFunction { - private: - - float64 threshold_; - - public: - - /** - * @param threshold The threshold that should be used for discretization - */ - ScoreDiscretizationFunction(float64 threshold) : threshold_(threshold) {} - - bool discretizeScore(uint32 labelIndex, float64 score) const override { - return score > threshold_; - } - }; - - ScoreDiscretizationFunctionFactory::ScoreDiscretizationFunctionFactory(float64 threshold) : threshold_(threshold) {} - - std::unique_ptr ScoreDiscretizationFunctionFactory::create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const { - return std::make_unique(threshold_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_auto.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_auto.cpp deleted file mode 100644 index fbf5ccc2..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_auto.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "boosting/prediction/predictor_binary_auto.hpp" - -#include "boosting/prediction/predictor_binary_example_wise.hpp" -#include "boosting/prediction/predictor_binary_label_wise.hpp" - -namespace boosting { - - AutomaticBinaryPredictorConfig::AutomaticBinaryPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : lossConfigPtr_(lossConfigPtr), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - std::unique_ptr AutomaticBinaryPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - if (lossConfigPtr_->isDecomposable()) { - return LabelWiseBinaryPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .createPredictorFactory(featureMatrix, numLabels); - } else { - return ExampleWiseBinaryPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .createPredictorFactory(featureMatrix, 
numLabels); - } - } - - std::unique_ptr AutomaticBinaryPredictorConfig::createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - if (lossConfigPtr_->isDecomposable()) { - return LabelWiseBinaryPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .createSparsePredictorFactory(featureMatrix, numLabels); - } else { - return ExampleWiseBinaryPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .createSparsePredictorFactory(featureMatrix, numLabels); - } - } - - bool AutomaticBinaryPredictorConfig::isLabelVectorSetNeeded() const { - if (lossConfigPtr_->isDecomposable()) { - return LabelWiseBinaryPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_).isLabelVectorSetNeeded(); - } else { - return ExampleWiseBinaryPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_).isLabelVectorSetNeeded(); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_example_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_example_wise.cpp deleted file mode 100644 index 1c4e75eb..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_example_wise.cpp +++ /dev/null @@ -1,287 +0,0 @@ -#include "boosting/prediction/predictor_binary_example_wise.hpp" - -#include "boosting/prediction/predictor_binary_common.hpp" -#include "boosting/prediction/transformation_binary_example_wise.hpp" -#include "common/prediction/probability_calibration_no.hpp" - -#include - -namespace boosting { - - static inline std::unique_ptr createBinaryTransformation( - const LabelVectorSet* labelVectorSet, const IDistanceMeasureFactory& distanceMeasureFactory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) { - if (!labelVectorSet) { - throw std::runtime_error( - "Information about the label vectors that have been encountered in the training data is required for 
" - "predicting binary labels, but no such information is provided by the model. Most probably, the model " - "was intended to use a different prediction method when it has been trained."); - } - - std::unique_ptr binaryTransformationPtr; - - if (labelVectorSet->getNumLabelVectors() > 0) { - binaryTransformationPtr = std::make_unique( - *labelVectorSet, distanceMeasureFactory.createDistanceMeasure(marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel)); - } - - return binaryTransformationPtr; - } - - template - static inline std::unique_ptr createPredictor( - const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads, - const LabelVectorSet* labelVectorSet, const IDistanceMeasureFactory& distanceMeasureFactory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) { - std::unique_ptr binaryTransformationPtr = - createBinaryTransformation(labelVectorSet, distanceMeasureFactory, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel); - return std::make_unique>(featureMatrix, model, numLabels, numThreads, - std::move(binaryTransformationPtr)); - } - - /** - * Allows to create instances of the type `IBinaryPredictor` that allow to predict known label vectors for given - * query examples by comparing the predicted regression scores or probability estimates to the label vectors - * encountered in the training data. 
- */ - class ExampleWiseBinaryPredictorFactory final : public IBinaryPredictorFactory { - private: - - const std::unique_ptr distanceMeasureFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param distanceMeasureFactoryPtr An unique pointer to an object of type - * `IDistanceMeasureFactory` that allows to create - * implementations of the distance measure that should be used - * to calculate the distance between predicted scores and known - * label vectors - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param jointProbabilityCalibrationModel A pointer to an object of type - * `IJointProbabilityCalibrationModel` to be used for the - * calibration of joint probabilities or a null pointer, if no - * such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. 
Must be at least 1 - */ - ExampleWiseBinaryPredictorFactory( - std::unique_ptr distanceMeasureFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel, uint32 numThreads) - : distanceMeasureFactoryPtr_(std::move(distanceMeasureFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), - jointProbabilityCalibrationModel_(jointProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createPredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - *distanceMeasureFactoryPtr_, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? *jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createPredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - *distanceMeasureFactoryPtr_, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? 
*jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel); - } - }; - - template - static inline std::unique_ptr createSparsePredictor( - const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads, - const LabelVectorSet* labelVectorSet, const IDistanceMeasureFactory& distanceMeasureFactory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) { - std::unique_ptr binaryTransformationPtr = - createBinaryTransformation(labelVectorSet, distanceMeasureFactory, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel); - return std::make_unique>( - featureMatrix, model, numLabels, numThreads, std::move(binaryTransformationPtr)); - } - - /** - * Allows to create instances of the type `ISparseBinaryPredictor` that allow to predict known label vectors for - * given query examples by comparing the predicted regression scores or probability estimates to the label vectors - * encountered in the training data. 
- */ - class ExampleWiseSparseBinaryPredictorFactory final : public ISparseBinaryPredictorFactory { - private: - - const std::unique_ptr distanceMeasureFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param distanceMeasureFactoryPtr An unique pointer to an object of type - * `IDistanceMeasureFactory` that allows to create - * implementations of the distance measure that should be used - * to calculate the distance between predicted scores and known - * label vectors - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param jointProbabilityCalibrationModel A pointer to an object of type - * `IJointProbabilityCalibrationModel` to be used for the - * calibration of joint probabilities or a null pointer, if no - * such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. 
Must be at least 1 - */ - ExampleWiseSparseBinaryPredictorFactory( - std::unique_ptr distanceMeasureFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel, uint32 numThreads) - : distanceMeasureFactoryPtr_(std::move(distanceMeasureFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), - jointProbabilityCalibrationModel_(jointProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createSparsePredictor( - featureMatrix, model, numLabels, numThreads_, labelVectorSet, *distanceMeasureFactoryPtr_, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? *jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createSparsePredictor( - featureMatrix, model, numLabels, numThreads_, labelVectorSet, *distanceMeasureFactoryPtr_, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? 
*jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel); - } - }; - - static inline std::unique_ptr createDistanceMeasureFactory(bool basedOnProbabilities, - const ILossConfig& lossConfig) { - if (basedOnProbabilities) { - return lossConfig.createJointProbabilityFunctionFactory(); - } else { - return lossConfig.createDistanceMeasureFactory(); - } - } - - ExampleWiseBinaryPredictorConfig::ExampleWiseBinaryPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : basedOnProbabilities_(false), lossConfigPtr_(lossConfigPtr), - multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - bool ExampleWiseBinaryPredictorConfig::isBasedOnProbabilities() const { - return basedOnProbabilities_; - } - - IExampleWiseBinaryPredictorConfig& ExampleWiseBinaryPredictorConfig::setBasedOnProbabilities( - bool basedOnProbabilities) { - basedOnProbabilities_ = basedOnProbabilities; - return *this; - } - - bool ExampleWiseBinaryPredictorConfig::isProbabilityCalibrationModelUsed() const { - return noMarginalProbabilityCalibrationModelPtr_ == nullptr; - } - - IExampleWiseBinaryPredictorConfig& ExampleWiseBinaryPredictorConfig::setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) { - noMarginalProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - noJointProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? 
nullptr : createNoProbabilityCalibrationModel(); - return *this; - } - - std::unique_ptr ExampleWiseBinaryPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr distanceMeasureFactoryPtr = - createDistanceMeasureFactory(basedOnProbabilities_, *lossConfigPtr_); - - if (distanceMeasureFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(distanceMeasureFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), - noJointProbabilityCalibrationModelPtr_.get(), numThreads); - } - - return nullptr; - } - - std::unique_ptr ExampleWiseBinaryPredictorConfig::createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr distanceMeasureFactoryPtr = - createDistanceMeasureFactory(basedOnProbabilities_, *lossConfigPtr_); - - if (distanceMeasureFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(distanceMeasureFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), - noJointProbabilityCalibrationModelPtr_.get(), numThreads); - } - - return nullptr; - } - - bool ExampleWiseBinaryPredictorConfig::isLabelVectorSetNeeded() const { - return true; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_gfm.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_gfm.cpp deleted file mode 100644 index 1e08b35e..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_gfm.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include "boosting/prediction/predictor_binary_gfm.hpp" - -#include "boosting/prediction/predictor_binary_common.hpp" -#include "boosting/prediction/transformation_binary_gfm.hpp" -#include "common/prediction/probability_calibration_no.hpp" - -#include - -namespace boosting { - - static inline 
std::unique_ptr createBinaryTransformation( - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - const IJointProbabilityFunctionFactory& jointProbabilityFunctionFactory) { - if (!labelVectorSet) { - throw std::runtime_error( - "Information about the label vectors that have been encountered in the training data is required for " - "predicting binary labels, but no such information is provided by the model. Most probably, the model " - "was intended to use a different prediction method when it has been trained."); - } - - std::unique_ptr binaryTransformationPtr; - - if (labelVectorSet->getNumLabelVectors() > 0) { - std::unique_ptr jointProbabilityFunctionPtr = - jointProbabilityFunctionFactory.create(marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel); - binaryTransformationPtr = - std::make_unique(*labelVectorSet, std::move(jointProbabilityFunctionPtr)); - } - - return binaryTransformationPtr; - } - - template - static inline std::unique_ptr createPredictor( - const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - const IJointProbabilityFunctionFactory& jointProbabilityFunctionFactory) { - std::unique_ptr binaryTransformationPtr = - createBinaryTransformation(labelVectorSet, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, jointProbabilityFunctionFactory); - return std::make_unique>(featureMatrix, model, numLabels, numThreads, - std::move(binaryTransformationPtr)); - } - - /** - * Allows to create instances of the type `IBinaryPredictor` that allow to predict whether individual labels of - * given query examples are relevant or irrelevant 
by discretizing the regression scores or probability estimates - * that are predicted for each label according to the general F-measure maximizer (GFM). - */ - class GfmBinaryPredictorFactory final : public IBinaryPredictorFactory { - private: - - const std::unique_ptr jointProbabilityFunctionFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param jointProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IJointProbabilityFunctionFactory` that allows to create - * implementations of the transformation function to be used to - * transform regression scores that are predicted for an - * example into a joint probability - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param jointProbabilityCalibrationModel A pointer to an object of type - * `IJointProbabilityCalibrationModel` to be used for the - * calibration of joint probabilities or a null pointer, if no - * such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. 
Must be at least 1 - */ - GfmBinaryPredictorFactory( - std::unique_ptr jointProbabilityFunctionFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel, uint32 numThreads) - : jointProbabilityFunctionFactoryPtr_(std::move(jointProbabilityFunctionFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), - jointProbabilityCalibrationModel_(jointProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createPredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? *jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createPredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? 
*jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_); - } - }; - - template - static inline std::unique_ptr createSparsePredictor( - const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - const IJointProbabilityFunctionFactory& jointProbabilityFunctionFactory) { - std::unique_ptr binaryTransformationPtr = - createBinaryTransformation(labelVectorSet, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, jointProbabilityFunctionFactory); - return std::make_unique>( - featureMatrix, model, numLabels, numThreads, std::move(binaryTransformationPtr)); - } - - /** - * Allows to create instances of the type `ISparseBinaryPredictor` that allow to predict whether individual labels - * of given query examples are relevant or irrelevant by discretizing the regression scores or probability estimates - * that are predicted for each label according to the general F-measure maximizer (GFM). 
- */ - class GfmSparseBinaryPredictorFactory final : public ISparseBinaryPredictorFactory { - private: - - const std::unique_ptr jointProbabilityFunctionFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param jointProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IJointProbabilityFunctionFactory` that allows to create - * implementations of the function to be used to transform - * regression scores that are predicted for an example into - * a joint probability - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param jointProbabilityCalibrationModel A pointer to an object of type - * `IJointProbabilityCalibrationModel` to be used for the - * calibration of joint probabilities or a null pointer, if no - * such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. 
Must be at least 1 - */ - GfmSparseBinaryPredictorFactory( - std::unique_ptr jointProbabilityFunctionFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel, uint32 numThreads) - : jointProbabilityFunctionFactoryPtr_(std::move(jointProbabilityFunctionFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), - jointProbabilityCalibrationModel_(jointProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createSparsePredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - marginalProbabilityCalibrationModel_ - ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? *jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createSparsePredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - marginalProbabilityCalibrationModel_ - ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? 
*jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_); - } - }; - - GfmBinaryPredictorConfig::GfmBinaryPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : lossConfigPtr_(std::move(lossConfigPtr)), multiThreadingConfigPtr_(std::move(multiThreadingConfigPtr)) {} - - bool GfmBinaryPredictorConfig::isProbabilityCalibrationModelUsed() const { - return noMarginalProbabilityCalibrationModelPtr_ == nullptr; - } - - IGfmBinaryPredictorConfig& GfmBinaryPredictorConfig::setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) { - noMarginalProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - noJointProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - return *this; - } - - std::unique_ptr GfmBinaryPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr jointProbabilityFunctionFactoryPtr = - lossConfigPtr_->createJointProbabilityFunctionFactory(); - - if (jointProbabilityFunctionFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(jointProbabilityFunctionFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), - noJointProbabilityCalibrationModelPtr_.get(), numThreads); - } else { - return nullptr; - } - } - - std::unique_ptr GfmBinaryPredictorConfig::createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr jointProbabilityFunctionFactoryPtr = - lossConfigPtr_->createJointProbabilityFunctionFactory(); - - if (jointProbabilityFunctionFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - 
std::move(jointProbabilityFunctionFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), - noJointProbabilityCalibrationModelPtr_.get(), numThreads); - } else { - return nullptr; - } - } - - bool GfmBinaryPredictorConfig::isLabelVectorSetNeeded() const { - return true; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_label_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_label_wise.cpp deleted file mode 100644 index 29031ddd..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/predictor_binary_label_wise.cpp +++ /dev/null @@ -1,231 +0,0 @@ -#include "boosting/prediction/predictor_binary_label_wise.hpp" - -#include "boosting/prediction/discretization_function_probability.hpp" -#include "boosting/prediction/discretization_function_score.hpp" -#include "boosting/prediction/predictor_binary_common.hpp" -#include "boosting/prediction/transformation_binary_label_wise.hpp" -#include "common/prediction/probability_calibration_no.hpp" - -namespace boosting { - - /** - * Allows to create instances of the type `IBinaryPredictor` that allow to predict whether individual labels of - * given query examples are relevant or irrelevant by discretizing the regression scores or probability estimates - * that are predicted for each label individually. 
- */ - class LabelWiseBinaryPredictorFactory final : public IBinaryPredictorFactory { - private: - - const std::unique_ptr discretizationFunctionFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param discretizationFunctionFactoryPtr An unique pointer to an object of type - * `IDiscretizationFunctionFactory` that allows to create the - * implementation to be used for discretization - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. Must be at least 1 - */ - LabelWiseBinaryPredictorFactory( - std::unique_ptr discretizationFunctionFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, uint32 numThreads) - : discretizationFunctionFactoryPtr_(std::move(discretizationFunctionFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - std::unique_ptr discretizationFunctionPtr = - discretizationFunctionFactoryPtr_->create(marginalProbabilityCalibrationModel_ - ? 
*marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel); - std::unique_ptr binaryTransformationPtr = - std::make_unique(std::move(discretizationFunctionPtr)); - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_, std::move(binaryTransformationPtr)); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - std::unique_ptr discretizationFunctionPtr = - discretizationFunctionFactoryPtr_->create(marginalProbabilityCalibrationModel_ - ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel); - std::unique_ptr binaryTransformationPtr = - std::make_unique(std::move(discretizationFunctionPtr)); - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_, std::move(binaryTransformationPtr)); - } - }; - - /** - * Allows to create instances of the type `ISparseBinaryPredictor` that allow to predict whether individual labels - * of given query examples are relevant or irrelevant by discretizing the regression scores or probability estimates - * that are predicted for each label individually. 
- */ - class LabelWiseSparseBinaryPredictorFactory final : public ISparseBinaryPredictorFactory { - private: - - const std::unique_ptr discretizationFunctionFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param discretizationFunctionFactoryPtr An unique pointer to an object of type - * `IDiscretizationFunctionFactory` that allows to create the - * implementation to be used for discretization - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. Must be at least 1 - */ - LabelWiseSparseBinaryPredictorFactory( - std::unique_ptr discretizationFunctionFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, uint32 numThreads) - : discretizationFunctionFactoryPtr_(std::move(discretizationFunctionFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - std::unique_ptr discretizationFunctionPtr = - discretizationFunctionFactoryPtr_->create(marginalProbabilityCalibrationModel_ - ? 
*marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel); - std::unique_ptr binaryTransformationPtr = - std::make_unique(std::move(discretizationFunctionPtr)); - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_, std::move(binaryTransformationPtr)); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - std::unique_ptr discretizationFunctionPtr = - discretizationFunctionFactoryPtr_->create(marginalProbabilityCalibrationModel_ - ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel); - std::unique_ptr binaryTransformationPtr = - std::make_unique(std::move(discretizationFunctionPtr)); - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_, std::move(binaryTransformationPtr)); - } - }; - - static inline std::unique_ptr createDiscretizationFunctionFactory( - bool basedOnProbabilities, const ILossConfig& lossConfig) { - if (basedOnProbabilities) { - std::unique_ptr marginalProbabilityFunctionFactory = - lossConfig.createMarginalProbabilityFunctionFactory(); - - if (marginalProbabilityFunctionFactory) { - return std::make_unique( - std::move(marginalProbabilityFunctionFactory)); - } else { - return nullptr; - } - } else { - float64 threshold = lossConfig.getDefaultPrediction(); - return std::make_unique(threshold); - } - } - - LabelWiseBinaryPredictorConfig::LabelWiseBinaryPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : basedOnProbabilities_(false), lossConfigPtr_(lossConfigPtr), - multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - bool 
LabelWiseBinaryPredictorConfig::isBasedOnProbabilities() const { - return basedOnProbabilities_; - } - - ILabelWiseBinaryPredictorConfig& LabelWiseBinaryPredictorConfig::setBasedOnProbabilities( - bool basedOnProbabilities) { - basedOnProbabilities_ = basedOnProbabilities; - return *this; - } - - bool LabelWiseBinaryPredictorConfig::isProbabilityCalibrationModelUsed() const { - return noMarginalProbabilityCalibrationModelPtr_ == nullptr; - } - - ILabelWiseBinaryPredictorConfig& LabelWiseBinaryPredictorConfig::setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) { - noMarginalProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - return *this; - } - - std::unique_ptr LabelWiseBinaryPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr discretizationFunctionFactoryPtr = - createDiscretizationFunctionFactory(basedOnProbabilities_, *lossConfigPtr_); - - if (discretizationFunctionFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(discretizationFunctionFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), numThreads); - } - - return nullptr; - } - - std::unique_ptr LabelWiseBinaryPredictorConfig::createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr discretizationFunctionFactoryPtr = - createDiscretizationFunctionFactory(basedOnProbabilities_, *lossConfigPtr_); - - if (discretizationFunctionFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(discretizationFunctionFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), numThreads); - } - - return nullptr; - } - - bool LabelWiseBinaryPredictorConfig::isLabelVectorSetNeeded() const { - return false; - } -} 
diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_auto.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_auto.cpp deleted file mode 100644 index a766eee3..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_auto.cpp +++ /dev/null @@ -1,34 +0,0 @@ -#include "boosting/prediction/predictor_probability_auto.hpp" - -#include "boosting/prediction/predictor_probability_label_wise.hpp" -#include "boosting/prediction/predictor_probability_marginalized.hpp" - -namespace boosting { - - AutomaticProbabilityPredictorConfig::AutomaticProbabilityPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : lossConfigPtr_(lossConfigPtr), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - std::unique_ptr AutomaticProbabilityPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - if (lossConfigPtr_->isDecomposable()) { - return LabelWiseProbabilityPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .createPredictorFactory(featureMatrix, numLabels); - } else { - return MarginalizedProbabilityPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .createPredictorFactory(featureMatrix, numLabels); - } - } - - bool AutomaticProbabilityPredictorConfig::isLabelVectorSetNeeded() const { - if (lossConfigPtr_->isDecomposable()) { - return LabelWiseProbabilityPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .isLabelVectorSetNeeded(); - } else { - return MarginalizedProbabilityPredictorConfig(lossConfigPtr_, multiThreadingConfigPtr_) - .isLabelVectorSetNeeded(); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_label_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_label_wise.cpp deleted file mode 100644 index d92ee94c..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_label_wise.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include "boosting/prediction/predictor_probability_label_wise.hpp" - -#include "boosting/prediction/predictor_probability_common.hpp" -#include "boosting/prediction/transformation_probability_label_wise.hpp" -#include "common/prediction/probability_calibration_no.hpp" - -namespace boosting { - - /** - * Allows to create instances of the type `IProbabilityPredictor` that allow to predict label-wise probabilities for - * given query examples by transforming the regression scores that are predicted for each label individually into - * probabilities. - */ - class LabelWiseProbabilityPredictorFactory final : public IProbabilityPredictorFactory { - private: - - const std::unique_ptr marginalProbabilityFunctionFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param marginalProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IMarginalProbabilityFunctionFactory` that allows to create - * implementations of the transformation function to be used to - * transform regression scores that are predicted for - * individual labels into probabilities - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. 
Must be at least 1 - */ - LabelWiseProbabilityPredictorFactory( - std::unique_ptr marginalProbabilityFunctionFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, uint32 numThreads) - : marginalProbabilityFunctionFactoryPtr_(std::move(marginalProbabilityFunctionFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - std::unique_ptr probabilityTransformationPtr = - std::make_unique(marginalProbabilityFunctionFactoryPtr_->create( - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel)); - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_, std::move(probabilityTransformationPtr)); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - std::unique_ptr probabilityTransformationPtr = - std::make_unique(marginalProbabilityFunctionFactoryPtr_->create( - marginalProbabilityCalibrationModel_ ? 
*marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel)); - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_, std::move(probabilityTransformationPtr)); - } - }; - - LabelWiseProbabilityPredictorConfig::LabelWiseProbabilityPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : lossConfigPtr_(lossConfigPtr), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - bool LabelWiseProbabilityPredictorConfig::isProbabilityCalibrationModelUsed() const { - return noMarginalProbabilityCalibrationModelPtr_ == nullptr; - } - - ILabelWiseProbabilityPredictorConfig& LabelWiseProbabilityPredictorConfig::setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) { - noMarginalProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - return *this; - } - - std::unique_ptr LabelWiseProbabilityPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr marginalProbabilityFunctionFactoryPtr = - lossConfigPtr_->createMarginalProbabilityFunctionFactory(); - - if (marginalProbabilityFunctionFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(marginalProbabilityFunctionFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), - numThreads); - } else { - return nullptr; - } - } - - bool LabelWiseProbabilityPredictorConfig::isLabelVectorSetNeeded() const { - return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_marginalized.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_marginalized.cpp deleted file mode 100644 index 1b2a3530..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/predictor_probability_marginalized.cpp +++ /dev/null @@ -1,151 +0,0 @@ 
-#include "boosting/prediction/predictor_probability_marginalized.hpp" - -#include "boosting/prediction/predictor_probability_common.hpp" -#include "boosting/prediction/transformation_probability_marginalized.hpp" -#include "common/prediction/probability_calibration_no.hpp" - -#include - -namespace boosting { - - template - static inline std::unique_ptr createPredictor( - const FeatureMatrix& featureMatrix, const Model& model, uint32 numLabels, uint32 numThreads, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - const IJointProbabilityFunctionFactory& jointProbabilityFunctionFactory) { - if (!labelVectorSet) { - throw std::runtime_error( - "Information about the label vectors that have been encountered in the training data is required for " - "predicting binary labels, but no such information is provided by the model. Most probably, the model " - "was intended to use a different prediction method when it has been trained."); - } - - std::unique_ptr probabilityTransformationPtr; - - if (labelVectorSet->getNumLabelVectors() > 0) { - probabilityTransformationPtr = std::make_unique( - *labelVectorSet, jointProbabilityFunctionFactory.create(marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel)); - } - - return std::make_unique>(featureMatrix, model, numLabels, numThreads, - std::move(probabilityTransformationPtr)); - } - - /** - * Allows to create instances of the type `IProbabilityPredictor` that allow to predict label-wise probabilities for - * given query examples by marginalizing over the joint probabilities of known label vectors. 
- */ - class MarginalizedProbabilityPredictorFactory final : public IProbabilityPredictorFactory { - private: - - const std::unique_ptr jointProbabilityFunctionFactoryPtr_; - - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel_; - - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel_; - - const uint32 numThreads_; - - public: - - /** - * @param jointProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IJointProbabilityFunctionFactory` that allows to create - * implementations of the function to be used to transform - * regression scores that are predicted for an example into - * joint probabilities - * @param marginalProbabilityCalibrationModel A pointer to an object of type - * `IMarginalProbabilityCalibrationModel` to be used for the - * calibration of marginal probabilities or a null pointer, if - * no such model is available - * @param jointProbabilityCalibrationModel A pointer to an object of type - * `IJointProbabilityCalibrationModel` to be used for the - * calibration of joint probabilities or a null pointer, if no - * such model is available - * @param numThreads The number of CPU threads to be used to make predictions for - * different query examples in parallel. 
Must be at least 1 - */ - MarginalizedProbabilityPredictorFactory( - std::unique_ptr jointProbabilityFunctionFactoryPtr, - const IMarginalProbabilityCalibrationModel* marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel* jointProbabilityCalibrationModel, uint32 numThreads) - : jointProbabilityFunctionFactoryPtr_(std::move(jointProbabilityFunctionFactoryPtr)), - marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel), - jointProbabilityCalibrationModel_(jointProbabilityCalibrationModel), numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createPredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? *jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, - uint32 numLabels) const override { - return createPredictor(featureMatrix, model, numLabels, numThreads_, labelVectorSet, - marginalProbabilityCalibrationModel_ ? *marginalProbabilityCalibrationModel_ - : marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel_ ? 
*jointProbabilityCalibrationModel_ - : jointProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_); - } - }; - - MarginalizedProbabilityPredictorConfig::MarginalizedProbabilityPredictorConfig( - const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : lossConfigPtr_(std::move(lossConfigPtr)), multiThreadingConfigPtr_(std::move(multiThreadingConfigPtr)) {} - - bool MarginalizedProbabilityPredictorConfig::isProbabilityCalibrationModelUsed() const { - return noMarginalProbabilityCalibrationModelPtr_ == nullptr; - } - - IMarginalizedProbabilityPredictorConfig& MarginalizedProbabilityPredictorConfig::setUseProbabilityCalibrationModel( - bool useProbabilityCalibrationModel) { - noMarginalProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - noJointProbabilityCalibrationModelPtr_ = - useProbabilityCalibrationModel ? nullptr : createNoProbabilityCalibrationModel(); - return *this; - } - - std::unique_ptr MarginalizedProbabilityPredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - std::unique_ptr jointProbabilityFunctionFactoryPtr = - lossConfigPtr_->createJointProbabilityFunctionFactory(); - - if (jointProbabilityFunctionFactoryPtr) { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique( - std::move(jointProbabilityFunctionFactoryPtr), noMarginalProbabilityCalibrationModelPtr_.get(), - noJointProbabilityCalibrationModelPtr_.get(), numThreads); - } else { - return nullptr; - } - } - - bool MarginalizedProbabilityPredictorConfig::isLabelVectorSetNeeded() const { - return true; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/predictor_score_label_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/predictor_score_label_wise.cpp deleted file mode 100644 index 413ac781..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/prediction/predictor_score_label_wise.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "boosting/prediction/predictor_score_label_wise.hpp" - -#include "boosting/prediction/predictor_score_common.hpp" - -namespace boosting { - - /** - * Allows to create instances of the type `IScorePredictor` that predict label-wise regression scores for given - * query examples by summing up the scores that are provided by individual rules for each label individually. - */ - class LabelWiseScorePredictorFactory final : public IScorePredictorFactory { - private: - - const uint32 numThreads_; - - public: - - /** - * @param numThreads The number of CPU threads to be used to make predictions for different query examples - * in parallel. Must be at least 1 - */ - LabelWiseScorePredictorFactory(uint32 numThreads) : numThreads_(numThreads) {} - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create(const CContiguousConstView& featureMatrix, - const RuleList& model, const LabelVectorSet* labelVectorSet, - uint32 numLabels) const override { - return std::make_unique, RuleList>>( - featureMatrix, model, numLabels, numThreads_); - } - - /** - * @see `IPredictorFactory::create` - */ - std::unique_ptr create(const CsrConstView& featureMatrix, - const RuleList& model, const LabelVectorSet* labelVectorSet, - uint32 numLabels) const override { - return std::make_unique, RuleList>>(featureMatrix, model, - numLabels, numThreads_); - } - }; - - LabelWiseScorePredictorConfig::LabelWiseScorePredictorConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - std::unique_ptr LabelWiseScorePredictorConfig::createPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, numLabels); - return std::make_unique(numThreads); - } - - bool 
LabelWiseScorePredictorConfig::isLabelVectorSetNeeded() const { - return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/probability_calibration_isotonic.cpp b/cpp/subprojects/boosting/src/boosting/prediction/probability_calibration_isotonic.cpp deleted file mode 100644 index 1456f77e..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/probability_calibration_isotonic.cpp +++ /dev/null @@ -1,691 +0,0 @@ -#include "boosting/prediction/probability_calibration_isotonic.hpp" - -#include "boosting/statistics/statistics.hpp" -#include "common/data/arrays.hpp" -#include "common/iterator/binary_forward_iterator.hpp" -#include "common/iterator/non_zero_index_forward_iterator.hpp" -#include "common/prediction/probability_calibration_no.hpp" - -#include -#include - -namespace boosting { - - template - static inline void extractThresholdsAndProbabilities( - IndexIterator indexIterator, uint32 numExamples, uint32 numLabels, - IsotonicProbabilityCalibrationModel& calibrationModel, const CContiguousLabelMatrix& labelMatrix, - const CContiguousConstView& scoreMatrix, - const IMarginalProbabilityFunction& marginalProbabilityFunction) { - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - CContiguousLabelMatrix::value_const_iterator labelIterator = labelMatrix.values_cbegin(exampleIndex); - CContiguousConstView::value_const_iterator scoreIterator = scoreMatrix.values_cbegin(exampleIndex); - - for (uint32 j = 0; j < numLabels; j++) { - float64 trueProbability = labelIterator[j] ? 
1 : 0; - float64 score = scoreIterator[j]; - float64 marginalProbability = - marginalProbabilityFunction.transformScoreIntoMarginalProbability(j, score); - calibrationModel.addBin(j, marginalProbability, trueProbability); - } - } - } - - template - static inline void extractThresholdsAndProbabilities( - IndexIterator indexIterator, uint32 numExamples, uint32 numLabels, - IsotonicProbabilityCalibrationModel& calibrationModel, const CsrLabelMatrix& labelMatrix, - const CContiguousConstView& scoreMatrix, - const IMarginalProbabilityFunction& marginalProbabilityFunction) { - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - auto labelIterator = make_binary_forward_iterator(labelMatrix.indices_cbegin(exampleIndex), - labelMatrix.indices_cend(exampleIndex)); - CContiguousConstView::value_const_iterator scoreIterator = scoreMatrix.values_cbegin(exampleIndex); - - for (uint32 j = 0; j < numLabels; j++) { - float64 trueProbability = (*labelIterator) ? 1 : 0; - float64 score = scoreIterator[j]; - float64 marginalProbability = - marginalProbabilityFunction.transformScoreIntoMarginalProbability(j, score); - calibrationModel.addBin(j, marginalProbability, trueProbability); - labelIterator++; - } - } - } - - template - static inline void extractThresholdsAndProbabilities( - IndexIterator indexIterator, uint32 numExamples, uint32 numLabels, - IsotonicProbabilityCalibrationModel& calibrationModel, const CContiguousLabelMatrix& labelMatrix, - const SparseSetMatrix& scoreMatrix, const IMarginalProbabilityFunction& marginalProbabilityFunction) { - for (uint32 i = 0; i < numLabels; i++) { - calibrationModel.addBin(i, 0, 0); - } - - uint32* numSparsePerLabel = new uint32[numLabels] {}; - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - CContiguousLabelMatrix::value_const_iterator labelIterator = labelMatrix.values_cbegin(exampleIndex); - SparseSetMatrix::const_row scoreRow = scoreMatrix[exampleIndex]; - - for 
(uint32 j = 0; j < numLabels; j++) { - float64 trueProbability = labelIterator[j] ? 1 : 0; - const IndexedValue* entry = scoreRow[j]; - - if (entry) { - float64 score = entry->value; - float64 marginalProbability = - marginalProbabilityFunction.transformScoreIntoMarginalProbability(j, score); - calibrationModel.addBin(j, marginalProbability, trueProbability); - } else { - IsotonicProbabilityCalibrationModel::bin_list bins = calibrationModel[j]; - Tuple& firstBin = bins[0]; - uint32 numSparse = numSparsePerLabel[j] + 1; - - if (numSparse > 1) { - firstBin.second = iterativeArithmeticMean(numSparse, trueProbability, firstBin.second); - } else { - firstBin.second = trueProbability; - } - - numSparsePerLabel[j] = numSparse; - } - } - } - - delete[] numSparsePerLabel; - } - - template - static inline void extractThresholdsAndProbabilities( - IndexIterator indexIterator, uint32 numExamples, uint32 numLabels, - IsotonicProbabilityCalibrationModel& calibrationModel, const CsrLabelMatrix& labelMatrix, - const SparseSetMatrix& scoreMatrix, const IMarginalProbabilityFunction& marginalProbabilityFunction) { - for (uint32 i = 0; i < numLabels; i++) { - calibrationModel.addBin(i, 0, 0); - } - - uint32* numSparsePerLabel = new uint32[numLabels]; - setArrayToValue(numSparsePerLabel, numLabels, numExamples); - uint32* numSparseRelevantPerLabel = new uint32[numLabels] {}; - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - CsrLabelMatrix::index_const_iterator labelIndicesBegin = labelMatrix.indices_cbegin(exampleIndex); - CsrLabelMatrix::index_const_iterator labelIndicesEnd = labelMatrix.indices_cend(exampleIndex); - uint32 numRelevantLabels = labelIndicesEnd - labelIndicesBegin; - - for (uint32 j = 0; j < numRelevantLabels; j++) { - uint32 labelIndex = labelIndicesBegin[j]; - numSparseRelevantPerLabel[labelIndex] += 1; - } - - for (auto it = scoreMatrix.cbegin(exampleIndex); it != scoreMatrix.cend(exampleIndex); it++) { - const IndexedValue& 
entry = *it; - uint32 labelIndex = entry.index; - float64 score = entry.value; - float64 marginalProbability = - marginalProbabilityFunction.transformScoreIntoMarginalProbability(labelIndex, score); - bool trueLabel = std::binary_search(labelIndicesBegin, labelIndicesEnd, labelIndex); - calibrationModel.addBin(labelIndex, marginalProbability, trueLabel ? 1 : 0); - numSparsePerLabel[labelIndex] -= 1; - - if (trueLabel) { - numSparseRelevantPerLabel[labelIndex] -= 1; - } - } - } - - for (uint32 i = 0; i < numLabels; i++) { - IsotonicProbabilityCalibrationModel::bin_list bins = calibrationModel[i]; - Tuple& firstBin = bins[0]; - firstBin.second = (float64) numSparseRelevantPerLabel[i] / (float64) numSparsePerLabel[i]; - } - - delete[] numSparsePerLabel; - delete[] numSparseRelevantPerLabel; - } - - template - static inline std::unique_ptr fitMarginalProbabilityCalibrationModel( - IndexIterator indexIterator, uint32 numExamples, const LabelMatrix& labelMatrix, const IStatistics& statistics, - const IMarginalProbabilityFunction& marginalProbabilityFunction) { - // Extract thresholds and ground truth probabilities from score matrix and label matrix, respectively... 
- uint32 numLabels = labelMatrix.getNumCols(); - std::unique_ptr calibrationModelPtr = - std::make_unique(numLabels); - const IBoostingStatistics& boostingStatistics = dynamic_cast(statistics); - auto denseVisitor = - [=, &marginalProbabilityFunction, &calibrationModelPtr](const CContiguousConstView& scoreMatrix) { - extractThresholdsAndProbabilities(indexIterator, numExamples, numLabels, *calibrationModelPtr, labelMatrix, - scoreMatrix, marginalProbabilityFunction); - }; - auto sparseVisitor = - [=, &marginalProbabilityFunction, &calibrationModelPtr](const SparseSetMatrix& scoreMatrix) { - extractThresholdsAndProbabilities(indexIterator, numExamples, numLabels, *calibrationModelPtr, labelMatrix, - scoreMatrix, marginalProbabilityFunction); - }; - boostingStatistics.visitScoreMatrix(denseVisitor, sparseVisitor); - - // Build and return the isotonic calibration model... - calibrationModelPtr->fit(); - return calibrationModelPtr; - } - - template - static inline std::unique_ptr fitMarginalProbabilityCalibrationModel( - const SinglePartition& partition, const LabelMatrix& labelMatrix, const IStatistics& statistics, - const IMarginalProbabilityFunction& marginalProbabilityFunction) { - return fitMarginalProbabilityCalibrationModel(partition.cbegin(), partition.getNumElements(), labelMatrix, - statistics, marginalProbabilityFunction); - } - - template - static inline std::unique_ptr fitMarginalProbabilityCalibrationModel( - const BiPartition& partition, uint32 useHoldoutSet, const LabelMatrix& labelMatrix, const IStatistics& statistics, - const IMarginalProbabilityFunction& marginalProbabilityFunction) { - BiPartition::const_iterator indexIterator; - uint32 numExamples; - - if (useHoldoutSet) { - indexIterator = partition.second_cbegin(); - numExamples = partition.getNumSecond(); - } else { - indexIterator = partition.first_cbegin(); - numExamples = partition.getNumFirst(); - } - - return fitMarginalProbabilityCalibrationModel(indexIterator, numExamples, labelMatrix, 
statistics, - marginalProbabilityFunction); - } - - /** - * An implementation of the type `IMarginalProbabilityCalibrator` that does fit a model for the calibration of - * marginal probabilities via isotonic regression. - */ - class IsotonicMarginalProbabilityCalibrator final : public IMarginalProbabilityCalibrator { - private: - - const std::unique_ptr marginalProbabilityCalibrationModelPtr_; - - const std::unique_ptr marginalProbabilityFunctionPtr_; - - const bool useHoldoutSet_; - - public: - - /** - * @param marginalProbabilityFunctionFactory A reference to an object of type - * `IMarginalProbabilityFunctionFactory` that allows to create - * implementations of the transformation function to be used to - * transform regression scores that are predicted for - * individual labels into marginal probabilities - * @param useHoldoutSet True, if the calibration model should be fit to the examples - * in the holdout set, if available, false otherwise - */ - IsotonicMarginalProbabilityCalibrator( - const IMarginalProbabilityFunctionFactory& marginalProbabilityFunctionFactory, bool useHoldoutSet) - : marginalProbabilityCalibrationModelPtr_(createNoProbabilityCalibrationModel()), - marginalProbabilityFunctionPtr_( - marginalProbabilityFunctionFactory.create(*marginalProbabilityCalibrationModelPtr_)), - useHoldoutSet_(useHoldoutSet) {} - - /** - * @see `IMarginalProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return fitMarginalProbabilityCalibrationModel(partition, labelMatrix, statistics, - *marginalProbabilityFunctionPtr_); - } - - /** - * @see `IMarginalProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CsrLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - 
return fitMarginalProbabilityCalibrationModel(partition, labelMatrix, statistics, - *marginalProbabilityFunctionPtr_); - } - - /** - * @see `IMarginalProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return fitMarginalProbabilityCalibrationModel(partition, useHoldoutSet_, labelMatrix, statistics, - *marginalProbabilityFunctionPtr_); - } - - /** - * @see `IMarginalProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CsrLabelMatrix& labelMatrix, const IStatistics& statistics) const override { - return fitMarginalProbabilityCalibrationModel(partition, useHoldoutSet_, labelMatrix, statistics, - *marginalProbabilityFunctionPtr_); - } - }; - - /** - * A factory that allows to create instances of the type `IsotonicMarginalProbabilityCalibrator`. 
- */ - class IsotonicMarginalProbabilityCalibratorFactory final : public IMarginalProbabilityCalibratorFactory { - private: - - const std::unique_ptr marginalProbabilityFunctionFactoryPtr_; - - const bool useHoldoutSet_; - - public: - - /** - * @param marginalProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IMarginalProbabilityFunctionFactory` that allows to create - * implementations of the transformation function to be used to - * transform regression scores that are predicted for - * individual labels into marginal probabilities - * @param useHoldoutSet True, if the calibration model should be fit to the examples - * in the holdout set, if available, false otherwise - */ - IsotonicMarginalProbabilityCalibratorFactory( - std::unique_ptr marginalProbabilityFunctionFactoryPtr, - bool useHoldoutSet) - : marginalProbabilityFunctionFactoryPtr_(std::move(marginalProbabilityFunctionFactoryPtr)), - useHoldoutSet_(useHoldoutSet) {} - - /** - * @see `IMarginalProbabilityCalibratorFactory::create` - */ - std::unique_ptr create() const override { - return std::make_unique(*marginalProbabilityFunctionFactoryPtr_, - useHoldoutSet_); - } - }; - - IsotonicMarginalProbabilityCalibratorConfig::IsotonicMarginalProbabilityCalibratorConfig( - const std::unique_ptr& lossConfigPtr) - : useHoldoutSet_(true), lossConfigPtr_(lossConfigPtr) {} - - bool IsotonicMarginalProbabilityCalibratorConfig::isHoldoutSetUsed() const { - return useHoldoutSet_; - } - - IIsotonicMarginalProbabilityCalibratorConfig& IsotonicMarginalProbabilityCalibratorConfig::setUseHoldoutSet( - bool useHoldoutSet) { - useHoldoutSet_ = useHoldoutSet; - return *this; - } - - bool IsotonicMarginalProbabilityCalibratorConfig::shouldUseHoldoutSet() const { - return useHoldoutSet_; - } - - std::unique_ptr - IsotonicMarginalProbabilityCalibratorConfig::createMarginalProbabilityCalibratorFactory() const { - std::unique_ptr marginalProbabilityFunctionFactoryPtr = - 
lossConfigPtr_->createMarginalProbabilityFunctionFactory(); - - if (marginalProbabilityFunctionFactoryPtr) { - return std::make_unique( - std::move(marginalProbabilityFunctionFactoryPtr), useHoldoutSet_); - } else { - return std::make_unique(); - } - } - - template - static inline bool areLabelVectorsEqual(LabelIndexIterator labelIndicesBegin, LabelIndexIterator labelIndicesEnd, - const LabelVector& labelVector) { - uint32 numRelevantLabels = labelVector.getNumElements(); - LabelVector::const_iterator labelIndexIterator = labelVector.cbegin(); - - for (uint32 i = 0; i < numRelevantLabels; i++) { - if (labelIndicesBegin == labelIndicesEnd || *labelIndicesBegin != labelIndexIterator[i]) { - return false; - } - - labelIndicesBegin++; - } - - return true; - } - - template - static inline void extractThresholdsAndProbabilities(IndexIterator indexIterator, uint32 numExamples, - IsotonicProbabilityCalibrationModel& calibrationModel, - const CContiguousLabelMatrix& labelMatrix, - const CContiguousConstView& scoreMatrix, - const IJointProbabilityFunction& jointProbabilityFunction, - const LabelVectorSet& labelVectorSet) { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - - for (uint32 i = 0; i < numLabelVectors; i++) { - IsotonicProbabilityCalibrationModel::bin_list bins = calibrationModel[i]; - const LabelVector& labelVector = *labelVectorIterator[i]; - - for (uint32 j = 0; j < numExamples; j++) { - uint32 exampleIndex = indexIterator[j]; - auto labelIndicesBegin = make_non_zero_index_forward_iterator(labelMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - auto labelIndicesEnd = make_non_zero_index_forward_iterator(labelMatrix.values_cend(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - float64 trueProbability = areLabelVectorsEqual(labelIndicesBegin, labelIndicesEnd, labelVector) ? 
1 : 0; - CContiguousConstView::value_const_iterator scoresBegin = - scoreMatrix.values_cbegin(exampleIndex); - CContiguousConstView::value_const_iterator scoresEnd = scoreMatrix.values_cend(exampleIndex); - float64 jointProbability = - jointProbabilityFunction.transformScoresIntoJointProbability(i, labelVector, scoresBegin, scoresEnd); - bins.emplace_back(jointProbability, trueProbability); - } - } - } - - template - static inline void extractThresholdsAndProbabilities(IndexIterator indexIterator, uint32 numExamples, - IsotonicProbabilityCalibrationModel& calibrationModel, - const CsrLabelMatrix& labelMatrix, - const CContiguousConstView& scoreMatrix, - const IJointProbabilityFunction& jointProbabilityFunction, - const LabelVectorSet& labelVectorSet) { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - - for (uint32 i = 0; i < numLabelVectors; i++) { - IsotonicProbabilityCalibrationModel::bin_list bins = calibrationModel[i]; - const LabelVector& labelVector = *labelVectorIterator[i]; - - for (uint32 j = 0; j < numExamples; j++) { - uint32 exampleIndex = indexIterator[j]; - CsrLabelMatrix::index_const_iterator labelIndicesBegin = labelMatrix.indices_cbegin(exampleIndex); - CsrLabelMatrix::index_const_iterator labelIndicesEnd = labelMatrix.indices_cend(exampleIndex); - float64 trueProbability = areLabelVectorsEqual(labelIndicesBegin, labelIndicesEnd, labelVector) ? 
1 : 0; - CContiguousConstView::value_const_iterator scoresBegin = - scoreMatrix.values_cbegin(exampleIndex); - CContiguousConstView::value_const_iterator scoresEnd = scoreMatrix.values_cend(exampleIndex); - float64 jointProbability = - jointProbabilityFunction.transformScoresIntoJointProbability(i, labelVector, scoresBegin, scoresEnd); - bins.emplace_back(jointProbability, trueProbability); - } - } - } - - template - static inline void extractThresholdsAndProbabilities(IndexIterator indexIterator, uint32 numExamples, - IsotonicProbabilityCalibrationModel& calibrationModel, - const CContiguousLabelMatrix& labelMatrix, - const SparseSetMatrix& scoreMatrix, - const IJointProbabilityFunction& jointProbabilityFunction, - const LabelVectorSet& labelVectorSet) { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - uint32 numLabels = labelMatrix.getNumCols(); - - for (uint32 i = 0; i < numLabelVectors; i++) { - IsotonicProbabilityCalibrationModel::bin_list bins = calibrationModel[i]; - const LabelVector& labelVector = *labelVectorIterator[i]; - - for (uint32 j = 0; j < numExamples; j++) { - uint32 exampleIndex = indexIterator[j]; - auto labelIndicesBegin = make_non_zero_index_forward_iterator(labelMatrix.values_cbegin(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - auto labelIndicesEnd = make_non_zero_index_forward_iterator(labelMatrix.values_cend(exampleIndex), - labelMatrix.values_cend(exampleIndex)); - float64 trueProbability = areLabelVectorsEqual(labelIndicesBegin, labelIndicesEnd, labelVector) ? 
1 : 0; - SparseSetMatrix::const_row scores = scoreMatrix[exampleIndex]; - float64 jointProbability = - jointProbabilityFunction.transformScoresIntoJointProbability(i, labelVector, scores, numLabels); - bins.emplace_back(jointProbability, trueProbability); - } - } - } - - template - static inline void extractThresholdsAndProbabilities(IndexIterator indexIterator, uint32 numExamples, - IsotonicProbabilityCalibrationModel& calibrationModel, - const CsrLabelMatrix& labelMatrix, - const SparseSetMatrix& scoreMatrix, - const IJointProbabilityFunction& jointProbabilityFunction, - const LabelVectorSet& labelVectorSet) { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - uint32 numLabels = labelMatrix.getNumCols(); - - for (uint32 i = 0; i < numLabelVectors; i++) { - IsotonicProbabilityCalibrationModel::bin_list bins = calibrationModel[i]; - const LabelVector& labelVector = *labelVectorIterator[i]; - - for (uint32 j = 0; j < numExamples; j++) { - uint32 exampleIndex = indexIterator[j]; - CsrLabelMatrix::index_const_iterator labelIndicesBegin = labelMatrix.indices_cbegin(exampleIndex); - CsrLabelMatrix::index_const_iterator labelIndicesEnd = labelMatrix.indices_cend(exampleIndex); - float64 trueProbability = areLabelVectorsEqual(labelIndicesBegin, labelIndicesEnd, labelVector) ? 
1 : 0; - SparseSetMatrix::const_row scores = scoreMatrix[exampleIndex]; - float64 jointProbability = - jointProbabilityFunction.transformScoresIntoJointProbability(i, labelVector, scores, numLabels); - bins.emplace_back(jointProbability, trueProbability); - } - } - } - - template - static inline std::unique_ptr fitJointProbabilityCalibrationModel( - IndexIterator indexIterator, uint32 numExamples, const LabelMatrix& labelMatrix, const IStatistics& statistics, - const IJointProbabilityFunction& jointProbabilityFunction, const LabelVectorSet& labelVectorSet) { - // Extract thresholds and ground truth probabilities from score matrix and label matrix, respectively... - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - std::unique_ptr calibrationModelPtr = - std::make_unique(numLabelVectors); - const IBoostingStatistics& boostingStatistics = dynamic_cast(statistics); - auto denseVisitor = [=, &jointProbabilityFunction, &calibrationModelPtr, - &labelVectorSet](const CContiguousConstView& scoreMatrix) { - extractThresholdsAndProbabilities(indexIterator, numExamples, *calibrationModelPtr, labelMatrix, - scoreMatrix, jointProbabilityFunction, labelVectorSet); - }; - auto sparseVisitor = [=, &jointProbabilityFunction, &calibrationModelPtr, - &labelVectorSet](const SparseSetMatrix& scoreMatrix) { - extractThresholdsAndProbabilities(indexIterator, numExamples, *calibrationModelPtr, labelMatrix, - scoreMatrix, jointProbabilityFunction, labelVectorSet); - }; - boostingStatistics.visitScoreMatrix(denseVisitor, sparseVisitor); - - // Build and return the isotonic calibration model... 
- calibrationModelPtr->fit(); - return calibrationModelPtr; - } - - template - static inline std::unique_ptr fitJointProbabilityCalibrationModel( - const SinglePartition& partition, const LabelMatrix& labelMatrix, const IStatistics& statistics, - const IJointProbabilityFunction& jointProbabilityFunction, const LabelVectorSet& labelVectorSet) { - return fitJointProbabilityCalibrationModel(partition.cbegin(), partition.getNumElements(), labelMatrix, - statistics, jointProbabilityFunction, labelVectorSet); - } - - template - static inline std::unique_ptr fitJointProbabilityCalibrationModel( - const BiPartition& partition, bool useHoldoutSet, const LabelMatrix& labelMatrix, const IStatistics& statistics, - const IJointProbabilityFunction& jointProbabilityFunction, const LabelVectorSet& labelVectorSet) { - BiPartition::const_iterator indexIterator; - uint32 numExamples; - - if (useHoldoutSet) { - indexIterator = partition.second_cbegin(); - numExamples = partition.getNumSecond(); - } else { - indexIterator = partition.first_cbegin(); - numExamples = partition.getNumFirst(); - } - - return fitJointProbabilityCalibrationModel(indexIterator, numExamples, labelMatrix, statistics, - jointProbabilityFunction, labelVectorSet); - } - - /** - * An implementation of the type `IJointProbabilityCalibrator` that does fit a model for the calibration of joint - * probabilities via isotonic regression. 
- */ - class IsotonicJointProbabilityCalibrator final : public IJointProbabilityCalibrator { - private: - - const std::unique_ptr jointProbabilityCalibrationModelPtr_; - - const std::unique_ptr jointProbabilityFunctionPtr_; - - const bool useHoldoutSet_; - - const LabelVectorSet& labelVectorSet_; - - public: - - /** - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for - * the calibration of marginal probabilities - * @param jointProbabilityFunctionFactory A reference to an object of type - * `IJointProbabilityFunctionFactory` that allows to create - * implementations of the transformation function to be used to - * transform regression scores that are predicted for individual - * labels into marginal probabilities - * @param useHoldoutSet True, if the calibration model should be fit to the examples - * in the holdout set, if available, false otherwise - * @param labelVectorSet A reference to an object of type `LabelVectorSet` that stores - * all known label vectors - */ - IsotonicJointProbabilityCalibrator( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityFunctionFactory& jointProbabilityFunctionFactory, bool useHoldoutSet, - const LabelVectorSet& labelVectorSet) - : jointProbabilityCalibrationModelPtr_(createNoProbabilityCalibrationModel()), - jointProbabilityFunctionPtr_(jointProbabilityFunctionFactory.create( - marginalProbabilityCalibrationModel, *jointProbabilityCalibrationModelPtr_)), - useHoldoutSet_(useHoldoutSet), labelVectorSet_(labelVectorSet) {} - - /** - * @see `IJointProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return fitJointProbabilityCalibrationModel(partition, labelMatrix, statistics, - 
*jointProbabilityFunctionPtr_, labelVectorSet_); - } - - /** - * @see `IJointProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CsrLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return fitJointProbabilityCalibrationModel(partition, labelMatrix, statistics, - *jointProbabilityFunctionPtr_, labelVectorSet_); - } - - /** - * @see `IJointProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return fitJointProbabilityCalibrationModel(partition, useHoldoutSet_, labelMatrix, statistics, - *jointProbabilityFunctionPtr_, labelVectorSet_); - } - - /** - * @see `IJointProbabilityCalibrator::fitProbabilityCalibrationModel` - */ - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CsrLabelMatrix& labelMatrix, const IStatistics& statistics) const override { - return fitJointProbabilityCalibrationModel(partition, useHoldoutSet_, labelMatrix, statistics, - *jointProbabilityFunctionPtr_, labelVectorSet_); - } - }; - - /** - * A factory that allows to create instances of the type `IsotonicJointProbabilityCalibrator`. 
- */ - class IsotonicJointProbabilityCalibratorFactory final : public IJointProbabilityCalibratorFactory { - private: - - const std::unique_ptr jointProbabilityFunctionFactoryPtr_; - - const bool useHoldoutSet_; - - public: - - /** - * @param jointProbabilityFunctionFactoryPtr An unique pointer to an object of type - * `IJointProbabilityFunctionFactory` that allows to create - * implementations of the transformation function to be used to - * transform regression scores that are predicted for individual - * labels into joint probabilities - * @param useHoldoutSet True, if the calibration model should be fit to the examples - * in the holdout set, if available, false otherwise - */ - IsotonicJointProbabilityCalibratorFactory( - std::unique_ptr jointProbabilityFunctionFactoryPtr, bool useHoldoutSet) - : jointProbabilityFunctionFactoryPtr_(std::move(jointProbabilityFunctionFactoryPtr)), - useHoldoutSet_(useHoldoutSet) {} - - /** - * @see `IJointProbabilityCalibratorFactory::create` - */ - std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const LabelVectorSet* labelVectorSet) const override { - if (!labelVectorSet) { - throw std::runtime_error( - "Information about the label vectors that have been encountered in the training data is required " - "for fitting a model for the calibration of joint probabilities, but no such information is " - "provided by the model. 
Most probably, the model was intended to use a different calibration " - "method when it has been trained."); - } - - return std::make_unique(marginalProbabilityCalibrationModel, - *jointProbabilityFunctionFactoryPtr_, - useHoldoutSet_, *labelVectorSet); - } - }; - - IsotonicJointProbabilityCalibratorConfig::IsotonicJointProbabilityCalibratorConfig( - const std::unique_ptr& lossConfigPtr) - : useHoldoutSet_(true), lossConfigPtr_(lossConfigPtr) {} - - bool IsotonicJointProbabilityCalibratorConfig::isHoldoutSetUsed() const { - return useHoldoutSet_; - } - - IIsotonicJointProbabilityCalibratorConfig& IsotonicJointProbabilityCalibratorConfig::setUseHoldoutSet( - bool useHoldoutSet) { - useHoldoutSet_ = useHoldoutSet; - return *this; - } - - bool IsotonicJointProbabilityCalibratorConfig::shouldUseHoldoutSet() const { - return useHoldoutSet_; - } - - bool IsotonicJointProbabilityCalibratorConfig::isLabelVectorSetNeeded() const { - return true; - } - - std::unique_ptr - IsotonicJointProbabilityCalibratorConfig::createJointProbabilityCalibratorFactory() const { - std::unique_ptr jointProbabilityFunctionFactoryPtr = - lossConfigPtr_->createJointProbabilityFunctionFactory(); - - if (jointProbabilityFunctionFactoryPtr) { - return std::make_unique( - std::move(jointProbabilityFunctionFactoryPtr), useHoldoutSet_); - } else { - return std::make_unique(); - } - } -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/probability_function_chain_rule.cpp b/cpp/subprojects/boosting/src/boosting/prediction/probability_function_chain_rule.cpp deleted file mode 100644 index a17e156b..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/probability_function_chain_rule.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include "boosting/prediction/probability_function_chain_rule.hpp" - -#include "common/iterator/binary_forward_iterator.hpp" - -namespace boosting { - - /** - * An implementation of the class `IJointProbabilityFunction` that transforms regression scores that are - * 
predicted for an example into joint probabilities by applying an `IMarginalProbabilityFunction` to each one and - * calculating the product of the resulting marginal probabilities according to the probabilistic chain rule. - */ - class ChainRule final : public IJointProbabilityFunction { - private: - - const std::unique_ptr marginalProbabilityFunctionPtr_; - - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel_; - - public: - - /** - * @param marginalProbabilityFunctionPtr An unique pointer to an object of type - * `IMarginalProbabilityFunction` to be used to transform - * regression scores into marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that should be used for the - * calibration of marginal probabilities - */ - ChainRule(std::unique_ptr marginalProbabilityFunctionPtr, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) - : marginalProbabilityFunctionPtr_(std::move(marginalProbabilityFunctionPtr)), - jointProbabilityCalibrationModel_(jointProbabilityCalibrationModel) {} - - float64 transformScoresIntoJointProbability( - uint32 labelVectorIndex, const LabelVector& labelVector, - VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd) const override { - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - uint32 numLabels = scoresEnd - scoresBegin; - float64 jointProbability = 1; - - for (uint32 i = 0; i < numLabels; i++) { - float64 score = scoresBegin[i]; - float64 marginalProbability = - marginalProbabilityFunctionPtr_->transformScoreIntoMarginalProbability(i, score); - bool trueLabel = *labelIterator; - - if (!trueLabel) { - marginalProbability = 1 - marginalProbability; - } - - jointProbability *= marginalProbability; - labelIterator++; - } - - return jointProbabilityCalibrationModel_.calibrateJointProbability(labelVectorIndex, 
jointProbability); - } - - float64 transformScoresIntoJointProbability(uint32 labelVectorIndex, const LabelVector& labelVector, - SparseSetMatrix::const_row scores, - uint32 numLabels) const override { - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - float64 jointProbability = 1; - - for (uint32 i = 0; i < numLabels; i++) { - const IndexedValue* entry = scores[i]; - float64 score = entry ? entry->value : 0; - float64 marginalProbability = - marginalProbabilityFunctionPtr_->transformScoreIntoMarginalProbability(i, score); - bool trueLabel = *labelIterator; - - if (!trueLabel) { - marginalProbability = 1 - marginalProbability; - } - - jointProbability *= marginalProbability; - labelIterator++; - } - - return jointProbabilityCalibrationModel_.calibrateJointProbability(labelVectorIndex, jointProbability); - } - }; - - ChainRuleFactory::ChainRuleFactory( - std::unique_ptr marginalProbabilityFunctionFactoryPtr) - : marginalProbabilityFunctionFactoryPtr_(std::move(marginalProbabilityFunctionFactoryPtr)) {} - - std::unique_ptr ChainRuleFactory::create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) const { - return std::make_unique( - marginalProbabilityFunctionFactoryPtr_->create(marginalProbabilityCalibrationModel), - jointProbabilityCalibrationModel); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/probability_function_logistic.cpp b/cpp/subprojects/boosting/src/boosting/prediction/probability_function_logistic.cpp deleted file mode 100644 index 45538b75..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/probability_function_logistic.cpp +++ /dev/null @@ -1,37 +0,0 @@ -#include "boosting/prediction/probability_function_logistic.hpp" - -#include "boosting/math/math.hpp" - -namespace boosting { - - /** - * An implementation of the class `IMarginalProbabilityFunction` that 
transforms regression scores that are - * predicted for individual labels into marginal probabilities via the logistic sigmoid function. - */ - class LogisticFunction final : public IMarginalProbabilityFunction { - private: - - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel_; - - public: - - /** - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that should be used for - * the calibration of marginal probabilities - */ - LogisticFunction(const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) - : marginalProbabilityCalibrationModel_(marginalProbabilityCalibrationModel) {} - - float64 transformScoreIntoMarginalProbability(uint32 labelIndex, float64 score) const override { - return marginalProbabilityCalibrationModel_.calibrateMarginalProbability(labelIndex, - logisticFunction(score)); - } - }; - - std::unique_ptr LogisticFunctionFactory::create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const { - return std::make_unique(marginalProbabilityCalibrationModel); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_example_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_example_wise.cpp deleted file mode 100644 index fd3ae0b8..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_example_wise.cpp +++ /dev/null @@ -1,42 +0,0 @@ -#include "boosting/prediction/transformation_binary_example_wise.hpp" - -#include "common/iterator/binary_forward_iterator.hpp" - -namespace boosting { - - ExampleWiseBinaryTransformation::ExampleWiseBinaryTransformation( - const LabelVectorSet& labelVectorSet, std::unique_ptr distanceMeasurePtr) - : labelVectorSet_(labelVectorSet), distanceMeasurePtr_(std::move(distanceMeasurePtr)) {} - - void ExampleWiseBinaryTransformation::apply(VectorConstView::const_iterator scoresBegin, 
- VectorConstView::const_iterator scoresEnd, - VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const { - const LabelVector& labelVector = - distanceMeasurePtr_->getClosestLabelVector(labelVectorSet_, scoresBegin, scoresEnd); - uint32 numLabels = predictionEnd - predictionBegin; - auto labelIterator = make_binary_forward_iterator(labelVector.cbegin(), labelVector.cend()); - - for (uint32 i = 0; i < numLabels; i++) { - bool label = *labelIterator; - predictionBegin[i] = label ? 1 : 0; - labelIterator++; - } - } - - void ExampleWiseBinaryTransformation::apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const { - const LabelVector& labelVector = - distanceMeasurePtr_->getClosestLabelVector(labelVectorSet_, scoresBegin, scoresEnd); - uint32 numIndices = labelVector.getNumElements(); - LabelVector::const_iterator indexIterator = labelVector.cbegin(); - predictionRow.reserve(numIndices); - - for (uint32 i = 0; i < numIndices; i++) { - uint32 labelIndex = indexIterator[i]; - predictionRow.emplace_back(labelIndex); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_gfm.cpp b/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_gfm.cpp deleted file mode 100644 index 19d5d711..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_gfm.cpp +++ /dev/null @@ -1,168 +0,0 @@ -#include "boosting/prediction/transformation_binary_gfm.hpp" - -#include "common/data/arrays.hpp" -#include "common/data/matrix_sparse_set.hpp" -#include "common/data/vector_sparse_array.hpp" - -#include - -namespace boosting { - - static inline uint32 getMaxLabelCardinality(const LabelVectorSet& labelVectorSet) { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - uint32 maxLabelCardinality = 0; - - for (uint32 i 
= 0; i < numLabelVectors; i++) { - const LabelVector& labelVector = *labelVectorIterator[i]; - uint32 numRelevantLabels = labelVector.getNumElements(); - - if (numRelevantLabels > maxLabelCardinality) { - maxLabelCardinality = numRelevantLabels; - } - } - - return maxLabelCardinality; - } - - static inline float64 calculateMarginalizedProbabilities( - SparseSetMatrix& probabilities, uint32 numLabels, - VectorConstView::const_iterator jointProbabilityIterator, const LabelVectorSet& labelVectorSet) { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - float64 nullVectorProbability = 0; - - for (uint32 i = 0; i < numLabelVectors; i++) { - const LabelVector& labelVector = *labelVectorIterator[i]; - uint32 numRelevantLabels = labelVector.getNumElements(); - float64 jointProbability = jointProbabilityIterator[i]; - - if (numRelevantLabels > 0) { - LabelVector::const_iterator labelIndexIterator = labelVector.cbegin(); - - for (uint32 j = 0; j < numRelevantLabels; j++) { - uint32 labelIndex = labelIndexIterator[j]; - SparseSetMatrix::row row = probabilities[labelIndex]; - IndexedValue& indexedValue = row.emplace(numRelevantLabels - 1, 0.0); - indexedValue.value += jointProbability; - } - } else { - nullVectorProbability = jointProbability; - } - } - - return nullVectorProbability; - } - - static inline float64 createAndEvaluateLabelVector(SparseArrayVector::iterator iterator, uint32 numLabels, - const SparseSetMatrix& probabilities, uint32 k) { - for (uint32 i = 0; i < numLabels; i++) { - float64 weightedProbability = 0; - - for (auto it = probabilities.cbegin(i); it != probabilities.cend(i); it++) { - const IndexedValue& indexedValue = *it; - weightedProbability += (2 * indexedValue.value) / (float64) (indexedValue.index + k + 1); - } - - IndexedValue& entry = iterator[i]; - entry.index = i; - entry.value = weightedProbability; - } - - std::partial_sort(iterator, &iterator[k], 
&iterator[numLabels], - [=](const IndexedValue& a, const IndexedValue& b) { - return a.value > b.value; - }); - - float64 quality = 0; - - for (uint32 i = 0; i < k; i++) { - quality += iterator[i].value; - } - - return quality; - } - - static inline void storePrediction(const SparseArrayVector& tmpVector, - VectorView::iterator predictionIterator, uint32 numLabels) { - setArrayToZeros(predictionIterator, numLabels); - uint32 numRelevantLabels = tmpVector.getNumElements(); - SparseArrayVector::const_iterator iterator = tmpVector.cbegin(); - - for (uint32 i = 0; i < numRelevantLabels; i++) { - uint32 labelIndex = iterator[i].index; - predictionIterator[labelIndex] = 1; - } - } - - static inline void storePrediction(SparseArrayVector& tmpVector, BinaryLilMatrix::row predictionRow, - uint32 numLabels) { - uint32 numRelevantLabels = tmpVector.getNumElements(); - - if (numRelevantLabels > 0) { - SparseArrayVector::iterator iterator = tmpVector.begin(); - std::sort(iterator, tmpVector.end(), IndexedValue::CompareIndex()); - predictionRow.reserve(numRelevantLabels); - - for (uint32 i = 0; i < numRelevantLabels; i++) { - predictionRow.emplace_back(iterator[i].index); - } - } - } - - template - static inline void predictGfm(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, Prediction prediction, - const IJointProbabilityFunction& jointProbabilityFunction, - const LabelVectorSet& labelVectorSet, uint32 maxLabelCardinality) { - std::unique_ptr> jointProbabilityVectorPtr = - jointProbabilityFunction.transformScoresIntoJointProbabilities(labelVectorSet, scoresBegin, scoresEnd); - DenseVector::const_iterator jointProbabilityIterator = jointProbabilityVectorPtr->cbegin(); - uint32 numLabels = scoresEnd - scoresBegin; - SparseSetMatrix marginalizedProbabilities(numLabels, maxLabelCardinality); - float64 bestQuality = calculateMarginalizedProbabilities(marginalizedProbabilities, numLabels, - jointProbabilityIterator, labelVectorSet); - 
SparseArrayVector tmpVector1(numLabels); - tmpVector1.setNumElements(0, false); - SparseArrayVector tmpVector2(numLabels); - SparseArrayVector* bestVectorPtr = &tmpVector1; - SparseArrayVector* tmpVectorPtr = &tmpVector2; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 k = i + 1; - float64 quality = - createAndEvaluateLabelVector(tmpVectorPtr->begin(), numLabels, marginalizedProbabilities, k); - - if (quality > bestQuality) { - bestQuality = quality; - tmpVectorPtr->setNumElements(k, false); - SparseArrayVector* tmpPtr = bestVectorPtr; - bestVectorPtr = tmpVectorPtr; - tmpVectorPtr = tmpPtr; - } - } - - storePrediction(*bestVectorPtr, prediction, numLabels); - } - - GfmBinaryTransformation::GfmBinaryTransformation( - const LabelVectorSet& labelVectorSet, std::unique_ptr jointProbabilityFunctionPtr) - : labelVectorSet_(labelVectorSet), maxLabelCardinality_(getMaxLabelCardinality(labelVectorSet)), - jointProbabilityFunctionPtr_(std::move(jointProbabilityFunctionPtr)) {} - - void GfmBinaryTransformation::apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const { - predictGfm(scoresBegin, scoresEnd, predictionBegin, *jointProbabilityFunctionPtr_, labelVectorSet_, - maxLabelCardinality_); - } - - void GfmBinaryTransformation::apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const { - predictGfm(scoresBegin, scoresEnd, predictionRow, *jointProbabilityFunctionPtr_, - labelVectorSet_, maxLabelCardinality_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_label_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_label_wise.cpp deleted file mode 100644 index d6154f7f..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/transformation_binary_label_wise.cpp +++ /dev/null @@ -1,36 
+0,0 @@ -#include "boosting/prediction/transformation_binary_label_wise.hpp" - -namespace boosting { - - LabelWiseBinaryTransformation::LabelWiseBinaryTransformation( - std::unique_ptr discretizationFunctionPtr) - : discretizationFunctionPtr_(std::move(discretizationFunctionPtr)) {} - - void LabelWiseBinaryTransformation::apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator predictionBegin, - VectorView::iterator predictionEnd) const { - uint32 numPredictions = scoresEnd - scoresBegin; - - for (uint32 i = 0; i < numPredictions; i++) { - float64 score = scoresBegin[i]; - uint8 binaryPrediction = discretizationFunctionPtr_->discretizeScore(i, score) ? 1 : 0; - predictionBegin[i] = binaryPrediction; - } - } - - void LabelWiseBinaryTransformation::apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - BinaryLilMatrix::row predictionRow) const { - uint32 numPredictions = scoresEnd - scoresBegin; - - for (uint32 i = 0; i < numPredictions; i++) { - float64 score = scoresBegin[i]; - - if (discretizationFunctionPtr_->discretizeScore(i, score)) { - predictionRow.emplace_back(i); - } - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_label_wise.cpp b/cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_label_wise.cpp deleted file mode 100644 index 8ff42674..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_label_wise.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "boosting/prediction/transformation_probability_label_wise.hpp" - -namespace boosting { - - LabelWiseProbabilityTransformation::LabelWiseProbabilityTransformation( - std::unique_ptr marginalProbabilityFunctionPtr) - : marginalProbabilityFunctionPtr_(std::move(marginalProbabilityFunctionPtr)) {} - - void LabelWiseProbabilityTransformation::apply(VectorConstView::const_iterator scoresBegin, - 
VectorConstView::const_iterator scoresEnd, - VectorView::iterator probabilitiesBegin, - VectorView::iterator probabilitiesEnd) const { - uint32 numScores = scoresEnd - scoresBegin; - - for (uint32 i = 0; i < numScores; i++) { - float64 score = scoresBegin[i]; - float64 probability = marginalProbabilityFunctionPtr_->transformScoreIntoMarginalProbability(i, score); - probabilitiesBegin[i] = probability; - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_marginalized.cpp b/cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_marginalized.cpp deleted file mode 100644 index d7ad666b..00000000 --- a/cpp/subprojects/boosting/src/boosting/prediction/transformation_probability_marginalized.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "boosting/prediction/transformation_probability_marginalized.hpp" - -#include "common/data/arrays.hpp" - -namespace boosting { - - MarginalizedProbabilityTransformation::MarginalizedProbabilityTransformation( - const LabelVectorSet& labelVectorSet, std::unique_ptr jointProbabilityFunctionPtr) - : labelVectorSet_(labelVectorSet), jointProbabilityFunctionPtr_(std::move(jointProbabilityFunctionPtr)) {} - - void MarginalizedProbabilityTransformation::apply(VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd, - VectorView::iterator probabilitiesBegin, - VectorView::iterator probabilitiesEnd) const { - std::unique_ptr> jointProbabilityVectorPtr = - jointProbabilityFunctionPtr_->transformScoresIntoJointProbabilities(labelVectorSet_, scoresBegin, scoresEnd); - DenseVector::const_iterator jointProbabilityIterator = jointProbabilityVectorPtr->cbegin(); - uint32 numLabels = probabilitiesEnd - probabilitiesBegin; - setArrayToZeros(probabilitiesBegin, numLabels); - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet_.cbegin(); - uint32 numLabelVectors = labelVectorSet_.getNumLabelVectors(); - - for (uint32 i = 0; i < 
numLabelVectors; i++) { - const LabelVector& labelVector = *labelVectorIterator[i]; - uint32 numRelevantLabels = labelVector.getNumElements(); - LabelVector::const_iterator labelIndexIterator = labelVector.cbegin(); - float64 jointProbability = jointProbabilityIterator[i]; - - for (uint32 j = 0; j < numRelevantLabels; j++) { - uint32 labelIndex = labelIndexIterator[j]; - probabilitiesBegin[labelIndex] += jointProbability; - } - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_auto.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_auto.cpp deleted file mode 100644 index 0d12dd32..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_auto.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#include "boosting/rule_evaluation/head_type_auto.hpp" - -#include "boosting/rule_evaluation/head_type_complete.hpp" -#include "boosting/rule_evaluation/head_type_single.hpp" - -namespace boosting { - - AutomaticHeadConfig::AutomaticHeadConfig(const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr, - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr) - : lossConfigPtr_(lossConfigPtr), labelBinningConfigPtr_(labelBinningConfigPtr), - multiThreadingConfigPtr_(multiThreadingConfigPtr), l1RegularizationConfigPtr_(l1RegularizationConfigPtr), - l2RegularizationConfigPtr_(l2RegularizationConfigPtr) {} - - std::unique_ptr AutomaticHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const { - if (labelMatrix.getNumCols() > 1) { - SingleLabelHeadConfig headConfig(labelBinningConfigPtr_, multiThreadingConfigPtr_, - l1RegularizationConfigPtr_, l2RegularizationConfigPtr_); - return headConfig.createStatisticsProviderFactory(featureMatrix, labelMatrix, lossConfig); - } else { - 
CompleteHeadConfig headConfig(labelBinningConfigPtr_, multiThreadingConfigPtr_, l1RegularizationConfigPtr_, - l2RegularizationConfigPtr_); - return headConfig.createStatisticsProviderFactory(featureMatrix, labelMatrix, lossConfig); - } - } - - std::unique_ptr AutomaticHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const { - if (labelMatrix.getNumCols() > 1) { - SingleLabelHeadConfig headConfig(labelBinningConfigPtr_, multiThreadingConfigPtr_, - l1RegularizationConfigPtr_, l2RegularizationConfigPtr_); - return headConfig.createStatisticsProviderFactory(featureMatrix, labelMatrix, lossConfig); - } else { - CompleteHeadConfig headConfig(labelBinningConfigPtr_, multiThreadingConfigPtr_, l1RegularizationConfigPtr_, - l2RegularizationConfigPtr_); - return headConfig.createStatisticsProviderFactory(featureMatrix, labelMatrix, lossConfig); - } - } - - std::unique_ptr AutomaticHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const { - CompleteHeadConfig headConfig(labelBinningConfigPtr_, multiThreadingConfigPtr_, l1RegularizationConfigPtr_, - l2RegularizationConfigPtr_); - return headConfig.createStatisticsProviderFactory(featureMatrix, labelMatrix, lossConfig, blas, lapack); - } - - bool AutomaticHeadConfig::isPartial() const { - return lossConfigPtr_->isDecomposable(); - } - - bool AutomaticHeadConfig::isSingleLabel() const { - return lossConfigPtr_->isDecomposable(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_complete.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_complete.cpp deleted file mode 100644 index 5605cb11..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_complete.cpp +++ /dev/null @@ 
-1,71 +0,0 @@ -#include "boosting/rule_evaluation/head_type_complete.hpp" - -#include "boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp" -#include "boosting/statistics/statistics_provider_example_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_dense.hpp" - -namespace boosting { - - CompleteHeadConfig::CompleteHeadConfig(const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr, - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr) - : labelBinningConfigPtr_(labelBinningConfigPtr), multiThreadingConfigPtr_(multiThreadingConfigPtr), - l1RegularizationConfigPtr_(l1RegularizationConfigPtr), l2RegularizationConfigPtr_(l2RegularizationConfigPtr) { - - } - - std::unique_ptr CompleteHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createLabelWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createEvaluationMeasureFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseCompleteRuleEvaluationFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), 
std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr CompleteHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const { - return this->createStatisticsProviderFactory(featureMatrix, labelMatrix, - static_cast(lossConfig)); - } - - std::unique_ptr CompleteHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createExampleWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createExampleWiseLossFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - std::unique_ptr regularRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - bool CompleteHeadConfig::isPartial() const { - return false; - } - - bool CompleteHeadConfig::isSingleLabel() const { - return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_dynamic.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_dynamic.cpp deleted file mode 100644 index 02d31f82..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_dynamic.cpp +++ /dev/null @@ -1,99 +0,0 @@ -#include "boosting/rule_evaluation/head_type_partial_dynamic.hpp" - -#include "boosting/statistics/statistics_provider_example_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_sparse.hpp" -#include "common/util/validation.hpp" - -namespace boosting { - - DynamicPartialHeadConfig::DynamicPartialHeadConfig( - const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr) - : threshold_(0.02f), exponent_(2.0f), labelBinningConfigPtr_(labelBinningConfigPtr), - multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - float32 DynamicPartialHeadConfig::getThreshold() const { - return threshold_; - } - - IDynamicPartialHeadConfig& DynamicPartialHeadConfig::setThreshold(float32 threshold) { - assertGreater("threshold", threshold, 0); - assertLess("threshold", threshold, 1); - threshold_ = threshold; - return *this; - } - - float32 DynamicPartialHeadConfig::getExponent() const { - return exponent_; - } - - IDynamicPartialHeadConfig& DynamicPartialHeadConfig::setExponent(float32 exponent) { - assertGreaterOrEqual("exponent", exponent, 1); - exponent_ = exponent; - return *this; - } - - std::unique_ptr DynamicPartialHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createLabelWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createEvaluationMeasureFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseCompleteRuleEvaluationFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr 
= - labelBinningConfigPtr_->createLabelWiseDynamicPartialRuleEvaluationFactory(threshold_, exponent_); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseDynamicPartialRuleEvaluationFactory(threshold_, exponent_); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr DynamicPartialHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createSparseLabelWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createSparseEvaluationMeasureFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseDynamicPartialRuleEvaluationFactory(threshold_, exponent_); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseDynamicPartialRuleEvaluationFactory(threshold_, exponent_); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(regularRuleEvaluationFactoryPtr), - std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr DynamicPartialHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createExampleWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - 
lossConfig.createExampleWiseLossFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - std::unique_ptr regularRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseDynamicPartialRuleEvaluationFactory(threshold_, exponent_, blas, - lapack); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseDynamicPartialRuleEvaluationFactory(threshold_, exponent_, blas, - lapack); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - bool DynamicPartialHeadConfig::isPartial() const { - return true; - } - - bool DynamicPartialHeadConfig::isSingleLabel() const { - return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_fixed.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_fixed.cpp deleted file mode 100644 index ff212f5c..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_partial_fixed.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "boosting/rule_evaluation/head_type_partial_fixed.hpp" - -#include "boosting/statistics/statistics_provider_example_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_sparse.hpp" -#include "common/util/validation.hpp" - -namespace boosting { - - static inline float32 calculateLabelRatio(float32 labelRatio, const IRowWiseLabelMatrix& labelMatrix) { - if (labelRatio > 0) { - return labelRatio; - } else { - return labelMatrix.calculateLabelCardinality() / labelMatrix.getNumCols(); - } - } - - FixedPartialHeadConfig::FixedPartialHeadConfig( - const std::unique_ptr& labelBinningConfigPtr, - const 
std::unique_ptr& multiThreadingConfigPtr) - : labelRatio_(0.0f), minLabels_(2), maxLabels_(0), labelBinningConfigPtr_(labelBinningConfigPtr), - multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - - float32 FixedPartialHeadConfig::getLabelRatio() const { - return labelRatio_; - } - - IFixedPartialHeadConfig& FixedPartialHeadConfig::setLabelRatio(float32 labelRatio) { - if (labelRatio != 0) { - assertGreater("labelRatio", labelRatio, 0); - assertLess("labelRatio", labelRatio, 1); - } - labelRatio_ = labelRatio; - return *this; - } - - uint32 FixedPartialHeadConfig::getMinLabels() const { - return minLabels_; - } - - IFixedPartialHeadConfig& FixedPartialHeadConfig::setMinLabels(uint32 minLabels) { - assertGreaterOrEqual("minLabels", minLabels, 2); - minLabels_ = minLabels; - return *this; - } - - uint32 FixedPartialHeadConfig::getMaxLabels() const { - return maxLabels_; - } - - IFixedPartialHeadConfig& FixedPartialHeadConfig::setMaxLabels(uint32 maxLabels) { - if (maxLabels != 0) assertGreaterOrEqual("maxLabels", maxLabels, minLabels_); - maxLabels_ = maxLabels; - return *this; - } - - std::unique_ptr FixedPartialHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - float32 labelRatio = calculateLabelRatio(labelRatio_, labelMatrix); - std::unique_ptr lossFactoryPtr = lossConfig.createLabelWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createEvaluationMeasureFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseCompleteRuleEvaluationFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels_, maxLabels_); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - 
labelBinningConfigPtr_->createLabelWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels_, maxLabels_); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr FixedPartialHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - float32 labelRatio = calculateLabelRatio(labelRatio_, labelMatrix); - std::unique_ptr lossFactoryPtr = lossConfig.createSparseLabelWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createSparseEvaluationMeasureFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels_, maxLabels_); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels_, maxLabels_); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(regularRuleEvaluationFactoryPtr), - std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr FixedPartialHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - float32 labelRatio = calculateLabelRatio(labelRatio_, labelMatrix); - std::unique_ptr lossFactoryPtr = lossConfig.createExampleWiseLossFactory(); - std::unique_ptr 
evaluationMeasureFactoryPtr = - lossConfig.createExampleWiseLossFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - std::unique_ptr regularRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels_, maxLabels_, - blas, lapack); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseFixedPartialRuleEvaluationFactory(labelRatio, minLabels_, maxLabels_, - blas, lapack); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - bool FixedPartialHeadConfig::isPartial() const { - return true; - } - - bool FixedPartialHeadConfig::isSingleLabel() const { - return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_single.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_single.cpp deleted file mode 100644 index 9a014ec7..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/head_type_single.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include "boosting/rule_evaluation/head_type_single.hpp" - -#include "boosting/rule_evaluation/rule_evaluation_label_wise_single.hpp" -#include "boosting/statistics/statistics_provider_example_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_dense.hpp" -#include "boosting/statistics/statistics_provider_label_wise_sparse.hpp" - -namespace boosting { - - SingleLabelHeadConfig::SingleLabelHeadConfig( - const std::unique_ptr& labelBinningConfigPtr, - const std::unique_ptr& multiThreadingConfigPtr, - const std::unique_ptr& l1RegularizationConfigPtr, - const std::unique_ptr& l2RegularizationConfigPtr) - : labelBinningConfigPtr_(labelBinningConfigPtr), 
multiThreadingConfigPtr_(multiThreadingConfigPtr), - l1RegularizationConfigPtr_(l1RegularizationConfigPtr), l2RegularizationConfigPtr_(l2RegularizationConfigPtr) { - - } - - std::unique_ptr SingleLabelHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ILabelWiseLossConfig& lossConfig) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createLabelWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createEvaluationMeasureFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createLabelWiseCompleteRuleEvaluationFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr SingleLabelHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const ISparseLabelWiseLossConfig& lossConfig) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createSparseLabelWiseLossFactory(); - std::unique_ptr 
evaluationMeasureFactoryPtr = - lossConfig.createSparseEvaluationMeasureFactory(); - std::unique_ptr regularRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(regularRuleEvaluationFactoryPtr), - std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - std::unique_ptr SingleLabelHeadConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, - const IExampleWiseLossConfig& lossConfig, const Blas& blas, const Lapack& lapack) const { - float64 l1RegularizationWeight = l1RegularizationConfigPtr_->getWeight(); - float64 l2RegularizationWeight = l2RegularizationConfigPtr_->getWeight(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - std::unique_ptr lossFactoryPtr = lossConfig.createExampleWiseLossFactory(); - std::unique_ptr evaluationMeasureFactoryPtr = - lossConfig.createExampleWiseLossFactory(); - std::unique_ptr defaultRuleEvaluationFactoryPtr = - labelBinningConfigPtr_->createExampleWiseCompleteRuleEvaluationFactory(blas, lapack); - std::unique_ptr regularRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - std::unique_ptr pruningRuleEvaluationFactoryPtr = - std::make_unique(l1RegularizationWeight, l2RegularizationWeight); - return std::make_unique( - std::move(lossFactoryPtr), std::move(evaluationMeasureFactoryPtr), std::move(defaultRuleEvaluationFactoryPtr), - std::move(regularRuleEvaluationFactoryPtr), std::move(pruningRuleEvaluationFactoryPtr), numThreads); - } - - bool SingleLabelHeadConfig::isPartial() const { - return true; - } - - bool SingleLabelHeadConfig::isSingleLabel() const { - return true; - } - -} diff --git 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_manual.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_manual.cpp deleted file mode 100644 index 9cdf43c2..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_manual.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include "boosting/rule_evaluation/regularization_manual.hpp" - -#include "common/util/validation.hpp" - -#include - -namespace boosting { - - ManualRegularizationConfig::ManualRegularizationConfig() : regularizationWeight_(1) {} - - float64 ManualRegularizationConfig::getRegularizationWeight() const { - return regularizationWeight_; - } - - IManualRegularizationConfig& ManualRegularizationConfig::setRegularizationWeight(float64 regularizationWeight) { - assertGreater("regularizationWeight", regularizationWeight, 0); - regularizationWeight_ = regularizationWeight; - return *this; - } - - float64 ManualRegularizationConfig::getWeight() const { - return regularizationWeight_; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_no.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_no.cpp deleted file mode 100644 index 158aae44..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/regularization_no.cpp +++ /dev/null @@ -1,9 +0,0 @@ -#include "boosting/rule_evaluation/regularization_no.hpp" - -namespace boosting { - - float64 NoRegularizationConfig::getWeight() const { - return 0; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_binned_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_binned_common.hpp deleted file mode 100644 index 4b51dab7..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_binned_common.hpp +++ /dev/null @@ -1,405 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - 
-#include "boosting/math/math.hpp" -#include "common/data/arrays.hpp" -#include "common/rule_evaluation/score_vector_binned_dense.hpp" -#include "rule_evaluation_example_wise_complete_common.hpp" - -namespace boosting { - - /** - * Removes empty bins from an array that keeps track of the number of elements per bin, as well as an array that - * stores the index of each bin. - * - * @param numElementsPerBin A pointer to an array of type `uint32`, shape `(numBins)` that stores the number - * elements per bin - * @param binIndices A pointer to an array of type `uint32`, shape `(numBins)`, that stores the index of each - * bin - * @param numBins The number of available bins - */ - static inline uint32 removeEmptyBins(uint32* numElementsPerBin, uint32* binIndices, uint32 numBins) { - uint32 n = 0; - - for (uint32 i = 0; i < numBins; i++) { - binIndices[i] = n; - uint32 numElements = numElementsPerBin[i]; - - if (numElements > 0) { - numElementsPerBin[n] = numElements; - n++; - } - } - - return n; - } - - /** - * Aggregates the gradients and Hessians of all elements that have been assigned to the same bin. 
- * - * @tparam BinIndexIterator The type of the iterator that provides access to the indices of the bins individual - * elements have been assigned to - * @param gradientIterator An iterator that provides random access to the gradients - * @param hessianIterator An iterator that provides random access to the Hessians - * @param numElements The total number of available elements - * @param binIndexIterator An iterator that provides random access to the indices of the bins individual elements - * have been assigned to - * @param binIndices A pointer to an array of type `uint32`, shape `(maxBins)` that stores the index of each - * bin - * @param gradients A pointer to an array of type `float64`, shape `(numElements)`, the aggregated gradients - * should be written to - * @param hessians A pointer to an array of type `float64`, shape `(numElements * numElements)`, the - * aggregated Hessians should be written to - * @param maxBins The maximum number of bins - */ - template - static inline void aggregateGradientsAndHessians( - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator, - DenseExampleWiseStatisticVector::hessian_const_iterator hessianIterator, uint32 numElements, - BinIndexIterator binIndexIterator, const uint32* binIndices, float64* gradients, float64* hessians, - uint32 maxBins) { - for (uint32 i = 0; i < numElements; i++) { - uint32 originalBinIndex = binIndexIterator[i]; - - if (originalBinIndex != maxBins) { - uint32 binIndex = binIndices[originalBinIndex]; - binIndexIterator[i] = binIndex; - - // Add the gradient that corresponds to the `i`-th element of the original gradient vector to the - // corresponding element of the aggregated gradient vector... - gradients[binIndex] += gradientIterator[i]; - - // Add the Hessian that corresponds to the `i`-th element on the diagonal of the original Hessian matrix - // to the corresponding element of the aggregated Hessian matrix... 
- hessians[triangularNumber(binIndex + 1) - 1] += hessianIterator[triangularNumber(i + 1) - 1]; - } - } - - for (uint32 i = 1; i < numElements; i++) { - uint32 binIndex = binIndexIterator[i]; - - if (binIndex != maxBins) { - for (uint32 j = 0; j < i; j++) { - uint32 binIndex2 = binIndexIterator[j]; - - // Add the hessian at the `i`-th row and `j`-th column of the original Hessian matrix to the - // corresponding element of the aggregated Hessian matrix, if the labels at indices `i` and `j` do - // not belong to the same bin... - if (binIndex2 != maxBins && binIndex != binIndex2) { - uint32 r, c; - - if (binIndex < binIndex2) { - r = binIndex; - c = binIndex2; - } else { - r = binIndex2; - c = binIndex; - } - - hessians[triangularNumber(c) + r] += hessianIterator[triangularNumber(i) + j]; - } - } - } - } - } - - /** - * Adds a L1 regularization weight to a vector of ordinates. - * - * @param ordinates A pointer to an array of type `float64`, shape `(n)`, the L1 regularization - * weight should be added to - * @param n The number of ordinates - * @param weights A pointer to an array of type `uint32`, shape `(n)` that stores the weight of - * each ordinate - * @param l1RegularizationWeight The L1 regularization weight to be added to the ordinates - */ - static inline void addL1RegularizationWeight(float64* ordinates, uint32 n, const uint32* weights, - float64 l1RegularizationWeight) { - for (uint32 i = 0; i < n; i++) { - uint32 weight = weights[i]; - float64 gradient = ordinates[i]; - ordinates[i] += (weight * getL1RegularizationWeight(gradient, l1RegularizationWeight)); - } - } - - /** - * Adds a L2 regularization weight to the diagonal of a matrix of coefficients. 
- * - * @param coefficients A pointer to an array of type `float64`, shape `(n * n)`, the regularization - * weight should be added to - * @param n The number of coefficients on the diagonal - * @param weights A pointer to an array of type `uint32`, shape `(n)`, that stores the weight of - * each coefficient - * @param l2RegularizationWeight The L2 regularization weight to be added to the coefficients - */ - static inline void addL2RegularizationWeight(float64* coefficients, uint32 numPredictions, const uint32* weights, - float64 l2RegularizationWeight) { - for (uint32 i = 0; i < numPredictions; i++) { - uint32 weight = weights[i]; - coefficients[(i * numPredictions) + i] += (weight * l2RegularizationWeight); - } - } - - /** - * Calculates and returns the regularization term. - * - * @tparam ScoreIterator The type of the iterator that provides access to the predicted scores - * @param scores An iterator that provides random access to the predicted scores - * @param numElementsPerBin A pointer to an array of type `uint32`, shape `(numBins)`, that provides random - * access to the number of elements per bin - * @param numBins The number of bins - * @param l1RegularizationWeight The weight of the L1 regularization term - * @param l2RegularizationWeight The weight of the L2 regularization term - */ - template - static inline float64 calculateRegularizationTerm(ScoreIterator scores, const uint32* numElementsPerBin, - uint32 numBins, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) { - float64 regularizationTerm; - - if (l1RegularizationWeight > 0) { - regularizationTerm = l1RegularizationWeight * l1Norm(scores, numElementsPerBin, numBins); - } else { - regularizationTerm = 0; - } - - if (l2RegularizationWeight > 0) { - regularizationTerm += 0.5 * l2RegularizationWeight * l2NormPow(scores, numElementsPerBin, numBins); - } - - return regularizationTerm; - } - - /** - * An abstract base class for all classes that allow to calculate the predictions of 
rules, as well as their overall - * quality, based on the gradients and Hessians that have been calculated according to a loss function that is - * applied example-wise and using gradient-based label binning. - * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class AbstractExampleWiseBinnedRuleEvaluation - : public AbstractExampleWiseRuleEvaluation { - private: - - const uint32 maxBins_; - - DenseBinnedScoreVector scoreVector_; - - float64* aggregatedGradients_; - - float64* aggregatedHessians_; - - uint32* binIndices_; - - uint32* numElementsPerBin_; - - float64* criteria_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr binningPtr_; - - const Blas& blas_; - - const Lapack& lapack_; - - protected: - - /** - * Must be implemented by subclasses in order to calculate label-wise criteria that are used to determine - * the mapping from labels to bins. 
- * - * @param statisticVector A reference to an object of template type `StatisticVector` that stores - * the gradients and Hessians - * @param criteria A pointer to an array of type `float64`, shape `(numCriteria)`, the - * label-wise criteria should be written to - * @param numCriteria The number of label-wise criteria to be calculated - * @param l1RegularizationWeight The L1 regularization weight - * @param l2RegularizationWeight The L2 regularization weight - * @return The number of label-wise criteria that have been calculated - */ - virtual uint32 calculateLabelWiseCriteria(const StatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) = 0; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param indicesSorted True, if the given indices are guaranteed to be sorted, false otherwise - * @param maxBins The maximum number of bins - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - AbstractExampleWiseBinnedRuleEvaluation(const IndexVector& labelIndices, bool indicesSorted, uint32 maxBins, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr binningPtr, const Blas& blas, - const Lapack& lapack) - : AbstractExampleWiseRuleEvaluation(maxBins, lapack), - 
maxBins_(maxBins), - scoreVector_(DenseBinnedScoreVector(labelIndices, maxBins + 1, indicesSorted)), - aggregatedGradients_(new float64[maxBins]), - aggregatedHessians_(new float64[triangularNumber(maxBins)]), binIndices_(new uint32[maxBins]), - numElementsPerBin_(new uint32[maxBins]), criteria_(new float64[labelIndices.getNumElements()]), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - binningPtr_(std::move(binningPtr)), blas_(blas), lapack_(lapack) { - // The last bin is used for labels for which the corresponding criterion is zero. For this particular - // bin, the prediction is always zero. - scoreVector_.scores_binned_begin()[maxBins_] = 0; - } - - virtual ~AbstractExampleWiseBinnedRuleEvaluation() override { - delete[] aggregatedGradients_; - delete[] aggregatedHessians_; - delete[] binIndices_; - delete[] numElementsPerBin_; - delete[] criteria_; - } - - /** - * @see `IRuleEvaluation::evaluate` - */ - const IScoreVector& calculateScores(DenseExampleWiseStatisticVector& statisticVector) override final { - // Calculate label-wise criteria... - uint32 numCriteria = - this->calculateLabelWiseCriteria(statisticVector, criteria_, scoreVector_.getNumElements(), - l1RegularizationWeight_, l2RegularizationWeight_); - - // Obtain information about the bins to be used... - LabelInfo labelInfo = binningPtr_->getLabelInfo(criteria_, numCriteria); - uint32 numBins = labelInfo.numPositiveBins + labelInfo.numNegativeBins; - - if (numBins > 0) { - // Reset arrays to zero... - setArrayToZeros(numElementsPerBin_, numBins); - - // Apply binning method in order to aggregate the gradients and Hessians that belong to the same - // bins... 
- typename DenseBinnedScoreVector::index_binned_iterator binIndexIterator = - scoreVector_.indices_binned_begin(); - auto callback = [=](uint32 binIndex, uint32 labelIndex) { - numElementsPerBin_[binIndex] += 1; - binIndexIterator[labelIndex] = binIndex; - }; - auto zeroCallback = [=](uint32 labelIndex) { - binIndexIterator[labelIndex] = maxBins_; - }; - binningPtr_->createBins(labelInfo, criteria_, numCriteria, callback, zeroCallback); - - // Determine number of non-empty bins... - numBins = removeEmptyBins(numElementsPerBin_, binIndices_, numBins); - scoreVector_.setNumBins(numBins, false); - - // Aggregate gradients and Hessians... - setArrayToZeros(aggregatedGradients_, numBins); - setArrayToZeros(aggregatedHessians_, triangularNumber(numBins)); - aggregateGradientsAndHessians(statisticVector.gradients_cbegin(), statisticVector.hessians_cbegin(), - numCriteria, binIndexIterator, binIndices_, aggregatedGradients_, - aggregatedHessians_, maxBins_); - - // Copy Hessians to the matrix of coefficients and add regularization weight to its diagonal... - copyCoefficients(aggregatedHessians_, this->dsysvTmpArray1_, numBins); - addL2RegularizationWeight(this->dsysvTmpArray1_, numBins, numElementsPerBin_, - l2RegularizationWeight_); - - // Copy gradients to the vector of ordinates... - typename DenseBinnedScoreVector::score_binned_iterator scoreIterator = - scoreVector_.scores_binned_begin(); - copyOrdinates(aggregatedGradients_, scoreIterator, numBins); - addL1RegularizationWeight(scoreIterator, numBins, numElementsPerBin_, l1RegularizationWeight_); - - // Calculate the scores to be predicted for the individual labels by solving a system of linear - // equations... - lapack_.dsysv(this->dsysvTmpArray1_, this->dsysvTmpArray2_, this->dsysvTmpArray3_, scoreIterator, - numBins, this->dsysvLwork_); - - // Calculate the overall quality... 
- float64 quality = calculateOverallQuality(scoreIterator, aggregatedGradients_, aggregatedHessians_, - this->dspmvTmpArray_, numBins, blas_); - - // Evaluate regularization term... - quality += calculateRegularizationTerm(scoreIterator, numElementsPerBin_, numBins, - l1RegularizationWeight_, l2RegularizationWeight_); - - scoreVector_.quality = quality; - } else { - setArrayToValue(scoreVector_.indices_binned_begin(), numCriteria, maxBins_); - scoreVector_.quality = 0; - } - - return scoreVector_; - } - }; - - /** - * Allows to calculate the predictions of complete rules, as well as their overall quality, based on the gradients - * and Hessians that are stored by a `DenseExampleWiseStatisticVector` using L1 and L2 regularization. The labels - * are assigned to bins based on the gradients and Hessians. - * - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should be - * calculated - */ - template - class DenseExampleWiseCompleteBinnedRuleEvaluation final - : public AbstractExampleWiseBinnedRuleEvaluation { - protected: - - uint32 calculateLabelWiseCriteria(const DenseExampleWiseStatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) override { - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator = - statisticVector.gradients_cbegin(); - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator = - statisticVector.hessians_diagonal_cbegin(); - - for (uint32 i = 0; i < numCriteria; i++) { - criteria[i] = calculateLabelWiseScore(gradientIterator[i], hessianIterator[i], - l1RegularizationWeight, l2RegularizationWeight); - } - - return numCriteria; - } - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param maxBins The maximum number of bins - * @param 
l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - DenseExampleWiseCompleteBinnedRuleEvaluation(const IndexVector& labelIndices, uint32 maxBins, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr binningPtr, const Blas& blas, - const Lapack& lapack) - : AbstractExampleWiseBinnedRuleEvaluation( - labelIndices, true, maxBins, l1RegularizationWeight, l2RegularizationWeight, std::move(binningPtr), - blas, lapack) {} - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_common.hpp deleted file mode 100644 index 7083db10..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_common.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/lapack.hpp" -#include "boosting/rule_evaluation/rule_evaluation.hpp" - -namespace boosting { - - /** - * An abstract base class for all classes that allow to calculate the predictions of rules, as well as their overall - * quality, based on the gradients and Hessians that have been calculated according to a loss function that is - * applied example-wise. 
- * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class AbstractExampleWiseRuleEvaluation : public IRuleEvaluation { - protected: - - /** - * A pointer to a temporary array that is used for executing the LAPACK routine DSPMV. - */ - float64* dspmvTmpArray_; - - /** - * A pointer to a temporary array that is used for executing the LAPACK routine DSYSV. - */ - float64* dsysvTmpArray1_; - - /** - * A pointer to a second temporary array that is used for executing the LAPACK routine DSYSV. - */ - int* dsysvTmpArray2_; - - /** - * The `lwork` parameter that is used for executing the LAPACK routine DSYSV. - */ - const int dsysvLwork_; - - /** - * A pointer to a third temporary array that is used for executing the LAPACK routine DSYSV. - */ - double* dsysvTmpArray3_; - - public: - - /** - * @param numPredictions The number of labels for which the rules may predict - * @param lapack A reference to an object of type `Lapack` that allows to execute different - * LAPACK routines - */ - AbstractExampleWiseRuleEvaluation(uint32 numPredictions, const Lapack& lapack) - : dspmvTmpArray_(new float64[numPredictions]), - dsysvTmpArray1_(new float64[numPredictions * numPredictions]), - dsysvTmpArray2_(new int[numPredictions]), - dsysvLwork_(lapack.queryDsysvLworkParameter(dsysvTmpArray1_, dspmvTmpArray_, numPredictions)), - dsysvTmpArray3_(new double[dsysvLwork_]) {} - - virtual ~AbstractExampleWiseRuleEvaluation() override { - delete[] dspmvTmpArray_; - delete[] dsysvTmpArray1_; - delete[] dsysvTmpArray2_; - delete[] dsysvTmpArray3_; - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete.cpp deleted file mode 100644 index 
436499d0..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_example_wise_complete.hpp" - -#include "rule_evaluation_example_wise_complete_common.hpp" - -namespace boosting { - - ExampleWiseCompleteRuleEvaluationFactory::ExampleWiseCompleteRuleEvaluationFactory(float64 l1RegularizationWeight, - float64 l2RegularizationWeight, - const Blas& blas, - const Lapack& lapack) - : l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), blas_(blas), - lapack_(lapack) {} - - std::unique_ptr> ExampleWiseCompleteRuleEvaluationFactory::create( - const DenseExampleWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, blas_, lapack_); - } - - std::unique_ptr> ExampleWiseCompleteRuleEvaluationFactory::create( - const DenseExampleWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, blas_, lapack_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.cpp deleted file mode 100644 index 3810e1c6..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_example_wise_complete_binned.hpp" - -#include "rule_evaluation_example_wise_binned_common.hpp" - -namespace boosting { - - ExampleWiseCompleteBinnedRuleEvaluationFactory::ExampleWiseCompleteBinnedRuleEvaluationFactory( - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - 
std::unique_ptr labelBinningFactoryPtr, const Blas& blas, const Lapack& lapack) - : l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - labelBinningFactoryPtr_(std::move(labelBinningFactoryPtr)), blas_(blas), lapack_(lapack) {} - - std::unique_ptr> - ExampleWiseCompleteBinnedRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - uint32 maxBins = labelBinningPtr->getMaxBins(indexVector.getNumElements()); - return std::make_unique>( - indexVector, maxBins, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr), blas_, - lapack_); - } - - std::unique_ptr> - ExampleWiseCompleteBinnedRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - uint32 maxBins = labelBinningPtr->getMaxBins(indexVector.getNumElements()); - return std::make_unique>( - indexVector, maxBins, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr), blas_, - lapack_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_common.hpp deleted file mode 100644 index 18f172ae..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_complete_common.hpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/math.hpp" -#include "common/rule_evaluation/score_vector_dense.hpp" -#include "rule_evaluation_example_wise_common.hpp" -#include "rule_evaluation_label_wise_common.hpp" - -namespace boosting { - - /** - * Copies Hessians from an 
iterator to a matrix of coefficients that may be passed to LAPACK's DSYSV routine. - * - * @tparam HessianIterator The type of the iterator that provides access to the Hessians - * @param hessianIterator An iterator that provides random access to the Hessians - * @param coefficients A pointer to an array of type `float64`, shape `(n * n)`, the Hessians should be copied - * to - * @param n The dimensionality of the matrix of coefficients - */ - template - static inline void copyCoefficients(HessianIterator hessianIterator, float64* coefficients, uint32 n) { - for (uint32 c = 0; c < n; c++) { - uint32 offset = c * n; - - for (uint32 r = 0; r <= c; r++) { - coefficients[offset + r] = *hessianIterator; - hessianIterator++; - } - } - } - - /** - * Adds a L2 regularization weight to the diagonal of a matrix of coefficients. - * - * @param coefficients A pointer to an array of type `float64`, shape `(n * n)`, the regularization - * weight should be added to - * @param n The number of coefficients on the diagonal - * @param l2RegularizationWeight The L2 regularization weight to be added to the coefficients - */ - static inline void addL2RegularizationWeight(float64* coefficients, uint32 numPredictions, - float64 l2RegularizationWeight) { - if (l2RegularizationWeight > 0) { - for (uint32 i = 0; i < numPredictions; i++) { - coefficients[(i * numPredictions) + i] += l2RegularizationWeight; - } - } - } - - /** - * Copies gradients from an iterator to a vector of ordinates that may be passed to LAPACK's DSYSV routine. 
- * - * @tparam GradientIterator The type of the iterator that provides access to the gradients - * @param gradientIterator An iterator that provides random access to the gradients - * @param ordinates A pointer to an array of type `float64`, shape `(n)`, the gradients should be - * copied to - * @param n The number of gradients - */ - template - static inline void copyOrdinates(GradientIterator gradientIterator, float64* ordinates, uint32 n) { - for (uint32 i = 0; i < n; i++) { - ordinates[i] = -gradientIterator[i]; - } - } - - /** - * Adds a L1 regularization weight to a vector of ordinates. - * - * @param ordinates A pointer to an array of type `float64`, shape `(n)`, the L1 regularization - * weight should be added to - * @param n The number of ordinates - * @param l1RegularizationWeight The L1 regularization weight to be added to the ordinates - **/ - static inline void addL1RegularizationWeight(float64* ordinates, uint32 n, float64 l1RegularizationWeight) { - if (l1RegularizationWeight > 0) { - for (uint32 i = 0; i < n; i++) { - float64 gradient = ordinates[i]; - ordinates[i] += getL1RegularizationWeight(gradient, l1RegularizationWeight); - } - } - } - - /** - * Calculates and returns the overall quality of predictions for several labels. 
- * - * @tparam ScoreIterator The type of the iterator that provides access to the predicted scores - * @tparam GradientIterator The type of the iterator that provides access to the gradients - * @tparam HessianIterator The type of the iterator that provides access to the Hessians - * @param scores An iterator that provides random access to the predicted scores - * @param gradients An iterator that provides random access to the gradients - * @param hessians An iterator that provides random access to the Hessians - * @param tmpArray A pointer to an array of type `float64`, shape `(numPredictions)`, that should be used - * by BLAS' DSPMV routine to store temporary values - * @param numPredictions The number of predictions - * @param blas A reference to an object of type `Blas` that allows to execute different BLAS routines - * @return The quality that has been calculated - */ - template - static inline float64 calculateOverallQuality(ScoreIterator scores, GradientIterator gradients, - HessianIterator hessians, float64* tmpArray, uint32 numPredictions, - const Blas& blas) { - blas.dspmv(hessians, scores, tmpArray, numPredictions); - return blas.ddot(scores, gradients, numPredictions) + (0.5 * blas.ddot(scores, tmpArray, numPredictions)); - } - - /** - * Calculates and returns the regularization term. 
- * - * @tparam ScoreIterator The type of the iterator that provides access to the predicted scores - * @param scores An iterator that provides random access to the predicted scores - * @param numPredictions The number of predictions - * @param l1RegularizationWeight The weight of the L1 regularization term - * @param l2RegularizationWeight The weight of the L2 regularization term - */ - template - static inline float64 calculateRegularizationTerm(ScoreIterator scores, uint32 numPredictions, - float64 l1RegularizationWeight, float64 l2RegularizationWeight) { - float64 regularizationTerm; - - if (l1RegularizationWeight > 0) { - regularizationTerm = l1RegularizationWeight * l1Norm(scores, numPredictions); - } else { - regularizationTerm = 0; - } - - if (l2RegularizationWeight > 0) { - regularizationTerm += 0.5 * l2RegularizationWeight * l2NormPow(scores, numPredictions); - } - - return regularizationTerm; - } - - /** - * Allows to calculate the predictions of complete rules, as well as their overall quality, based on the gradients - * and Hessians that are stored by a `DenseExampleWiseStatisticVector` using L1 and L2 regularization. 
- * - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should be - * calculated - */ - template - class DenseExampleWiseCompleteRuleEvaluation final - : public AbstractExampleWiseRuleEvaluation { - private: - - DenseScoreVector scoreVector_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - DenseExampleWiseCompleteRuleEvaluation(const IndexVector& labelIndices, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, const Blas& blas, - const Lapack& lapack) - : AbstractExampleWiseRuleEvaluation( - labelIndices.getNumElements(), lapack), - scoreVector_(DenseScoreVector(labelIndices, true)), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - blas_(blas), lapack_(lapack) {} - - /** - * @see `IRuleEvaluation::evaluate` - */ - const IScoreVector& calculateScores(DenseExampleWiseStatisticVector& statisticVector) override { - uint32 numPredictions = scoreVector_.getNumElements(); - - // Copy Hessians to the matrix of coefficients and add the L2 regularization weight to its diagonal... 
- copyCoefficients(statisticVector.hessians_cbegin(), this->dsysvTmpArray1_, numPredictions); - addL2RegularizationWeight(this->dsysvTmpArray1_, numPredictions, l2RegularizationWeight_); - - // Copy gradients to the vector of ordinates and add the L1 regularization weight... - typename DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - copyOrdinates(statisticVector.gradients_cbegin(), scoreIterator, numPredictions); - addL1RegularizationWeight(scoreIterator, numPredictions, l1RegularizationWeight_); - - // Calculate the scores to be predicted for individual labels by solving a system of linear equations... - lapack_.dsysv(this->dsysvTmpArray1_, this->dsysvTmpArray2_, this->dsysvTmpArray3_, scoreIterator, - numPredictions, this->dsysvLwork_); - - // Calculate the overall quality... - float64 quality = calculateOverallQuality(scoreIterator, statisticVector.gradients_begin(), - statisticVector.hessians_begin(), this->dspmvTmpArray_, - numPredictions, blas_); - - // Evaluate regularization term... - quality += calculateRegularizationTerm(scoreIterator, numPredictions, l1RegularizationWeight_, - l2RegularizationWeight_); - - scoreVector_.quality = quality; - return scoreVector_; - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_common.hpp deleted file mode 100644 index fdd2bcb8..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_common.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/math/math.hpp" - -namespace boosting { - - /** - * Copies Hessians from an iterator to a matrix of coefficients that may be passed to LAPACK's DSYSV routine. Only - * the Hessians that correspond to the indices in a second iterator are taken into account. 
- * - * @tparam HessianIterator The type of the iterator that provides access to the Hessians - * @tparam IndexIterator The type of the iterator that provides access to the indices - * @param hessianIterator An iterator that provides random access to the Hessians - * @param indexIterator An iterator that provides random access to the indices - * @param coefficients A pointer to an array of type `float64`, shape `(n * n)`, the Hessians should be copied - * to - * @param n The dimensionality of the matrix of coefficients - */ - template - static inline void copyCoefficients(HessianIterator hessianIterator, IndexIterator indexIterator, - float64* coefficients, uint32 n) { - for (uint32 c = 0; c < n; c++) { - uint32 offset = c * n; - uint32 offset2 = triangularNumber(indexIterator[c]); - - for (uint32 r = 0; r <= c; r++) { - coefficients[offset + r] = hessianIterator[offset2 + indexIterator[r]]; - } - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.cpp deleted file mode 100644 index fffc1218..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.cpp +++ /dev/null @@ -1,145 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic.hpp" - -#include "rule_evaluation_example_wise_complete_common.hpp" -#include "rule_evaluation_example_wise_partial_common.hpp" -#include "rule_evaluation_example_wise_partial_dynamic_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that predict for a subset of the available labels that is - * determined dynamically, as well as their overall quality, based on the gradients and Hessians that are stored by - * a `DenseExampleWiseStatisticVector` using L1 and L2 regularization. 
- * - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should be - * calculated - */ - template - class DenseExampleWiseDynamicPartialRuleEvaluation final - : public AbstractExampleWiseRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - PartialIndexVector indexVector_; - - DenseScoreVector scoreVector_; - - const float64 threshold_; - - const float64 exponent_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const Blas& blas_; - - const Lapack& lapack_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param threshold A threshold that affects for how many labels the rule heads should - * predict - * @param exponent An exponent that is used to weigh the estimated predictive quality for - * individual labels - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - DenseExampleWiseDynamicPartialRuleEvaluation(const IndexVector& labelIndices, float32 threshold, - float32 exponent, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, const Blas& blas, - const Lapack& lapack) - : AbstractExampleWiseRuleEvaluation( - labelIndices.getNumElements(), lapack), - labelIndices_(labelIndices), indexVector_(PartialIndexVector(labelIndices.getNumElements())), - scoreVector_(DenseScoreVector(indexVector_, true)), threshold_(1.0 - threshold), - exponent_(exponent), 
l1RegularizationWeight_(l1RegularizationWeight), - l2RegularizationWeight_(l2RegularizationWeight), blas_(blas), lapack_(lapack) {} - - /** - * @see `IRuleEvaluation::evaluate` - */ - const IScoreVector& calculateScores(DenseExampleWiseStatisticVector& statisticVector) override { - uint32 numLabels = statisticVector.getNumElements(); - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator = - statisticVector.gradients_cbegin(); - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator = - statisticVector.hessians_diagonal_cbegin(); - typename DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - const std::pair pair = - getMinAndMaxScore(scoreIterator, gradientIterator, hessianIterator, numLabels, - l1RegularizationWeight_, l2RegularizationWeight_); - float64 minAbsScore = pair.first; - - // Copy gradients to the vector of ordinates and add the L1 regularization weight... - float64 threshold = calculateThreshold(minAbsScore, pair.second, threshold_, exponent_); - PartialIndexVector::iterator indexIterator = indexVector_.begin(); - typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - uint32 n = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 score = scoreIterator[i]; - - if (calculateWeightedScore(score, minAbsScore, exponent_) > threshold) { - indexIterator[n] = labelIndexIterator[i]; - scoreIterator[n] = -gradientIterator[i]; - n++; - } - } - - indexVector_.setNumElements(n, false); - addL1RegularizationWeight(scoreIterator, n, l1RegularizationWeight_); - - // Copy Hessians to the matrix of coefficients and add the L2 regularization weight to its diagonal... - copyCoefficients(statisticVector.hessians_cbegin(), indexIterator, this->dsysvTmpArray1_, n); - addL2RegularizationWeight(this->dsysvTmpArray1_, n, l2RegularizationWeight_); - - // Calculate the scores to be predicted for individual labels by solving a system of linear equations... 
- lapack_.dsysv(this->dsysvTmpArray1_, this->dsysvTmpArray2_, this->dsysvTmpArray3_, scoreIterator, n, - this->dsysvLwork_); - - // Calculate the overall quality... - float64 quality = - calculateOverallQuality(scoreIterator, statisticVector.gradients_begin(), - statisticVector.hessians_begin(), this->dspmvTmpArray_, n, blas_); - - // Evaluate regularization term... - quality += - calculateRegularizationTerm(scoreIterator, n, l1RegularizationWeight_, l2RegularizationWeight_); - - scoreVector_.quality = quality; - return scoreVector_; - } - }; - - ExampleWiseDynamicPartialRuleEvaluationFactory::ExampleWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight, - const Blas& blas, const Lapack& lapack) - : threshold_(threshold), exponent_(exponent), l1RegularizationWeight_(l1RegularizationWeight), - l2RegularizationWeight_(l2RegularizationWeight), blas_(blas), lapack_(lapack) {} - - std::unique_ptr> - ExampleWiseDynamicPartialRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - return std::make_unique>( - indexVector, threshold_, exponent_, l1RegularizationWeight_, l2RegularizationWeight_, blas_, lapack_); - } - - std::unique_ptr> - ExampleWiseDynamicPartialRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, blas_, lapack_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.cpp deleted file mode 100644 index 73b038ac..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.cpp 
+++ /dev/null @@ -1,127 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_binned.hpp" - -#include "rule_evaluation_example_wise_binned_common.hpp" -#include "rule_evaluation_example_wise_partial_dynamic_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that predict for a subset of the available labels that is - * determined dynamically, as well as their overall quality, based on the gradients and Hessians that are stored by - * a `DenseExampleWiseStatisticVector` using L1 and L2 regularization. The labels are assigned to bins based on the - * gradients and Hessians. - * - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should be - * calculated - */ - template - class DenseExampleWiseDynamicPartialBinnedRuleEvaluation final - : public AbstractExampleWiseBinnedRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - const std::unique_ptr indexVectorPtr_; - - const float64 threshold_; - - const float64 exponent_; - - protected: - - uint32 calculateLabelWiseCriteria(const DenseExampleWiseStatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) override { - uint32 numLabels = statisticVector.getNumElements(); - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator = - statisticVector.gradients_cbegin(); - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator = - statisticVector.hessians_diagonal_cbegin(); - - const std::pair pair = - getMinAndMaxScore(criteria, gradientIterator, hessianIterator, numLabels, l1RegularizationWeight, - l2RegularizationWeight); - float64 minAbsScore = pair.first; - float64 threshold = calculateThreshold(minAbsScore, pair.second, threshold_, exponent_); - PartialIndexVector::iterator indexIterator = indexVectorPtr_->begin(); - typename 
IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - uint32 n = 0; - - for (uint32 i = 0; i < numLabels; i++) { - float64 score = criteria[i]; - - if (calculateWeightedScore(score, minAbsScore, exponent_) > threshold) { - indexIterator[n] = labelIndexIterator[i]; - criteria[n] = score; - n++; - } - } - - indexVectorPtr_->setNumElements(n, false); - return n; - } - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param maxBins The maximum number of bins - * @param indexVectorPtr An unique pointer to an object of type `PartialIndexVector` that stores - * the indices of the labels for which a rule predicts - * @param threshold A threshold that affects for how many labels the rule heads should - * predict - * @param exponent An exponent that is used to weight the estimated predictive quality for - * individual labels - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - DenseExampleWiseDynamicPartialBinnedRuleEvaluation( - const IndexVector& labelIndices, uint32 maxBins, std::unique_ptr indexVectorPtr, - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr binningPtr, const Blas& blas, const Lapack& lapack) - : AbstractExampleWiseBinnedRuleEvaluation( - *indexVectorPtr, true, maxBins, 
l1RegularizationWeight, l2RegularizationWeight, std::move(binningPtr), - blas, lapack), - labelIndices_(labelIndices), indexVectorPtr_(std::move(indexVectorPtr)), threshold_(1.0 - threshold), - exponent_(exponent) {} - }; - - ExampleWiseDynamicPartialBinnedRuleEvaluationFactory::ExampleWiseDynamicPartialBinnedRuleEvaluationFactory( - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr, const Blas& blas, const Lapack& lapack) - : threshold_(threshold), exponent_(exponent), l1RegularizationWeight_(l1RegularizationWeight), - l2RegularizationWeight_(l2RegularizationWeight), labelBinningFactoryPtr_(std::move(labelBinningFactoryPtr)), - blas_(blas), lapack_(lapack) {} - - std::unique_ptr> - ExampleWiseDynamicPartialBinnedRuleEvaluationFactory::create( - const DenseExampleWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - uint32 numElements = indexVector.getNumElements(); - std::unique_ptr indexVectorPtr = std::make_unique(numElements); - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - uint32 maxBins = labelBinningPtr->getMaxBins(numElements); - return std::make_unique>( - indexVector, maxBins, std::move(indexVectorPtr), threshold_, exponent_, l1RegularizationWeight_, - l2RegularizationWeight_, std::move(labelBinningPtr), blas_, lapack_); - } - - std::unique_ptr> - ExampleWiseDynamicPartialBinnedRuleEvaluationFactory::create( - const DenseExampleWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - uint32 maxBins = labelBinningPtr->getMaxBins(indexVector.getNumElements()); - return std::make_unique>( - indexVector, maxBins, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr), blas_, - lapack_); - } - -} diff --git 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_common.hpp deleted file mode 100644 index 6252b150..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_dynamic_common.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_example_wise_dense.hpp" -#include "rule_evaluation_label_wise_partial_dynamic_common.hpp" - -namespace boosting { - - /** - * Determines and returns the minimum and maximum absolute score to be predicted for a label. The scores to be - * predicted for individual labels are also written to a given iterator. - * - * @tparam ScoreIterator The type of the iterator, the scores should be written to - * @param scoreIterator An iterator, the scores should be written to - * @param gradientIterator An iterator that provides access to the gradient for each label - * @param hessianIterator An iterator that provides access to the Hessian for each label - * @param numLabels The total number of available labels - * @param l1RegularizationWeight The l2 regularization weight - * @param l2RegularizationWeight The L1 regularization weight - * @return A `std::pair` that stores the minimum and maximum absolute score - */ - template - static inline std::pair getMinAndMaxScore( - ScoreIterator scoreIterator, DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator, - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator, uint32 numLabels, - float64 l1RegularizationWeight, float64 l2RegularizationWeight) { - float64 score = calculateLabelWiseScore(gradientIterator[0], hessianIterator[0], l1RegularizationWeight, - l2RegularizationWeight); - scoreIterator[0] = score; - float64 maxAbsScore = std::abs(score); - float64 
minAbsScore = maxAbsScore; - - for (uint32 i = 1; i < numLabels; i++) { - score = calculateLabelWiseScore(gradientIterator[i], hessianIterator[i], l1RegularizationWeight, - l2RegularizationWeight); - scoreIterator[i] = score; - score = std::abs(score); - - if (score > maxAbsScore) { - maxAbsScore = score; - } else if (score < minAbsScore) { - minAbsScore = score; - } - } - - return std::make_pair(minAbsScore, maxAbsScore); - } -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.cpp deleted file mode 100644 index 2e8810d7..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.cpp +++ /dev/null @@ -1,137 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed.hpp" - -#include "rule_evaluation_example_wise_complete_common.hpp" -#include "rule_evaluation_example_wise_partial_common.hpp" -#include "rule_evaluation_example_wise_partial_fixed_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that predict for a predefined number of labels, as well as - * their overall quality, based on the gradients and Hessians that are stored by a `DenseExampleWiseStatisticVector` - * using L1 and L2 regularization. 
- * - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should be - * calculated - */ - template - class DenseExampleWiseFixedPartialRuleEvaluation final - : public AbstractExampleWiseRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - PartialIndexVector indexVector_; - - DenseScoreVector scoreVector_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const Blas& blas_; - - const Lapack& lapack_; - - SparseArrayVector tmpVector_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param numPredictions The number of labels for which the rules should predict - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - DenseExampleWiseFixedPartialRuleEvaluation(const IndexVector& labelIndices, uint32 numPredictions, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - const Blas& blas, const Lapack& lapack) - : AbstractExampleWiseRuleEvaluation(numPredictions, - lapack), - labelIndices_(labelIndices), indexVector_(PartialIndexVector(numPredictions)), - scoreVector_(DenseScoreVector(indexVector_, false)), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - blas_(blas), lapack_(lapack), tmpVector_(SparseArrayVector(labelIndices.getNumElements())) {} - - /** - * @see `IRuleEvaluation::evaluate` - */ - const IScoreVector& 
calculateScores(DenseExampleWiseStatisticVector& statisticVector) override { - uint32 numLabels = statisticVector.getNumElements(); - uint32 numPredictions = indexVector_.getNumElements(); - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator = - statisticVector.gradients_cbegin(); - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator = - statisticVector.hessians_diagonal_cbegin(); - SparseArrayVector::iterator tmpIterator = tmpVector_.begin(); - sortLabelWiseCriteria(tmpIterator, gradientIterator, hessianIterator, numLabels, numPredictions, - l1RegularizationWeight_, l2RegularizationWeight_); - - // Copy gradients to the vector of ordinates and add the L1 regularization weight... - PartialIndexVector::iterator indexIterator = indexVector_.begin(); - typename DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - - for (uint32 i = 0; i < numPredictions; i++) { - const IndexedValue& entry = tmpIterator[i]; - uint32 index = entry.index; - indexIterator[i] = labelIndexIterator[index]; - scoreIterator[i] = -gradientIterator[index]; - } - - addL1RegularizationWeight(scoreIterator, numPredictions, l1RegularizationWeight_); - - // Copy Hessians to the matrix of coefficients and add the L2 regularization weight to its diagonal... - copyCoefficients(statisticVector.hessians_cbegin(), indexIterator, this->dsysvTmpArray1_, - numPredictions); - addL2RegularizationWeight(this->dsysvTmpArray1_, numPredictions, l2RegularizationWeight_); - - // Calculate the scores to be predicted for individual labels by solving a system of linear equations... - lapack_.dsysv(this->dsysvTmpArray1_, this->dsysvTmpArray2_, this->dsysvTmpArray3_, scoreIterator, - numPredictions, this->dsysvLwork_); - - // Calculate the overall quality... 
- float64 quality = calculateOverallQuality(scoreIterator, statisticVector.gradients_begin(), - statisticVector.hessians_begin(), this->dspmvTmpArray_, - numPredictions, blas_); - - // Evaluate regularization term... - quality += calculateRegularizationTerm(scoreIterator, numPredictions, l1RegularizationWeight_, - l2RegularizationWeight_); - - scoreVector_.quality = quality; - return scoreVector_; - } - }; - - ExampleWiseFixedPartialRuleEvaluationFactory::ExampleWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, const Blas& blas, const Lapack& lapack) - : labelRatio_(labelRatio), minLabels_(minLabels), maxLabels_(maxLabels), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), blas_(blas), - lapack_(lapack) {} - - std::unique_ptr> - ExampleWiseFixedPartialRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - uint32 numPredictions = - calculateBoundedFraction(indexVector.getNumElements(), labelRatio_, minLabels_, maxLabels_); - return std::make_unique>( - indexVector, numPredictions, l1RegularizationWeight_, l2RegularizationWeight_, blas_, lapack_); - } - - std::unique_ptr> - ExampleWiseFixedPartialRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, blas_, lapack_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.cpp deleted file mode 100644 index 596d93ef..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.cpp +++ /dev/null @@ -1,116 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_binned.hpp" - -#include "rule_evaluation_example_wise_binned_common.hpp" -#include "rule_evaluation_example_wise_partial_fixed_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that predict for a predefined number of labels, as well as - * their overall quality, based on the gradients and Hessians that are stored by a `DenseExampleWiseStatisticVector` - * using L1 and L2 regularization. The labels are assigned to bins based on the gradients and Hessians. - * - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should be - * calculated - */ - template - class DenseExampleWiseFixedPartialBinnedRuleEvaluation final - : public AbstractExampleWiseBinnedRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - const std::unique_ptr indexVectorPtr_; - - SparseArrayVector tmpVector_; - - protected: - - uint32 calculateLabelWiseCriteria(const DenseExampleWiseStatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) override { - uint32 numLabels = statisticVector.getNumElements(); - uint32 numPredictions = indexVectorPtr_->getNumElements(); - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator = - statisticVector.gradients_cbegin(); - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator = - statisticVector.hessians_diagonal_cbegin(); - SparseArrayVector::iterator tmpIterator = tmpVector_.begin(); - sortLabelWiseCriteria(tmpIterator, gradientIterator, hessianIterator, numLabels, numPredictions, - l1RegularizationWeight, l2RegularizationWeight); - PartialIndexVector::iterator indexIterator = indexVectorPtr_->begin(); 
- typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - - for (uint32 i = 0; i < numCriteria; i++) { - const IndexedValue& entry = tmpIterator[i]; - indexIterator[i] = labelIndexIterator[entry.index]; - criteria[i] = entry.value; - } - - return numCriteria; - } - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param maxBins The maximum number of bins - * @param indexVectorPtr An unique pointer to an object of type `PartialIndexVector` that stores - * the indices of the labels for which a rule predicts - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - * @param blas A reference to an object of type `Blas` that allows to execute BLAS - * routines - * @param lapack A reference to an object of type `Lapack` that allows to execute LAPACK - * routines - */ - DenseExampleWiseFixedPartialBinnedRuleEvaluation(const IndexVector& labelIndices, uint32 maxBins, - std::unique_ptr indexVectorPtr, - float64 l1RegularizationWeight, - float64 l2RegularizationWeight, - std::unique_ptr binningPtr, - const Blas& blas, const Lapack& lapack) - : AbstractExampleWiseBinnedRuleEvaluation( - *indexVectorPtr, false, maxBins, l1RegularizationWeight, l2RegularizationWeight, - std::move(binningPtr), blas, lapack), - labelIndices_(labelIndices), indexVectorPtr_(std::move(indexVectorPtr)), - tmpVector_(SparseArrayVector(labelIndices.getNumElements())) {} - }; - - ExampleWiseFixedPartialBinnedRuleEvaluationFactory::ExampleWiseFixedPartialBinnedRuleEvaluationFactory( - float32 
labelRatio, uint32 minLabels, uint32 maxLabels, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, std::unique_ptr labelBinningFactoryPtr, const Blas& blas, - const Lapack& lapack) - : labelRatio_(labelRatio), minLabels_(minLabels), maxLabels_(maxLabels), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - labelBinningFactoryPtr_(std::move(labelBinningFactoryPtr)), blas_(blas), lapack_(lapack) {} - - std::unique_ptr> - ExampleWiseFixedPartialBinnedRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - uint32 numPredictions = - calculateBoundedFraction(statisticVector.getNumElements(), labelRatio_, minLabels_, maxLabels_); - std::unique_ptr indexVectorPtr = std::make_unique(numPredictions); - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - uint32 maxBins = labelBinningPtr->getMaxBins(numPredictions); - return std::make_unique>( - indexVector, maxBins, std::move(indexVectorPtr), l1RegularizationWeight_, l2RegularizationWeight_, - std::move(labelBinningPtr), blas_, lapack_); - } - - std::unique_ptr> - ExampleWiseFixedPartialBinnedRuleEvaluationFactory::create(const DenseExampleWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - uint32 maxBins = labelBinningPtr->getMaxBins(indexVector.getNumElements()); - return std::make_unique>( - indexVector, maxBins, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr), blas_, - lapack_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_common.hpp deleted file mode 100644 index 6b9f9787..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_example_wise_partial_fixed_common.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_example_wise_dense.hpp" -#include "rule_evaluation_label_wise_partial_fixed_common.hpp" - -namespace boosting { - - /** - * Calculates scores that assess the quality of optimal predictions for each label and sorts them, such that the - * first `numPredictions` elements are the best-rated ones. - * - * @param tmpIterator An iterator that provides random access to a temporary array, which should be - * used to store the sorted scores and their original indices - * @param gradientIterator An iterator that provides access to the gradient for each label - * @param hessianIterator An iterator that provides access to the Hessian for each label - * @param numLabels The total number of available labels - * @param numPrediction The number of the best-rated predictions to be determined - * @param l1RegularizationWeight The l2 regularization weight - * @param l2RegularizationWeight The L1 regularization weight - */ - static inline void sortLabelWiseCriteria( - SparseArrayVector::iterator tmpIterator, - DenseExampleWiseStatisticVector::gradient_const_iterator gradientIterator, - DenseExampleWiseStatisticVector::hessian_diagonal_const_iterator hessianIterator, uint32 numLabels, - uint32 numPredictions, float64 l1RegularizationWeight, float64 l2RegularizationWeight) { - for (uint32 i = 0; i < numLabels; i++) { - IndexedValue& entry = tmpIterator[i]; - entry.index = i; - entry.value = calculateLabelWiseScore(gradientIterator[i], hessianIterator[i], l1RegularizationWeight, - l2RegularizationWeight); - } - - std::partial_sort(tmpIterator, &tmpIterator[numPredictions], &tmpIterator[numLabels], - CompareLabelWiseCriteria()); - } - -} \ No newline at end of file diff --git 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_binned_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_binned_common.hpp deleted file mode 100644 index 9c12d694..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_binned_common.hpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_label_wise_dense.hpp" -#include "common/data/arrays.hpp" -#include "common/rule_evaluation/score_vector_binned_dense.hpp" -#include "rule_evaluation_label_wise_common.hpp" - -namespace boosting { - - /** - * Calculates the score to be predicted for individual bins and returns the overall quality of the predictions. - * - * @tparam ScoreIterator The type of the iterator that provides access to the gradients and Hessians - * @param statisticIterator An iterator that provides random access to the gradients and Hessians - * @param scoreIterator An iterator, the calculated scores should be written to - * @param weights An iterator that provides access to the weights of individual bins - * @param numElements The number of bins - * @param l1RegularizationWeight The L1 regularization weight - * @param l2RegularizationWeight The L2 regularization weight - * @return The overall quality that has been calculated - */ - template - static inline float64 calculateBinnedScores(DenseLabelWiseStatisticVector::const_iterator statisticIterator, - ScoreIterator scoreIterator, const uint32* weights, uint32 numElements, - float64 l1RegularizationWeight, float64 l2RegularizationWeight) { - float64 quality = 0; - - for (uint32 i = 0; i < numElements; i++) { - uint32 weight = weights[i]; - const Tuple& tuple = statisticIterator[i]; - float64 predictedScore = calculateLabelWiseScore(tuple.first, tuple.second, weight * l1RegularizationWeight, - weight * l2RegularizationWeight); - 
scoreIterator[i] = predictedScore; - quality += calculateLabelWiseQuality(predictedScore, tuple.first, tuple.second, - weight * l1RegularizationWeight, weight * l2RegularizationWeight); - } - - return quality; - } - - /** - * An abstract base class for all classes that allow to calculate the predictions of rules, as well as their overall - * quality, based on the gradients and Hessians that have been calculated according to a loss function that is - * applied label-wise and using gradient-based label binning. - * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class AbstractLabelWiseBinnedRuleEvaluation : public IRuleEvaluation { - private: - - const uint32 maxBins_; - - DenseBinnedScoreVector scoreVector_; - - DenseLabelWiseStatisticVector aggregatedStatisticVector_; - - uint32* numElementsPerBin_; - - float64* criteria_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - const std::unique_ptr binningPtr_; - - protected: - - /** - * Must be implemented by subclasses in order to calculate label-wise criteria that are used to determine - * the mapping from labels to bins. 
- * - * @param statisticVector A reference to an object of template type `StatisticVector` that stores - * the gradients and Hessians - * @param criteria A pointer to an array of type `float64`, shape `(numCriteria)`, the - * label-wise criteria should be written to - * @param numCriteria The number of label-wise criteria to be calculated - * @param l1RegularizationWeight The L1 regularization weight - * @param l2RegularizationWeight The L2 regularization weight - * @return The number of label-wise criteria that have been calculated - */ - virtual uint32 calculateLabelWiseCriteria(const StatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) = 0; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param indicesSorted True, if the given indices are guaranteed to be sorted, false otherwise - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - */ - AbstractLabelWiseBinnedRuleEvaluation(const IndexVector& labelIndices, bool indicesSorted, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr binningPtr) - : maxBins_(binningPtr->getMaxBins(labelIndices.getNumElements())), - scoreVector_(DenseBinnedScoreVector(labelIndices, maxBins_ + 1, indicesSorted)), - aggregatedStatisticVector_(DenseLabelWiseStatisticVector(maxBins_)), - numElementsPerBin_(new uint32[maxBins_]), criteria_(new float64[labelIndices.getNumElements()]), - l1RegularizationWeight_(l1RegularizationWeight), 
l2RegularizationWeight_(l2RegularizationWeight), - binningPtr_(std::move(binningPtr)) { - // The last bin is used for labels for which the corresponding criterion is zero. For this particular - // bin, the prediction is always zero. - scoreVector_.scores_binned_begin()[maxBins_] = 0; - } - - virtual ~AbstractLabelWiseBinnedRuleEvaluation() override { - delete[] numElementsPerBin_; - delete[] criteria_; - } - - /** - * @see `IRuleEvaluation::evaluate` - */ - const IScoreVector& calculateScores(StatisticVector& statisticVector) override final { - // Calculate label-wise criteria... - uint32 numCriteria = - this->calculateLabelWiseCriteria(statisticVector, criteria_, scoreVector_.getNumElements(), - l1RegularizationWeight_, l2RegularizationWeight_); - - // Obtain information about the bins to be used... - LabelInfo labelInfo = binningPtr_->getLabelInfo(criteria_, numCriteria); - uint32 numBins = labelInfo.numPositiveBins + labelInfo.numNegativeBins; - scoreVector_.setNumBins(numBins, false); - - // Reset arrays to zero... - DenseLabelWiseStatisticVector::iterator aggregatedStatisticIterator = - aggregatedStatisticVector_.begin(); - setArrayToZeros(aggregatedStatisticIterator, numBins); - setArrayToZeros(numElementsPerBin_, numBins); - - // Apply binning method in order to aggregate the gradients and Hessians that belong to the same bins... 
- typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - typename DenseBinnedScoreVector::index_binned_iterator binIndexIterator = - scoreVector_.indices_binned_begin(); - auto callback = [=](uint32 binIndex, uint32 labelIndex) { - aggregatedStatisticIterator[binIndex] += statisticIterator[labelIndex]; - numElementsPerBin_[binIndex] += 1; - binIndexIterator[labelIndex] = binIndex; - }; - auto zeroCallback = [=](uint32 labelIndex) { - binIndexIterator[labelIndex] = maxBins_; - }; - binningPtr_->createBins(labelInfo, criteria_, numCriteria, callback, zeroCallback); - - // Compute predictions, as well as their overall quality... - typename DenseBinnedScoreVector::score_binned_iterator scoreIterator = - scoreVector_.scores_binned_begin(); - scoreVector_.quality = - calculateBinnedScores(aggregatedStatisticIterator, scoreIterator, numElementsPerBin_, numBins, - l1RegularizationWeight_, l2RegularizationWeight_); - return scoreVector_; - } - }; - - /** - * Allows to calculate the predictions of complete rules, as well as their overall quality, based on the gradients - * Hessians that are stored by a vector using L1 and L2 regularization. The labels are assigned to bins based on the - * gradients and Hessians. 
- * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseCompleteBinnedRuleEvaluation final - : public AbstractLabelWiseBinnedRuleEvaluation { - protected: - - uint32 calculateLabelWiseCriteria(const StatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) override { - typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - - for (uint32 i = 0; i < numCriteria; i++) { - const Tuple& tuple = statisticIterator[i]; - criteria[i] = calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight, - l2RegularizationWeight); - } - - return numCriteria; - } - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - */ - LabelWiseCompleteBinnedRuleEvaluation(const IndexVector& labelIndices, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, - std::unique_ptr binningPtr) - : AbstractLabelWiseBinnedRuleEvaluation( - labelIndices, true, l1RegularizationWeight, l2RegularizationWeight, std::move(binningPtr)) {} - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_common.hpp 
b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_common.hpp deleted file mode 100644 index b9fc8560..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_common.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/math/math.hpp" - -namespace boosting { - - /** - * Returns the L1 regularization weight to be added to a specific gradient. - * - * @param gradient The gradient, the L1 regularization weight should be added to - * @param l1RegularizationWeight The L1 regularization weight - * @return The L1 regularization weight to be added to the gradient - */ - static inline constexpr float64 getL1RegularizationWeight(float64 gradient, float64 l1RegularizationWeight) { - if (gradient > l1RegularizationWeight) { - return -l1RegularizationWeight; - } else if (gradient < -l1RegularizationWeight) { - return l1RegularizationWeight; - } else { - return 0; - } - } - - /** - * Calculates and returns the optimal score to be predicted for a single label, based on the corresponding gradient - * and Hessian and taking L1 and L2 regularization into account. - * - * @param gradient The gradient that corresponds to the label - * @param hessian The Hessian that corresponds to the label - * @param l1RegularizationWeight The weight of the L1 regularization - * @param l2RegularizationWeight The weight of the L2 regularization - * @return The predicted score that has been calculated - */ - static inline constexpr float64 calculateLabelWiseScore(float64 gradient, float64 hessian, - float64 l1RegularizationWeight, - float64 l2RegularizationWeight) { - return divideOrZero(-gradient + getL1RegularizationWeight(gradient, l1RegularizationWeight), - hessian + l2RegularizationWeight); - } - - /** - * Calculates and returns the quality of the prediction for a single label, taking L1 and L2 regularization into - * account. 
- * - * @param score The predicted score - * @param gradient The gradient - * @param hessian The Hessian - * @param l1RegularizationWeight The weight of the L1 regularization - * @param l2RegularizationWeight The weight of the L2 regularization - * @return The quality that has been calculated - */ - static inline float64 calculateLabelWiseQuality(float64 score, float64 gradient, float64 hessian, - float64 l1RegularizationWeight, float64 l2RegularizationWeight) { - float64 scorePow = score * score; - float64 quality = (gradient * score) + (0.5 * hessian * scorePow); - float64 l1RegularizationTerm = l1RegularizationWeight * std::abs(score); - float64 l2RegularizationTerm = 0.5 * l2RegularizationWeight * scorePow; - return quality + l1RegularizationTerm + l2RegularizationTerm; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete.cpp deleted file mode 100644 index 1f36f164..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete.cpp +++ /dev/null @@ -1,23 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_complete.hpp" - -#include "rule_evaluation_label_wise_complete_common.hpp" - -namespace boosting { - - LabelWiseCompleteRuleEvaluationFactory::LabelWiseCompleteRuleEvaluationFactory(float64 l1RegularizationWeight, - float64 l2RegularizationWeight) - : l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight) {} - - std::unique_ptr> LabelWiseCompleteRuleEvaluationFactory::create( - const DenseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseCompleteRuleEvaluationFactory::create( - const DenseLabelWiseStatisticVector& statisticVector, const 
PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - -} \ No newline at end of file diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.cpp deleted file mode 100644 index 2b8d0204..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_complete_binned.hpp" - -#include "rule_evaluation_label_wise_binned_common.hpp" - -namespace boosting { - - LabelWiseCompleteBinnedRuleEvaluationFactory::LabelWiseCompleteBinnedRuleEvaluationFactory( - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr) - : l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - labelBinningFactoryPtr_(std::move(labelBinningFactoryPtr)) {} - - std::unique_ptr> - LabelWiseCompleteBinnedRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseCompleteBinnedRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseCompleteBinnedRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseCompleteBinnedRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr)); - } - -} diff --git 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_common.hpp deleted file mode 100644 index 7e447f90..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_complete_common.hpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_evaluation/score_vector_dense.hpp" -#include "rule_evaluation_label_wise_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of complete rules, as well as their overall quality, based on the gradients - * and Hessians that are stored by a vector using L1 and L2 regularization. - * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseCompleteRuleEvaluation final : public IRuleEvaluation { - private: - - DenseScoreVector scoreVector_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseCompleteRuleEvaluation(const IndexVector& labelIndices, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) - : scoreVector_(DenseScoreVector(labelIndices, true)), - l1RegularizationWeight_(l1RegularizationWeight), 
l2RegularizationWeight_(l2RegularizationWeight) {} - - const IScoreVector& calculateScores(StatisticVector& statisticVector) override { - uint32 numElements = statisticVector.getNumElements(); - typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - typename DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - float64 quality = 0; - - for (uint32 i = 0; i < numElements; i++) { - const Tuple& tuple = statisticIterator[i]; - float64 predictedScore = calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight_, - l2RegularizationWeight_); - scoreIterator[i] = predictedScore; - quality += calculateLabelWiseQuality(predictedScore, tuple.first, tuple.second, - l1RegularizationWeight_, l2RegularizationWeight_); - } - - scoreVector_.quality = quality; - return scoreVector_; - } - }; - -} \ No newline at end of file diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.cpp deleted file mode 100644 index 4b4da491..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.cpp +++ /dev/null @@ -1,124 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic.hpp" - -#include "rule_evaluation_label_wise_complete_common.hpp" -#include "rule_evaluation_label_wise_partial_dynamic_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules, which predict for a subset of the available labels that is - * determined dynamically, as well as their overall quality, based on the gradients and Hessians that are stored by - * a vector using L1 and L2 regularization. 
- * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseDynamicPartialRuleEvaluation final : public IRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - PartialIndexVector indexVector_; - - DenseScoreVector scoreVector_; - - const float64 threshold_; - - const float64 exponent_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param threshold A threshold that affects for how many labels the rule heads should - * predict - * @param exponent An exponent that is used to weigh that estimated predictive quality for - * individual labels - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseDynamicPartialRuleEvaluation(const IndexVector& labelIndices, float32 threshold, float32 exponent, - float64 l1RegularizationWeight, float64 l2RegularizationWeight) - : labelIndices_(labelIndices), indexVector_(PartialIndexVector(labelIndices.getNumElements())), - scoreVector_(DenseScoreVector(indexVector_, true)), threshold_(1.0 - threshold), - exponent_(exponent), l1RegularizationWeight_(l1RegularizationWeight), - l2RegularizationWeight_(l2RegularizationWeight) {} - - const IScoreVector& calculateScores(StatisticVector& statisticVector) override { - uint32 numElements = statisticVector.getNumElements(); - typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - 
const std::pair pair = - getMinAndMaxScore(statisticIterator, numElements, l1RegularizationWeight_, l2RegularizationWeight_); - float64 minAbsScore = pair.first; - float64 threshold = calculateThreshold(minAbsScore, pair.second, threshold_, exponent_); - PartialIndexVector::iterator indexIterator = indexVector_.begin(); - DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - float64 quality = 0; - uint32 n = 0; - - for (uint32 i = 0; i < numElements; i++) { - const Tuple& tuple = statisticIterator[i]; - float64 score = calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight_, - l2RegularizationWeight_); - - if (calculateWeightedScore(score, minAbsScore, exponent_) > threshold) { - indexIterator[n] = labelIndexIterator[i]; - scoreIterator[n] = score; - quality += calculateLabelWiseQuality(score, tuple.first, tuple.second, l1RegularizationWeight_, - l2RegularizationWeight_); - n++; - } - } - - indexVector_.setNumElements(n, false); - scoreVector_.quality = quality; - return scoreVector_; - } - }; - - LabelWiseDynamicPartialRuleEvaluationFactory::LabelWiseDynamicPartialRuleEvaluationFactory( - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight) - : threshold_(threshold), exponent_(exponent), l1RegularizationWeight_(l1RegularizationWeight), - l2RegularizationWeight_(l2RegularizationWeight) {} - - std::unique_ptr> - LabelWiseDynamicPartialRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - return std::make_unique< - LabelWiseDynamicPartialRuleEvaluation>( - indexVector, threshold_, exponent_, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> - LabelWiseDynamicPartialRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) 
const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> - LabelWiseDynamicPartialRuleEvaluationFactory::create(const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - return std::make_unique< - LabelWiseDynamicPartialRuleEvaluation>( - indexVector, threshold_, exponent_, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> - LabelWiseDynamicPartialRuleEvaluationFactory::create(const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.cpp deleted file mode 100644 index 4a2a5b92..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.cpp +++ /dev/null @@ -1,138 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_binned.hpp" - -#include "rule_evaluation_label_wise_binned_common.hpp" -#include "rule_evaluation_label_wise_partial_dynamic_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that predict for a subset of the available labels that is - * determined dynamically, as well as their overall quality, based on the gradients and Hessians that are stored by - * a vector using L1 and L2 regularization. The labels are assigned to bins based on the gradients and Hessians. 
- * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseDynamicPartialBinnedRuleEvaluation final - : public AbstractLabelWiseBinnedRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - const std::unique_ptr indexVectorPtr_; - - const float64 threshold_; - - const float64 exponent_; - - protected: - - uint32 calculateLabelWiseCriteria(const StatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) override { - uint32 numElements = statisticVector.getNumElements(); - typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - const std::pair pair = - getMinAndMaxScore(statisticIterator, numElements, l1RegularizationWeight, l2RegularizationWeight); - float64 minAbsScore = pair.first; - float64 threshold = calculateThreshold(minAbsScore, pair.second, threshold_, exponent_); - PartialIndexVector::iterator indexIterator = indexVectorPtr_->begin(); - typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - uint32 n = 0; - - for (uint32 i = 0; i < numElements; i++) { - const Tuple& tuple = statisticIterator[i]; - float64 score = calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight, - l2RegularizationWeight); - - if (calculateWeightedScore(score, minAbsScore, exponent_) > threshold) { - indexIterator[n] = labelIndexIterator[i]; - criteria[n] = score; - n++; - } - } - - indexVectorPtr_->setNumElements(n, false); - return n; - } - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param indexVectorPtr An unique pointer to an object of type `PartialIndexVector` 
that stores - * the indices of the labels for which a rule predicts - * @param threshold A threshold that affects for how many labels the rule heads should - * predict - * @param exponent An exponent that is used to weigh the estimated predictive quality for - * individual labels - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - */ - LabelWiseDynamicPartialBinnedRuleEvaluation(const IndexVector& labelIndices, - std::unique_ptr indexVectorPtr, - float32 threshold, float32 exponent, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr binningPtr) - : AbstractLabelWiseBinnedRuleEvaluation( - *indexVectorPtr, true, l1RegularizationWeight, l2RegularizationWeight, std::move(binningPtr)), - labelIndices_(labelIndices), indexVectorPtr_(std::move(indexVectorPtr)), threshold_(1.0 - threshold), - exponent_(exponent) {} - }; - - LabelWiseDynamicPartialBinnedRuleEvaluationFactory::LabelWiseDynamicPartialBinnedRuleEvaluationFactory( - float32 threshold, float32 exponent, float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr labelBinningFactoryPtr) - : threshold_(threshold), exponent_(exponent), l1RegularizationWeight_(l1RegularizationWeight), - l2RegularizationWeight_(l2RegularizationWeight), labelBinningFactoryPtr_(std::move(labelBinningFactoryPtr)) {} - - std::unique_ptr> - LabelWiseDynamicPartialBinnedRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - std::unique_ptr indexVectorPtr = - std::make_unique(indexVector.getNumElements()); - std::unique_ptr labelBinningPtr = 
labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseDynamicPartialBinnedRuleEvaluation>( - indexVector, std::move(indexVectorPtr), threshold_, exponent_, l1RegularizationWeight_, - l2RegularizationWeight_, std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseDynamicPartialBinnedRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseCompleteBinnedRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseDynamicPartialBinnedRuleEvaluationFactory::create(const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - std::unique_ptr indexVectorPtr = - std::make_unique(indexVector.getNumElements()); - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseDynamicPartialBinnedRuleEvaluation>( - indexVector, std::move(indexVectorPtr), threshold_, exponent_, l1RegularizationWeight_, - l2RegularizationWeight_, std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseDynamicPartialBinnedRuleEvaluationFactory::create(const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseCompleteBinnedRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr)); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_common.hpp deleted file mode 100644 index a3369737..00000000 --- 
a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_dynamic_common.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_label_wise_dense.hpp" -#include "rule_evaluation_label_wise_common.hpp" - -#include - -namespace boosting { - - /** - * Determines and returns the minimum and maximum absolute score to be predicted for a label. - * - * @tparam StatisticIterator The type of the iterator that provides access to the gradients and Hessians - * @param statisticIterator An iterator that provides access to the gradients and Hessians for each label - * @param numLabels The total number of available labels - * @param l1RegularizationWeight The l2 regularization weight - * @param l2RegularizationWeight The L1 regularization weight - * @return A `std::pair` that stores the minimum and maximum absolute score - */ - template - static inline std::pair getMinAndMaxScore(StatisticIterator& statisticIterator, uint32 numLabels, - float64 l1RegularizationWeight, - float64 l2RegularizationWeight) { - const Tuple& firstTuple = statisticIterator[0]; - float64 maxAbsScore = std::abs( - calculateLabelWiseScore(firstTuple.first, firstTuple.second, l1RegularizationWeight, l2RegularizationWeight)); - float64 minAbsScore = maxAbsScore; - - for (uint32 i = 1; i < numLabels; i++) { - const Tuple& tuple = statisticIterator[i]; - float64 absScore = std::abs( - calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight, l2RegularizationWeight)); - - if (absScore > maxAbsScore) { - maxAbsScore = absScore; - } else if (absScore < minAbsScore) { - minAbsScore = absScore; - } - } - - return std::make_pair(minAbsScore, maxAbsScore); - } - - /** - * Calculates and returns the threshold that should be used to decide whether a rule should predict for a label or - * not. 
- * - * @param minAbsScore The minimum absolute score to be predicted for a label - * @param maxAbsScore The maximum absolute score to be predicted for a label - * @param threshold A threshold that affects for how many labels the rule heads should predict - * @param exponent An exponent that is used to weigh the estimated predictive quality for individual labels - * @return The threshold that has been calculated - */ - static inline float64 calculateThreshold(float64 minAbsScore, float64 maxAbsScore, float64 threshold, - float64 exponent) { - return std::pow(maxAbsScore - minAbsScore, exponent) * threshold; - } - - /** - * Weighs and returns the score that is predicted for a particular label, depending on the minimum absolute score - * that has been determined via the function `getMinMaxScore` and a given exponent. - * - * @param score The score to be predicted - * @param minAbsScore The minimum absolute score to be predicted for a label - * @param exponent An exponent that is used to weigh the estimated predictive quality for individual labels - * @return The weighted score that has been calculated - */ - static inline float64 calculateWeightedScore(float64 score, float64 minAbsScore, float64 exponent) { - return std::pow(std::abs(score) - minAbsScore, exponent); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.cpp deleted file mode 100644 index cbebf63e..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed.hpp" - -#include "rule_evaluation_label_wise_complete_common.hpp" -#include "rule_evaluation_label_wise_partial_fixed_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that 
predict for a predefined number of labels, as well as - * their overall quality, based on the gradients and Hessians that are stored by a vector using L1 and L2 - * regularization. - * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseFixedPartialRuleEvaluation final : public IRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - PartialIndexVector indexVector_; - - DenseScoreVector scoreVector_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - SparseArrayVector tmpVector_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param numPredictions The number of labels for which the rules should predict - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseFixedPartialRuleEvaluation(const IndexVector& labelIndices, uint32 numPredictions, - float64 l1RegularizationWeight, float64 l2RegularizationWeight) - : labelIndices_(labelIndices), indexVector_(PartialIndexVector(numPredictions)), - scoreVector_(DenseScoreVector(indexVector_, false)), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - tmpVector_(SparseArrayVector(labelIndices.getNumElements())) {} - - const IScoreVector& calculateScores(StatisticVector& statisticVector) override { - uint32 numElements = statisticVector.getNumElements(); - uint32 numPredictions = indexVector_.getNumElements(); - typename 
StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - SparseArrayVector::iterator tmpIterator = tmpVector_.begin(); - sortLabelWiseScores(tmpIterator, statisticIterator, numElements, numPredictions, - l1RegularizationWeight_, l2RegularizationWeight_); - PartialIndexVector::iterator indexIterator = indexVector_.begin(); - DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - float64 quality = 0; - - for (uint32 i = 0; i < numPredictions; i++) { - const IndexedValue& entry = tmpIterator[i]; - uint32 index = entry.index; - float64 predictedScore = entry.value; - indexIterator[i] = labelIndexIterator[index]; - scoreIterator[i] = predictedScore; - const Tuple& tuple = statisticIterator[index]; - quality += calculateLabelWiseQuality(predictedScore, tuple.first, tuple.second, - l1RegularizationWeight_, l2RegularizationWeight_); - } - - scoreVector_.quality = quality; - return scoreVector_; - } - }; - - LabelWiseFixedPartialRuleEvaluationFactory::LabelWiseFixedPartialRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) - : labelRatio_(labelRatio), minLabels_(minLabels), maxLabels_(maxLabels), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight) {} - - std::unique_ptr> LabelWiseFixedPartialRuleEvaluationFactory::create( - const DenseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - uint32 numPredictions = - calculateBoundedFraction(indexVector.getNumElements(), labelRatio_, minLabels_, maxLabels_); - return std::make_unique< - LabelWiseFixedPartialRuleEvaluation>( - indexVector, numPredictions, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseFixedPartialRuleEvaluationFactory::create( - const DenseLabelWiseStatisticVector& 
statisticVector, const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseFixedPartialRuleEvaluationFactory::create( - const SparseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - uint32 numPredictions = - calculateBoundedFraction(indexVector.getNumElements(), labelRatio_, minLabels_, maxLabels_); - return std::make_unique< - LabelWiseFixedPartialRuleEvaluation>( - indexVector, numPredictions, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseFixedPartialRuleEvaluationFactory::create( - const SparseLabelWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.cpp deleted file mode 100644 index f7cb036d..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_binned.hpp" - -#include "rule_evaluation_label_wise_binned_common.hpp" -#include "rule_evaluation_label_wise_partial_fixed_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of partial rules that predict for a predefined number of labels, as well as - * their overall quality, based on the gradients and Hessians that are stored by a vector using L1 and L2 - * regularization. The labels are assigned to bins based on the gradients and Hessians. 
- * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseFixedPartialBinnedRuleEvaluation final - : public AbstractLabelWiseBinnedRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - const std::unique_ptr indexVectorPtr_; - - SparseArrayVector tmpVector_; - - protected: - - uint32 calculateLabelWiseCriteria(const StatisticVector& statisticVector, float64* criteria, - uint32 numCriteria, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) override { - uint32 numElements = statisticVector.getNumElements(); - typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - SparseArrayVector::iterator tmpIterator = tmpVector_.begin(); - sortLabelWiseScores(tmpIterator, statisticIterator, numElements, numCriteria, l1RegularizationWeight, - l2RegularizationWeight); - PartialIndexVector::iterator indexIterator = indexVectorPtr_->begin(); - typename IndexVector::const_iterator labelIndexIterator = labelIndices_.cbegin(); - - for (uint32 i = 0; i < numCriteria; i++) { - const IndexedValue& entry = tmpIterator[i]; - indexIterator[i] = labelIndexIterator[entry.index]; - criteria[i] = entry.value; - } - - return numCriteria; - } - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param indexVectorPtr An unique pointer to an object of type `PartialIndexVector` that stores - * the indices of the labels for which a rule predicts - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores 
to be predicted by rules - * @param binningPtr An unique pointer to an object of type `ILabelBinning` that should be - * used to assign labels to bins - */ - LabelWiseFixedPartialBinnedRuleEvaluation(const IndexVector& labelIndices, - std::unique_ptr indexVectorPtr, - float64 l1RegularizationWeight, float64 l2RegularizationWeight, - std::unique_ptr binningPtr) - : AbstractLabelWiseBinnedRuleEvaluation( - *indexVectorPtr, false, l1RegularizationWeight, l2RegularizationWeight, std::move(binningPtr)), - labelIndices_(labelIndices), indexVectorPtr_(std::move(indexVectorPtr)), - tmpVector_(SparseArrayVector(labelIndices.getNumElements())) {} - }; - - LabelWiseFixedPartialBinnedRuleEvaluationFactory::LabelWiseFixedPartialBinnedRuleEvaluationFactory( - float32 labelRatio, uint32 minLabels, uint32 maxLabels, float64 l1RegularizationWeight, - float64 l2RegularizationWeight, std::unique_ptr labelBinningFactoryPtr) - : labelRatio_(labelRatio), minLabels_(minLabels), maxLabels_(maxLabels), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight), - labelBinningFactoryPtr_(std::move(labelBinningFactoryPtr)) {} - - std::unique_ptr> - LabelWiseFixedPartialBinnedRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - std::unique_ptr indexVectorPtr = std::make_unique( - calculateBoundedFraction(indexVector.getNumElements(), labelRatio_, minLabels_, maxLabels_)); - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseFixedPartialBinnedRuleEvaluation>( - indexVector, std::move(indexVectorPtr), l1RegularizationWeight_, l2RegularizationWeight_, - std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseFixedPartialBinnedRuleEvaluationFactory::create(const DenseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = 
labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseCompleteBinnedRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseFixedPartialBinnedRuleEvaluationFactory::create(const SparseLabelWiseStatisticVector& statisticVector, - const CompleteIndexVector& indexVector) const { - std::unique_ptr indexVectorPtr = std::make_unique( - calculateBoundedFraction(indexVector.getNumElements(), labelRatio_, minLabels_, maxLabels_)); - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseFixedPartialBinnedRuleEvaluation>( - indexVector, std::move(indexVectorPtr), l1RegularizationWeight_, l2RegularizationWeight_, - std::move(labelBinningPtr)); - } - - std::unique_ptr> - LabelWiseFixedPartialBinnedRuleEvaluationFactory::create(const SparseLabelWiseStatisticVector& statisticVector, - const PartialIndexVector& indexVector) const { - std::unique_ptr labelBinningPtr = labelBinningFactoryPtr_->create(); - return std::make_unique< - LabelWiseCompleteBinnedRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_, std::move(labelBinningPtr)); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_common.hpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_common.hpp deleted file mode 100644 index 4942b7c9..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_partial_fixed_common.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/statistic_vector_label_wise_dense.hpp" -#include "common/data/vector_sparse_array.hpp" -#include "rule_evaluation_label_wise_common.hpp" - -#include - -namespace boosting { - - /** - * Allows to compare two objects of type `IndexedValue` that 
store the optimal prediction for a labels, as well as - * its index, according to the following strict weak ordering: If the absolute value of the first object is greater, - * it goes before the second one. - */ - struct CompareLabelWiseCriteria final { - public: - - /** - * Returns whether the a given object of type `IndexedValue` that stores the optimal prediction for a label, - * as well as its index, should go before a second one. - * - * @param lhs A reference to a first object of type `IndexedValue` - * @param rhs A reference to a second object of type `IndexedValue` - * @return True, if the first object should go before the second one, false otherwise - */ - inline bool operator()(const IndexedValue& lhs, const IndexedValue& rhs) const { - return std::abs(lhs.value) > std::abs(rhs.value); - } - }; - - /** - * Calculates the scores to be predicted for individual labels and sorts them by their quality, such that the first - * `numPredictions` elements are the best-rated ones. - * - * @tparam StatisticIterator The type of the iterator that provides access to the gradients and Hessians - * @param tmpIterator An iterator that provides random access to a temporary array, which should be - * used to store the sorted scores and their original indices - * @param statisticIterator An iterator that provides access to the gradients and Hessians for each label - * @param numLabels The total number of available labels - * @param numPrediction The number of the best-rated predictions to be determined - * @param l1RegularizationWeight The l2 regularization weight - * @param l2RegularizationWeight The L1 regularization weight - */ - template - static inline void sortLabelWiseScores(SparseArrayVector::iterator tmpIterator, - StatisticIterator& statisticIterator, uint32 numLabels, - uint32 numPredictions, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) { - for (uint32 i = 0; i < numLabels; i++) { - const Tuple& tuple = statisticIterator[i]; - IndexedValue& 
entry = tmpIterator[i]; - entry.index = i; - entry.value = - calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight, l2RegularizationWeight); - } - - std::partial_sort(tmpIterator, &tmpIterator[numPredictions], &tmpIterator[numLabels], - CompareLabelWiseCriteria()); - } - -} \ No newline at end of file diff --git a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_single.cpp b/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_single.cpp deleted file mode 100644 index 3929e71b..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_evaluation/rule_evaluation_label_wise_single.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "boosting/rule_evaluation/rule_evaluation_label_wise_single.hpp" - -#include "common/rule_evaluation/score_vector_dense.hpp" -#include "rule_evaluation_label_wise_common.hpp" - -namespace boosting { - - /** - * Allows to calculate the predictions of single-label rules, as well as their overall quality, based on the - * gradients and Hessians that are stored by a vector using L1 and L2 regularization. 
- * - * @tparam StatisticVector The type of the vector that provides access to the gradients and Hessians - * @tparam IndexVector The type of the vector that provides access to the labels for which predictions should - * be calculated - */ - template - class LabelWiseSingleLabelRuleEvaluation final : public IRuleEvaluation { - private: - - const IndexVector& labelIndices_; - - PartialIndexVector indexVector_; - - DenseScoreVector scoreVector_; - - const float64 l1RegularizationWeight_; - - const float64 l2RegularizationWeight_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels for which the rules may predict - * @param l1RegularizationWeight The weight of the L1 regularization that is applied for calculating the - * scores to be predicted by rules - * @param l2RegularizationWeight The weight of the L2 regularization that is applied for calculating the - * scores to be predicted by rules - */ - LabelWiseSingleLabelRuleEvaluation(const IndexVector& labelIndices, float64 l1RegularizationWeight, - float64 l2RegularizationWeight) - : labelIndices_(labelIndices), indexVector_(PartialIndexVector(1)), - scoreVector_(DenseScoreVector(indexVector_, true)), - l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight) {} - - const IScoreVector& calculateScores(StatisticVector& statisticVector) override { - uint32 numElements = statisticVector.getNumElements(); - typename StatisticVector::const_iterator statisticIterator = statisticVector.cbegin(); - const Tuple& firstTuple = statisticIterator[0]; - float64 bestScore = calculateLabelWiseScore(firstTuple.first, firstTuple.second, - l1RegularizationWeight_, l2RegularizationWeight_); - uint32 bestIndex = 0; - - for (uint32 i = 1; i < numElements; i++) { - const Tuple& tuple = statisticIterator[i]; - float64 score = calculateLabelWiseScore(tuple.first, tuple.second, l1RegularizationWeight_, - 
l2RegularizationWeight_); - - if (std::abs(score) > std::abs(bestScore)) { - bestIndex = i; - bestScore = score; - } - } - - DenseScoreVector::score_iterator scoreIterator = scoreVector_.scores_begin(); - scoreIterator[0] = bestScore; - indexVector_.begin()[0] = labelIndices_.cbegin()[bestIndex]; - scoreVector_.quality = calculateLabelWiseQuality(bestScore, statisticIterator[bestIndex].first, - statisticIterator[bestIndex].second, - l1RegularizationWeight_, l2RegularizationWeight_); - return scoreVector_; - } - }; - - LabelWiseSingleLabelRuleEvaluationFactory::LabelWiseSingleLabelRuleEvaluationFactory(float64 l1RegularizationWeight, - float64 l2RegularizationWeight) - : l1RegularizationWeight_(l1RegularizationWeight), l2RegularizationWeight_(l2RegularizationWeight) {} - - std::unique_ptr> LabelWiseSingleLabelRuleEvaluationFactory::create( - const DenseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseSingleLabelRuleEvaluationFactory::create( - const DenseLabelWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseSingleLabelRuleEvaluationFactory::create( - const SparseLabelWiseStatisticVector& statisticVector, const CompleteIndexVector& indexVector) const { - return std::make_unique< - LabelWiseSingleLabelRuleEvaluation>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - - std::unique_ptr> LabelWiseSingleLabelRuleEvaluationFactory::create( - const SparseLabelWiseStatisticVector& statisticVector, const PartialIndexVector& indexVector) const { - return std::make_unique>( - indexVector, l1RegularizationWeight_, l2RegularizationWeight_); - } - -} \ No newline at end of file diff --git 
a/cpp/subprojects/boosting/src/boosting/rule_model_assemblage/default_rule_auto.cpp b/cpp/subprojects/boosting/src/boosting/rule_model_assemblage/default_rule_auto.cpp deleted file mode 100644 index 05b20f23..00000000 --- a/cpp/subprojects/boosting/src/boosting/rule_model_assemblage/default_rule_auto.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include "boosting/rule_model_assemblage/default_rule_auto.hpp" - -namespace boosting { - - AutomaticDefaultRuleConfig::AutomaticDefaultRuleConfig( - const std::unique_ptr& statisticsConfigPtr, const std::unique_ptr& lossConfigPtr, - const std::unique_ptr& headConfigPtr) - : statisticsConfigPtr_(statisticsConfigPtr), lossConfigPtr_(lossConfigPtr), headConfigPtr_(headConfigPtr) {} - - bool AutomaticDefaultRuleConfig::isDefaultRuleUsed(const IRowWiseLabelMatrix& labelMatrix) const { - if (statisticsConfigPtr_->isDense()) { - return true; - } else if (statisticsConfigPtr_->isSparse()) { - return !lossConfigPtr_->isSparse(); - } else { - return !lossConfigPtr_->isSparse() - || !shouldSparseStatisticsBePreferred(labelMatrix, false, headConfigPtr_->isPartial()); - } - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/sampling/partition_sampling_auto.cpp b/cpp/subprojects/boosting/src/boosting/sampling/partition_sampling_auto.cpp deleted file mode 100644 index c5d1ca00..00000000 --- a/cpp/subprojects/boosting/src/boosting/sampling/partition_sampling_auto.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "boosting/sampling/partition_sampling_auto.hpp" - -#include "common/sampling/partition_sampling_bi_stratified_label_wise.hpp" -#include "common/sampling/partition_sampling_no.hpp" - -namespace boosting { - - AutomaticPartitionSamplingConfig::AutomaticPartitionSamplingConfig( - const std::unique_ptr& globalPruningConfigPtr, - const std::unique_ptr& marginalProbabilityCalibratorConfigPtr, - const std::unique_ptr& jointProbabilityCalibratorConfigPtr) - : globalPruningConfigPtr_(globalPruningConfigPtr), - 
marginalProbabilityCalibratorConfigPtr_(marginalProbabilityCalibratorConfigPtr), - jointProbabilityCalibratorConfigPtr_(jointProbabilityCalibratorConfigPtr) {} - - std::unique_ptr AutomaticPartitionSamplingConfig::createPartitionSamplingFactory() - const { - if ((globalPruningConfigPtr_.get() && globalPruningConfigPtr_->shouldUseHoldoutSet()) - || marginalProbabilityCalibratorConfigPtr_->shouldUseHoldoutSet() - || jointProbabilityCalibratorConfigPtr_->shouldUseHoldoutSet()) { - return LabelWiseStratifiedBiPartitionSamplingConfig().createPartitionSamplingFactory(); - } - - return NoPartitionSamplingConfig().createPartitionSamplingFactory(); - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_auto.cpp b/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_auto.cpp deleted file mode 100644 index 534129b3..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_auto.cpp +++ /dev/null @@ -1,27 +0,0 @@ -#include "boosting/statistics/statistic_format_auto.hpp" - -namespace boosting { - - AutomaticStatisticsConfig::AutomaticStatisticsConfig( - const std::unique_ptr& lossConfigPtr, const std::unique_ptr& headConfigPtr, - const std::unique_ptr& defaultRuleConfigPtr) - : lossConfigPtr_(lossConfigPtr), headConfigPtr_(headConfigPtr), defaultRuleConfigPtr_(defaultRuleConfigPtr) {} - - std::unique_ptr AutomaticStatisticsConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const { - bool preferSparseStatistics = shouldSparseStatisticsBePreferred( - labelMatrix, defaultRuleConfigPtr_->isDefaultRuleUsed(labelMatrix), headConfigPtr_->isPartial()); - return lossConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, blas, lapack, - preferSparseStatistics); - } - - bool AutomaticStatisticsConfig::isDense() const { - return false; - } - - bool AutomaticStatisticsConfig::isSparse() const { - 
return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_dense.cpp b/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_dense.cpp deleted file mode 100644 index baf0b08c..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_dense.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "boosting/statistics/statistic_format_dense.hpp" - -namespace boosting { - - DenseStatisticsConfig::DenseStatisticsConfig(const std::unique_ptr& lossConfigPtr) - : lossConfigPtr_(lossConfigPtr) {} - - std::unique_ptr DenseStatisticsConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const { - return lossConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, blas, lapack, false); - } - - bool DenseStatisticsConfig::isDense() const { - return true; - } - - bool DenseStatisticsConfig::isSparse() const { - return false; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_sparse.cpp b/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_sparse.cpp deleted file mode 100644 index 4dd1be13..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistic_format_sparse.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "boosting/statistics/statistic_format_sparse.hpp" - -namespace boosting { - - SparseStatisticsConfig::SparseStatisticsConfig(const std::unique_ptr& lossConfigPtr) - : lossConfigPtr_(lossConfigPtr) {} - - std::unique_ptr SparseStatisticsConfig::createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix, const Blas& blas, - const Lapack& lapack) const { - return lossConfigPtr_->createStatisticsProviderFactory(featureMatrix, labelMatrix, blas, lapack, true); - } - - bool SparseStatisticsConfig::isDense() const { - return false; - } - - bool SparseStatisticsConfig::isSparse() 
const { - return true; - } - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_example_wise_common.hpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_example_wise_common.hpp deleted file mode 100644 index 0c215a6c..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_example_wise_common.hpp +++ /dev/null @@ -1,1027 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/statistics/statistics_example_wise.hpp" -#include "common/binning/bin_weight_vector.hpp" - -namespace boosting { - - static inline bool hasNonZeroWeightExampleWise(const EqualWeightVector& weights, uint32 statisticIndex) { - return true; - } - - template - static inline bool hasNonZeroWeightExampleWise(const WeightVector& weights, uint32 statisticIndex) { - return weights[statisticIndex] != 0; - } - - template - static inline void addExampleWiseStatisticToSubset(const EqualWeightVector& weights, - const StatisticView& statisticView, StatisticVector& vector, - const IndexVector& labelIndices, uint32 statisticIndex) { - vector.addToSubset(statisticView.gradients_cbegin(statisticIndex), statisticView.gradients_cend(statisticIndex), - statisticView.hessians_cbegin(statisticIndex), statisticView.hessians_cend(statisticIndex), - labelIndices); - } - - template - static inline void addExampleWiseStatisticToSubset(const WeightVector& weights, const StatisticView& statisticView, - StatisticVector& vector, const IndexVector& labelIndices, - uint32 statisticIndex) { - float64 weight = weights[statisticIndex]; - vector.addToSubset(statisticView.gradients_cbegin(statisticIndex), statisticView.gradients_cend(statisticIndex), - statisticView.hessians_cbegin(statisticIndex), statisticView.hessians_cend(statisticIndex), - labelIndices, weight); - } - - /** - * A subset of gradients and Hessians that are calculated according to a differentiable loss function that is - * applied example-wise and are 
accessible via a view. - * - * @tparam StatisticVector The type of the vector that is used to store the sums of gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that - * are included in the subset - */ - template - class ExampleWiseStatisticsSubset : virtual public IStatisticsSubset { - protected: - - /** - * An object of template type `StatisticVector` that stores the sums of gradients and Hessians. - */ - StatisticVector sumVector_; - - /** - * A reference to an object of template type `StatisticView` that provides access to the gradients and - * Hessians. - */ - const StatisticView& statisticView_; - - /** - * A reference to an object of template type `WeightVector` that provides access to the weights of - * individual statistics. - */ - const WeightVector& weights_; - - /** - * A reference to an object of template type `IndexVector` that provides access to the indices of the labels - * that are included in the subset. - */ - const IndexVector& labelIndices_; - - /** - * An unique pointer to an object of type `IRuleEvaluation` that is used to calculate the predictions of - * rules, as well as their overall quality. 
- */ - const std::unique_ptr> ruleEvaluationPtr_; - - public: - - /** - * @param statisticView A reference to an object of template type `StatisticView` that provides - * access to the gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` that - * allows to create instances of the class that is used for calculating the - * predictions of rules, as well as their overall quality - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual statistics - * @param labelIndices A reference to an object of template type `IndexVector` that provides access - * to the indices of the labels that are included in the subset - */ - ExampleWiseStatisticsSubset(const StatisticView& statisticView, - const RuleEvaluationFactory& ruleEvaluationFactory, const WeightVector& weights, - const IndexVector& labelIndices) - : sumVector_(StatisticVector(labelIndices.getNumElements(), true)), statisticView_(statisticView), - weights_(weights), labelIndices_(labelIndices), - ruleEvaluationPtr_(ruleEvaluationFactory.create(sumVector_, labelIndices)) {} - - /** - * @see `IStatisticsSubset::hasNonZeroWeight` - */ - bool hasNonZeroWeight(uint32 statisticIndex) const override final { - return hasNonZeroWeightExampleWise(weights_, statisticIndex); - } - - /** - * @see `IStatisticsSubset::addToSubset` - */ - void addToSubset(uint32 statisticIndex) override final { - addExampleWiseStatisticToSubset(weights_, statisticView_, sumVector_, labelIndices_, statisticIndex); - } - - /** - * @see `IStatisticsSubset::calculateScores` - */ - const IScoreVector& calculateScores() override final { - return ruleEvaluationPtr_->calculateScores(sumVector_); - } - }; - - /** - * An abstract base class for all statistics that provide access to gradients and Hessians that are calculated - * according to a differentiable loss function that is applied example-wise. 
- * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class AbstractExampleWiseImmutableWeightedStatistics : virtual public IImmutableWeightedStatistics { - protected: - - /** - * An abstract base class for all subsets of the gradients and Hessians that are stored by an instance of - * the class `AbstractExampleWiseImmutableWeightedStatistics`. - * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class AbstractWeightedStatisticsSubset - : public ExampleWiseStatisticsSubset, - virtual public IWeightedStatisticsSubset { - private: - - StatisticVector tmpVector_; - - std::unique_ptr accumulatedSumVectorPtr_; - - protected: - - /** - * A pointer to an object of template type `StatisticVector` that stores the total sum of all - * gradients and Hessians. 
- */ - const StatisticVector* totalSumVector_; - - public: - - /** - * @param statistics A reference to an object of type - * `AbstractExampleWiseImmutableWeightedStatistics` that stores the - * gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - AbstractWeightedStatisticsSubset(const AbstractExampleWiseImmutableWeightedStatistics& statistics, - const StatisticVector& totalSumVector, - const IndexVector& labelIndices) - : ExampleWiseStatisticsSubset(statistics.statisticView_, - statistics.ruleEvaluationFactory_, - statistics.weights_, labelIndices), - tmpVector_(StatisticVector(labelIndices.getNumElements())), totalSumVector_(&totalSumVector) { - } - - /** - * @see `IWeightedStatisticsSubset::resetSubset` - */ - void resetSubset() override final { - if (!accumulatedSumVectorPtr_) { - // Create a vector for storing the accumulated sums of gradients and Hessians, if - // necessary... - accumulatedSumVectorPtr_ = std::make_unique(this->sumVector_); - } else { - // Add the sums of gradients and Hessians to the accumulated sums of gradients and - // Hessians... - accumulatedSumVectorPtr_->add( - this->sumVector_.gradients_cbegin(), this->sumVector_.gradients_cend(), - this->sumVector_.hessians_cbegin(), this->sumVector_.hessians_cend()); - } - - // Reset the sums of gradients and Hessians to zero... 
- this->sumVector_.clear(); - } - - /** - * @see `IWeightedStatisticsSubset::calculateScoresAccumulated` - */ - const IScoreVector& calculateScoresAccumulated() override final { - return this->ruleEvaluationPtr_->calculateScores(*accumulatedSumVectorPtr_); - } - - /** - * @see `IWeightedStatisticsSubset::calculateScoresUncovered` - */ - const IScoreVector& calculateScoresUncovered() override final { - tmpVector_.difference(totalSumVector_->gradients_cbegin(), totalSumVector_->gradients_cend(), - totalSumVector_->hessians_cbegin(), totalSumVector_->hessians_cend(), - this->labelIndices_, this->sumVector_.gradients_cbegin(), - this->sumVector_.gradients_cend(), this->sumVector_.hessians_cbegin(), - this->sumVector_.hessians_cend()); - return this->ruleEvaluationPtr_->calculateScores(tmpVector_); - } - - /** - * @see `IWeightedStatisticsSubset::calculateScoresUncoveredAccumulated` - */ - const IScoreVector& calculateScoresUncoveredAccumulated() override final { - tmpVector_.difference( - totalSumVector_->gradients_cbegin(), totalSumVector_->gradients_cend(), - totalSumVector_->hessians_cbegin(), totalSumVector_->hessians_cend(), this->labelIndices_, - accumulatedSumVectorPtr_->gradients_cbegin(), accumulatedSumVectorPtr_->gradients_cend(), - accumulatedSumVectorPtr_->hessians_cbegin(), accumulatedSumVectorPtr_->hessians_cend()); - return this->ruleEvaluationPtr_->calculateScores(tmpVector_); - } - }; - - protected: - - /** - * A reference to an object of template type `StatisticView` that stores the gradients and Hessians. - */ - const StatisticView& statisticView_; - - /** - * A reference to an object of template type `RuleEvaluationFactory` that is used to create instances of the - * class that is used for calculating the predictions of rules, as well as their overall quality. 
- */ - const RuleEvaluationFactory& ruleEvaluationFactory_; - - /** - * A reference to an object of template type `WeightVector` that provides access to the weights of - * individual statistics. - */ - const WeightVector& weights_; - - public: - - /** - * @param statisticView A reference to an object of template type `StatisticView` that provides - * access to the gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` that - * allows to create instances of the class that should be used for calculating - * the predictions of rules, as well as their overall quality - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual statistics - */ - AbstractExampleWiseImmutableWeightedStatistics(const StatisticView& statisticView, - const RuleEvaluationFactory& ruleEvaluationFactory, - const WeightVector& weights) - : statisticView_(statisticView), ruleEvaluationFactory_(ruleEvaluationFactory), weights_(weights) {} - - /** - * @see `IImmutableWeightedStatistics::getNumStatistics` - */ - uint32 getNumStatistics() const override final { - return statisticView_.getNumRows(); - } - - /** - * @see `IImmutableWeightedStatistics::getNumLabels` - */ - uint32 getNumLabels() const override final { - return statisticView_.getNumCols(); - } - }; - - /** - * Provides access to gradients and Hessians that are calculated according to a differentiable loss function that is - * applied example-wise and are organized as a histogram. 
- * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the original gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam BinIndexVector The type of the vector that stores the indices of the bins, individual examples - * have been assigned to - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class ExampleWiseHistogram final - : virtual public IHistogram, - public AbstractExampleWiseImmutableWeightedStatistics { - private: - - /** - * Provides access to a subset of the gradients and Hessians that are stored by an instance of the class - * `ExampleWiseHistogram`. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class WeightedStatisticsSubset final - : public AbstractExampleWiseImmutableWeightedStatistics< - StatisticVector, Histogram, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset { - private: - - const ExampleWiseHistogram& histogram_; - - std::unique_ptr totalCoverableSumVectorPtr_; - - public: - - /** - * @param histogram A reference to an object of type `ExampleWiseHistogram` that stores the - * gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - WeightedStatisticsSubset(const ExampleWiseHistogram& histogram, - const StatisticVector& totalSumVector, const IndexVector& labelIndices) - : AbstractExampleWiseImmutableWeightedStatistics< - StatisticVector, Histogram, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset(histogram, - totalSumVector, - labelIndices), - histogram_(histogram) {} - - /** - * @see `IWeightedStatisticsSubset::addToMissing` - */ - void addToMissing(uint32 statisticIndex) override { - // Create a vector for storing the totals sums of gradients and Hessians, if necessary... - if (!totalCoverableSumVectorPtr_) { - totalCoverableSumVectorPtr_ = std::make_unique(*this->totalSumVector_); - this->totalSumVector_ = totalCoverableSumVectorPtr_.get(); - } - - // Subtract the gradients and Hessians of the example at the given index (weighted by the given - // weight) from the total sums of gradients and Hessians... 
- removeExampleWiseStatistic(histogram_.originalWeights_, histogram_.originalStatisticView_, - *totalCoverableSumVectorPtr_, statisticIndex); - } - }; - - const std::unique_ptr histogramPtr_; - - const std::unique_ptr binWeightVectorPtr_; - - const BinIndexVector& binIndexVector_; - - const StatisticView& originalStatisticView_; - - const WeightVector& originalWeights_; - - const StatisticVector& totalSumVector_; - - public: - - /** - * @param histogramPtr An unique pointer to an object of template type `Histogram` that stores the - * gradients and Hessians in the histogram - * @param binWeightVectorPtr An unique pointer to an object of type `BinWeightVector` that stores the - * weights of individual bins - * @param binIndexVector A reference to an object of template type `BinIndexVector` that stores the - * indices of the bins, individual examples have been assigned to - * @param originalStatisticView A reference to an object of template type `StatisticView` that provides - * access to the original gradients and Hessians, the histogram was created - * from - * @param originalWeights A reference to an object of template type `WeightVector` that provides - * access to the weights of the original statistics, the histogram was created - * from - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores the - * total sums of gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of type `RuleEvaluationFactory` that allows to - * create instances of the class that should be used for calculating the - * predictions of rules, as well as their overall quality - */ - ExampleWiseHistogram(std::unique_ptr histogramPtr, - std::unique_ptr binWeightVectorPtr, - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory) - : 
AbstractExampleWiseImmutableWeightedStatistics(*histogramPtr, ruleEvaluationFactory, - *binWeightVectorPtr), - histogramPtr_(std::move(histogramPtr)), binWeightVectorPtr_(std::move(binWeightVectorPtr)), - binIndexVector_(binIndexVector), originalStatisticView_(originalStatisticView), - originalWeights_(originalWeights), totalSumVector_(totalSumVector) {} - - /** - * @see `IHistogram::clear` - */ - void clear() override { - histogramPtr_->clear(); - binWeightVectorPtr_->clear(); - } - - /** - * @see `IHistogram::getBinWeight` - */ - uint32 getBinWeight(uint32 binIndex) const override { - return (*binWeightVectorPtr_)[binIndex]; - } - - /** - * @see `IHistogram::addToBin` - */ - void addToBin(uint32 statisticIndex) override { - float64 weight = originalWeights_[statisticIndex]; - - if (weight > 0) { - uint32 binIndex = binIndexVector_.getBinIndex(statisticIndex); - - if (binIndex != IBinIndexVector::BIN_INDEX_SPARSE) { - binWeightVectorPtr_->increaseWeight(binIndex); - histogramPtr_->addToRow(binIndex, originalStatisticView_.gradients_cbegin(statisticIndex), - originalStatisticView_.gradients_cend(statisticIndex), - originalStatisticView_.hessians_cbegin(statisticIndex), - originalStatisticView_.hessians_cend(statisticIndex), weight); - } - } - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - }; - - template - static inline void addExampleWiseStatistic(const WeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - float64 weight = weights[statisticIndex]; - 
statisticVector.add(statisticView.gradients_cbegin(statisticIndex), - statisticView.gradients_cend(statisticIndex), statisticView.hessians_cbegin(statisticIndex), - statisticView.hessians_cend(statisticIndex), weight); - } - - template - static inline void addExampleWiseStatistic(const EqualWeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - statisticVector.add(statisticView.gradients_cbegin(statisticIndex), - statisticView.gradients_cend(statisticIndex), statisticView.hessians_cbegin(statisticIndex), - statisticView.hessians_cend(statisticIndex)); - } - - template - static inline void removeExampleWiseStatistic(const WeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - float64 weight = weights[statisticIndex]; - statisticVector.remove( - statisticView.gradients_cbegin(statisticIndex), statisticView.gradients_cend(statisticIndex), - statisticView.hessians_cbegin(statisticIndex), statisticView.hessians_cend(statisticIndex), weight); - } - - template - static inline void removeExampleWiseStatistic(const EqualWeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - statisticVector.remove( - statisticView.gradients_cbegin(statisticIndex), statisticView.gradients_cend(statisticIndex), - statisticView.hessians_cbegin(statisticIndex), statisticView.hessians_cend(statisticIndex)); - } - - template - static inline std::unique_ptr createExampleWiseHistogramInternally( - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory, uint32 numBins) { - std::unique_ptr histogramPtr = - std::make_unique(numBins, originalStatisticView.getNumCols()); - std::unique_ptr binWeightVectorPtr = std::make_unique(numBins); - return 
std::make_unique>( - std::move(histogramPtr), std::move(binWeightVectorPtr), binIndexVector, originalStatisticView, - originalWeights, totalSumVector, ruleEvaluationFactory); - } - - /** - * Provides access to weighted gradients and Hessians that are calculated according to a differentiable loss - * function that is applied example-wise and allows to update the gradients and Hessians after a new rule has been - * learned. - * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class ExampleWiseWeightedStatistics final - : virtual public IWeightedStatistics, - public AbstractExampleWiseImmutableWeightedStatistics { - private: - - /** - * Provides access to a subset of the gradients and Hessians that are stored by an instance of the class - * `ExampleWiseWeightedStatistics`. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class WeightedStatisticsSubset final - : public AbstractExampleWiseImmutableWeightedStatistics< - StatisticVector, StatisticView, RuleEvaluationFactory, - WeightVector>::template AbstractWeightedStatisticsSubset { - private: - - std::unique_ptr totalCoverableSumVectorPtr_; - - public: - - /** - * @param statistics A reference to an object of type `ExampleWiseWeightedStatistics` that - * stores the gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - WeightedStatisticsSubset(const ExampleWiseWeightedStatistics& statistics, - const StatisticVector& totalSumVector, const IndexVector& labelIndices) - : AbstractExampleWiseImmutableWeightedStatistics< - StatisticVector, StatisticView, RuleEvaluationFactory, - WeightVector>::template AbstractWeightedStatisticsSubset(statistics, - totalSumVector, - labelIndices) {} - - /** - * @see `IWeightedStatisticsSubset::addToMissing` - */ - void addToMissing(uint32 statisticIndex) override { - // Create a vector for storing the totals sums of gradients and Hessians, if necessary... - if (!totalCoverableSumVectorPtr_) { - totalCoverableSumVectorPtr_ = std::make_unique(*this->totalSumVector_); - this->totalSumVector_ = totalCoverableSumVectorPtr_.get(); - } - - // Subtract the gradients and Hessians of the example at the given index (weighted by the given - // weight) from the total sums of gradients and Hessians... 
- removeExampleWiseStatistic(this->weights_, this->statisticView_, *totalCoverableSumVectorPtr_, - statisticIndex); - } - }; - - const std::unique_ptr totalSumVectorPtr_; - - public: - - /** - * @param statisticView A reference to an object of template type `StatisticView` that provides - * access to the gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` that - * allows to create instances of the class that should be used for calculating - * the predictions of rules, as well as their overall quality - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual statistics - */ - ExampleWiseWeightedStatistics(const StatisticView& statisticView, - const RuleEvaluationFactory& ruleEvaluationFactory, - const WeightVector& weights) - : AbstractExampleWiseImmutableWeightedStatistics(statisticView, ruleEvaluationFactory, - weights), - totalSumVectorPtr_(std::make_unique(statisticView.getNumCols(), true)) { - uint32 numStatistics = weights.getNumElements(); - - for (uint32 i = 0; i < numStatistics; i++) { - addExampleWiseStatistic(weights, statisticView, *totalSumVectorPtr_, i); - } - } - - /** - * @param statistics A reference to an object of type `ExampleWiseWeightedStatistics` to be copied - */ - ExampleWiseWeightedStatistics(const ExampleWiseWeightedStatistics& statistics) - : AbstractExampleWiseImmutableWeightedStatistics( - statistics.statisticView_, statistics.ruleEvaluationFactory_, statistics.weights_), - totalSumVectorPtr_(std::make_unique(*statistics.totalSumVectorPtr_)) {} - - /** - * @see `IWeightedStatistics::copy` - */ - std::unique_ptr copy() const override { - return std::make_unique>(*this); - } - - /** - * @see `IWeightedStatistics::resetCoveredStatistics` - */ - void resetCoveredStatistics() override { - totalSumVectorPtr_->clear(); - } - - /** - * @see `IWeightedStatistics::addCoveredStatistic` - */ - void 
addCoveredStatistic(uint32 statisticIndex) override { - addExampleWiseStatistic(this->weights_, this->statisticView_, *totalSumVectorPtr_, statisticIndex); - } - - /** - * @see `IWeightedStatistics::removeCoveredStatistic` - */ - void removeCoveredStatistic(uint32 statisticIndex) override { - removeExampleWiseStatistic(this->weights_, this->statisticView_, *totalSumVectorPtr_, statisticIndex); - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createExampleWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createExampleWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const override { - return std::make_unique>(*this, *totalSumVectorPtr_, - labelIndices); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const override { - return std::make_unique>(*this, *totalSumVectorPtr_, - labelIndices); - } - }; - - template - static inline void updateExampleWiseStatisticsInternally(uint32 statisticIndex, const LabelMatrix& labelMatrix, - StatisticView& statisticView, ScoreMatrix& scoreMatrix, - const LossFunction& lossFunction) { - lossFunction.updateExampleWiseStatistics(statisticIndex, labelMatrix, scoreMatrix, statisticView); - } - - /** - * An abstract base class for all statistics that provide access to gradients and Hessians that are 
calculated - * according to a differentiable loss function that is applied example-wise. - * - * @tparam LabelMatrix The type of the matrix that provides access to the labels of the - * training examples - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam ScoreMatrix The type of the matrices that are used to store predicted scores - * @tparam LossFunction The type of the loss function that is used to calculate gradients and - * Hessians - * @tparam EvaluationMeasure The type of the evaluation measure that is used to assess the quality of - * predictions for a specific statistic - * @tparam ExampleWiseRuleEvaluationFactory The type of the factory that allows to create instances of the class - * that is used for calculating the example-wise predictions of rules, as - * well as their overall quality - * @tparam LabelWiseRuleEvaluationFactory The type of the factory that allows to create instances of the class - * that is used for calculating the label-wise predictions of rules, as - * well as their overall quality - */ - template - class AbstractExampleWiseStatistics - : virtual public IExampleWiseStatistics { - private: - - const ExampleWiseRuleEvaluationFactory* ruleEvaluationFactory_; - - protected: - - /** - * An unique pointer to the loss function that should be used for calculating gradients and Hessians. - */ - std::unique_ptr lossPtr_; - - /** - * An unique pointer to the evaluation measure that should be used to assess the quality of predictions for - * a specific statistic. - */ - std::unique_ptr evaluationMeasurePtr_; - - /** - * The label matrix that provides access to the labels of the training examples. 
- */ - const LabelMatrix& labelMatrix_; - - /** - * An unique pointer to an object of template type `StatisticView` that stores the gradients and Hessians. - */ - const std::unique_ptr statisticViewPtr_; - - /** - * The score matrix that stores the currently predicted scores. - */ - std::unique_ptr scoreMatrixPtr_; - - public: - - /** - * @param lossPtr An unique pointer to an object of template type `LossFunction` that - * implements the loss function that should be used for calculating gradients - * and Hessians - * @param evaluationMeasurePtr An unique pointer to an object of template type `EvaluationMeasure` that - * implements the evaluation measure that should be used to assess the quality - * of predictions for a specific statistic - * @param ruleEvaluationFactory A reference to an object of template type `ExampleWiseRuleEvaluationFactory` - * that allows to create instances of the class that should be used for - * calculating the predictions of rules, as well as corresponding quality - * scores - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides access - * to the labels of the training examples - * @param statisticViewPtr An unique pointer to an object of template type `StatisticView` that - * provides access to the gradients and Hessians - * @param scoreMatrixPtr An unique pointer to an object of template type `ScoreMatrix` that stores - * the currently predicted scores - */ - AbstractExampleWiseStatistics(std::unique_ptr lossPtr, - std::unique_ptr evaluationMeasurePtr, - const ExampleWiseRuleEvaluationFactory& ruleEvaluationFactory, - const LabelMatrix& labelMatrix, - std::unique_ptr statisticViewPtr, - std::unique_ptr scoreMatrixPtr) - : ruleEvaluationFactory_(&ruleEvaluationFactory), lossPtr_(std::move(lossPtr)), - evaluationMeasurePtr_(std::move(evaluationMeasurePtr)), labelMatrix_(labelMatrix), - statisticViewPtr_(std::move(statisticViewPtr)), scoreMatrixPtr_(std::move(scoreMatrixPtr)) {} - - /** - * @see 
`IExampleWiseStatistics::setRuleEvaluationFactory` - */ - void setRuleEvaluationFactory( - const ExampleWiseRuleEvaluationFactory& ruleEvaluationFactory) override final { - this->ruleEvaluationFactory_ = &ruleEvaluationFactory; - } - - /** - * @see `IStatistics::getNumStatistics` - */ - uint32 getNumStatistics() const override final { - return statisticViewPtr_->getNumRows(); - } - - /** - * @see `IStatistics::getNumLabels` - */ - uint32 getNumLabels() const override final { - return statisticViewPtr_->getNumCols(); - } - - /** - * @see `IStatistics::applyPrediction` - */ - void applyPrediction(uint32 statisticIndex, const CompletePrediction& prediction) override final { - applyPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateExampleWiseStatisticsInternally(statisticIndex, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::applyPrediction` - */ - void applyPrediction(uint32 statisticIndex, const PartialPrediction& prediction) override final { - applyPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateExampleWiseStatisticsInternally(statisticIndex, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::revertPrediction` - */ - void revertPrediction(uint32 statisticIndex, const CompletePrediction& prediction) override final { - revertPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateExampleWiseStatisticsInternally(statisticIndex, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::revertPrediction` - */ - void revertPrediction(uint32 statisticIndex, const PartialPrediction& prediction) override final { - revertPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateExampleWiseStatisticsInternally(statisticIndex, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see 
`IStatistics::evaluatePrediction` - */ - float64 evaluatePrediction(uint32 statisticIndex) const override final { - return evaluationMeasurePtr_->evaluate(statisticIndex, labelMatrix_, *scoreMatrixPtr_); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const CompleteIndexVector& labelIndices, - const EqualWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const EqualWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const CompleteIndexVector& labelIndices, - const BitWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const BitWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, const DenseWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, const DenseWeightVector& weights) const override final { - return 
std::make_unique< - ExampleWiseStatisticsSubset, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector>& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset>, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - 
const OutOfSampleWeightVector>& weights) const override final { - return std::make_unique< - ExampleWiseStatisticsSubset>, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createWeightedStatistics` - */ - std::unique_ptr createWeightedStatistics( - const EqualWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights); - } - - /** - * @see `IStatistics::createWeightedStatistics` - */ - std::unique_ptr createWeightedStatistics( - const BitWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights); - } - - /** - * @see `IStatistics::createWeightedStatistics` - */ - std::unique_ptr createWeightedStatistics( - const DenseWeightVector& weights) const override final { - return std::make_unique< - ExampleWiseWeightedStatistics>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights); - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_common.hpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_common.hpp deleted file mode 100644 index 3367c423..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_common.hpp +++ /dev/null @@ -1,989 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/statistics/statistics_label_wise.hpp" -#include "common/binning/bin_weight_vector.hpp" - -namespace boosting { - - static inline bool hasNonZeroWeightLabelWise(const EqualWeightVector& weights, uint32 statisticIndex) { - return true; - } - - template - static inline bool hasNonZeroWeightLabelWise(const WeightVector& weights, uint32 statisticIndex) { - return weights[statisticIndex] != 0; - } - - template - static inline void addLabelWiseStatisticToSubset(const EqualWeightVector& weights, - const StatisticView& 
statisticView, StatisticVector& vector, - const IndexVector& labelIndices, uint32 statisticIndex) { - vector.addToSubset(statisticView, statisticIndex, labelIndices); - } - - template - static inline void addLabelWiseStatisticToSubset(const WeightVector& weights, const StatisticView& statisticView, - StatisticVector& vector, const IndexVector& labelIndices, - uint32 statisticIndex) { - float64 weight = weights[statisticIndex]; - vector.addToSubset(statisticView, statisticIndex, labelIndices, weight); - } - - /** - * A subset of gradients and Hessians that are calculated according to a differentiable loss function that is - * applied label-wise and are accessible via a view. - * - * @tparam StatisticVector The type of the vector that is used to store the sums of gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that - * are included in the subset - */ - template - class LabelWiseStatisticsSubset : virtual public IStatisticsSubset { - protected: - - /** - * An object of template type `StatisticVector` that stores the sums of gradients and Hessians. - */ - StatisticVector sumVector_; - - /** - * A reference to an object of template type `StatisticView` that provides access to the gradients and - * Hessians. - */ - const StatisticView& statisticView_; - - /** - * A reference to an object of template type `WeightVector` that provides access to the weights of - * individual statistics. 
- */ - const WeightVector& weights_; - - /** - * A reference to an object of template type `IndexVector` that provides access to the indices of the labels - * that are included in the subset. - */ - const IndexVector& labelIndices_; - - /** - * An unique pointer to an object of type `IRuleEvaluation` that is used to calculate the predictions of - * rules, as well as their overall quality. - */ - const std::unique_ptr> ruleEvaluationPtr_; - - public: - - /** - * @param statisticView A reference to an object of template type `StatisticView` that provides - * access to the gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` that - * allows to create instances of the class that is used for calculating the - * predictions of rules, as well as their overall quality - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual statistics - * @param labelIndices A reference to an object of template type `IndexVector` that provides access - * to the indices of the labels that are included in the subset - */ - LabelWiseStatisticsSubset(const StatisticView& statisticView, - const RuleEvaluationFactory& ruleEvaluationFactory, const WeightVector& weights, - const IndexVector& labelIndices) - : sumVector_(StatisticVector(labelIndices.getNumElements(), true)), statisticView_(statisticView), - weights_(weights), labelIndices_(labelIndices), - ruleEvaluationPtr_(ruleEvaluationFactory.create(sumVector_, labelIndices)) {} - - /** - * @see `IStatisticsSubset::hasNonZeroWeight` - */ - bool hasNonZeroWeight(uint32 statisticIndex) const override final { - return hasNonZeroWeightLabelWise(weights_, statisticIndex); - } - - /** - * @see `IStatisticsSubset::addToSubset` - */ - void addToSubset(uint32 statisticIndex) override final { - addLabelWiseStatisticToSubset(weights_, statisticView_, sumVector_, labelIndices_, statisticIndex); - } - - /** - * @see 
`IStatisticsSubset::calculateScores` - */ - const IScoreVector& calculateScores() override final { - return ruleEvaluationPtr_->calculateScores(sumVector_); - } - }; - - /** - * An abstract base class for all statistics that provide access to gradients and Hessians that are calculated - * according to a differentiable loss function that is applied label-wise. - * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class AbstractLabelWiseImmutableWeightedStatistics : virtual public IImmutableWeightedStatistics { - protected: - - /** - * An abstract base class for all subsets of the gradients and Hessians that are stored by an instance of - * the class `AbstractLabelWiseImmutableWeightedStatistics`. - * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class AbstractWeightedStatisticsSubset - : public LabelWiseStatisticsSubset, - virtual public IWeightedStatisticsSubset { - private: - - StatisticVector tmpVector_; - - std::unique_ptr accumulatedSumVectorPtr_; - - protected: - - /** - * A pointer to an object of template type `StatisticVector` that stores the total sum of all - * gradients and Hessians. 
- */ - const StatisticVector* totalSumVector_; - - public: - - /** - * @param statistics A reference to an object of type - * `AbstractLabelWiseImmutableWeightedStatistics` that stores the gradients - * and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - AbstractWeightedStatisticsSubset(const AbstractLabelWiseImmutableWeightedStatistics& statistics, - const StatisticVector& totalSumVector, - const IndexVector& labelIndices) - : LabelWiseStatisticsSubset(statistics.statisticView_, - statistics.ruleEvaluationFactory_, statistics.weights_, - labelIndices), - tmpVector_(StatisticVector(labelIndices.getNumElements())), totalSumVector_(&totalSumVector) { - } - - /** - * @see `IWeightedStatisticsSubset::resetSubset` - */ - void resetSubset() override final { - if (!accumulatedSumVectorPtr_) { - // Create a vector for storing the accumulated sums of gradients and Hessians, if - // necessary... - accumulatedSumVectorPtr_ = std::make_unique(this->sumVector_); - } else { - // Add the sums of gradients and Hessians to the accumulated sums of gradients and - // Hessians... - accumulatedSumVectorPtr_->add(this->sumVector_); - } - - // Reset the sums of gradients and Hessians to zero... 
- this->sumVector_.clear(); - } - - /** - * @see `IWeightedStatisticsSubset::calculateScoresAccumulated` - */ - const IScoreVector& calculateScoresAccumulated() override final { - return this->ruleEvaluationPtr_->calculateScores(*accumulatedSumVectorPtr_); - } - - /** - * @see `IWeightedStatisticsSubset::calculateScoresUncovered` - */ - const IScoreVector& calculateScoresUncovered() override final { - tmpVector_.difference(*totalSumVector_, this->labelIndices_, this->sumVector_); - return this->ruleEvaluationPtr_->calculateScores(tmpVector_); - } - - /** - * @see `IWeightedStatisticsSubset::calculateScoresUncoveredAccumulated` - */ - const IScoreVector& calculateScoresUncoveredAccumulated() override final { - tmpVector_.difference(*totalSumVector_, this->labelIndices_, *accumulatedSumVectorPtr_); - return this->ruleEvaluationPtr_->calculateScores(tmpVector_); - } - }; - - protected: - - /** - * A reference to an object of template type `StatisticView` that stores the gradients and Hessians. - */ - const StatisticView& statisticView_; - - /** - * A reference to an object of template type `RuleEvaluationFactory` that is used to create instances of the - * class that is used for calculating the predictions of rules, as well as their overall quality. - */ - const RuleEvaluationFactory& ruleEvaluationFactory_; - - /** - * A reference to an object of template type `WeightVector` that provides access to the weights of - * individual statistics. 
- */ - const WeightVector& weights_; - - public: - - /** - * @param statisticView A reference to an object of template type `StatisticView` that provides - * access to the gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` that - * allows to create instances of the class that should be used for calculating - * the predictions of rules, as well as their overall quality - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual statistics - */ - AbstractLabelWiseImmutableWeightedStatistics(const StatisticView& statisticView, - const RuleEvaluationFactory& ruleEvaluationFactory, - const WeightVector& weights) - : statisticView_(statisticView), ruleEvaluationFactory_(ruleEvaluationFactory), weights_(weights) {} - - /** - * @see `IImmutableWeightedStatistics::getNumStatistics` - */ - uint32 getNumStatistics() const override final { - return statisticView_.getNumRows(); - } - - /** - * @see `IImmutableWeightedStatistics::getNumLabels` - */ - uint32 getNumLabels() const override final { - return statisticView_.getNumCols(); - } - }; - - /** - * Provides access to gradients and Hessians that are calculated according to a differentiable loss function that is - * applied label-wise and are organized as a histogram. 
- * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the original gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam BinIndexVector The type of the vector that stores the indices of the bins, individual examples - * have been assigned to - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class LabelWiseHistogram final - : virtual public IHistogram, - public AbstractLabelWiseImmutableWeightedStatistics { - private: - - /** - * Provides access to a subset of the gradients and Hessians that are stored by an instance of the class - * `LabelWiseHistogram`. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class WeightedStatisticsSubset final - : public AbstractLabelWiseImmutableWeightedStatistics< - StatisticVector, Histogram, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset { - private: - - const LabelWiseHistogram& histogram_; - - std::unique_ptr totalCoverableSumVectorPtr_; - - public: - - /** - * @param histogram A reference to an object of type `LabelWiseHistogram` that stores the - * gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - WeightedStatisticsSubset(const LabelWiseHistogram& histogram, const StatisticVector& totalSumVector, - const IndexVector& labelIndices) - : AbstractLabelWiseImmutableWeightedStatistics< - StatisticVector, Histogram, RuleEvaluationFactory, - BinWeightVector>::template AbstractWeightedStatisticsSubset(histogram, - totalSumVector, - labelIndices), - histogram_(histogram) {} - - /** - * @see `IWeightedStatisticsSubset::addToMissing` - */ - void addToMissing(uint32 statisticIndex) override { - // Create a vector for storing the totals sums of gradients and Hessians, if necessary... - if (!totalCoverableSumVectorPtr_) { - totalCoverableSumVectorPtr_ = std::make_unique(*this->totalSumVector_); - this->totalSumVector_ = totalCoverableSumVectorPtr_.get(); - } - - // Subtract the gradients and Hessians of the example at the given index (weighted by the given - // weight) from the total sums of gradients and Hessians... 
- removeLabelWiseStatistic(histogram_.originalWeights_, histogram_.originalStatisticView_, - *totalCoverableSumVectorPtr_, statisticIndex); - } - }; - - const std::unique_ptr histogramPtr_; - - const std::unique_ptr binWeightVectorPtr_; - - const BinIndexVector& binIndexVector_; - - const StatisticView& originalStatisticView_; - - const WeightVector& originalWeights_; - - const StatisticVector& totalSumVector_; - - public: - - /** - * @param histogramPtr An unique pointer to an object of template type `Histogram` that stores the - * gradients and Hessians in the histogram - * @param binWeightVectorPtr An unique pointer to an object of type `BinWeightVector` that stores the - * weights of individual bins - * @param binIndexVector A reference to an object of template type `BinIndexVector` that stores the - * indices of the bins, individual examples have been assigned to - * @param originalStatisticView A reference to an object of template type `StatisticView` that provides - * access to the original gradients and Hessians, the histogram was created - * from - * @param originalWeights A reference to an object of template type `WeightVector` that provides - * access to the weights of the original statistics, the histogram was created - * from - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores the - * total sums of gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of type `RuleEvaluationFactory` that allows to - * create instances of the class that should be used for calculating the - * predictions of rules, as well as their overall quality - */ - LabelWiseHistogram(std::unique_ptr histogramPtr, - std::unique_ptr binWeightVectorPtr, - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory) - : 
AbstractLabelWiseImmutableWeightedStatistics(*histogramPtr, ruleEvaluationFactory, - *binWeightVectorPtr), - histogramPtr_(std::move(histogramPtr)), binWeightVectorPtr_(std::move(binWeightVectorPtr)), - binIndexVector_(binIndexVector), originalStatisticView_(originalStatisticView), - originalWeights_(originalWeights), totalSumVector_(totalSumVector) {} - - /** - * @see `IHistogram::clear` - */ - void clear() override { - histogramPtr_->clear(); - binWeightVectorPtr_->clear(); - } - - /** - * @see `IHistogram::getBinWeight` - */ - uint32 getBinWeight(uint32 binIndex) const override { - return (*binWeightVectorPtr_)[binIndex]; - } - - /** - * @see `IHistogram::addToBin` - */ - void addToBin(uint32 statisticIndex) override { - float64 weight = originalWeights_[statisticIndex]; - - if (weight > 0) { - uint32 binIndex = binIndexVector_.getBinIndex(statisticIndex); - - if (binIndex != IBinIndexVector::BIN_INDEX_SPARSE) { - binWeightVectorPtr_->increaseWeight(binIndex); - histogramPtr_->addToRow(binIndex, originalStatisticView_.cbegin(statisticIndex), - originalStatisticView_.cend(statisticIndex), weight); - } - } - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const override { - return std::make_unique>(*this, totalSumVector_, - labelIndices); - } - }; - - template - static inline void addLabelWiseStatistic(const EqualWeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - statisticVector.add(statisticView, statisticIndex); - } - - template - static inline void addLabelWiseStatistic(const WeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, 
uint32 statisticIndex) { - float64 weight = weights[statisticIndex]; - statisticVector.add(statisticView, statisticIndex, weight); - } - - template - static inline void removeLabelWiseStatistic(const EqualWeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - statisticVector.remove(statisticView, statisticIndex); - } - - template - static inline void removeLabelWiseStatistic(const WeightVector& weights, const StatisticView& statisticView, - StatisticVector& statisticVector, uint32 statisticIndex) { - float64 weight = weights[statisticIndex]; - statisticVector.remove(statisticView, statisticIndex, weight); - } - - template - static inline std::unique_ptr createLabelWiseHistogramInternally( - const BinIndexVector& binIndexVector, const StatisticView& originalStatisticView, - const WeightVector& originalWeights, const StatisticVector& totalSumVector, - const RuleEvaluationFactory& ruleEvaluationFactory, uint32 numBins) { - std::unique_ptr histogramPtr = - std::make_unique(numBins, originalStatisticView.getNumCols()); - std::unique_ptr binWeightVectorPtr = std::make_unique(numBins); - return std::make_unique>( - std::move(histogramPtr), std::move(binWeightVectorPtr), binIndexVector, originalStatisticView, - originalWeights, totalSumVector, ruleEvaluationFactory); - } - - /** - * Provides access to weighted gradients and Hessians that are calculated according to a differentiable loss - * function that is applied label-wise and allows to update the gradients and Hessians after a new rule has been - * learned. 
- * - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - * @tparam WeightVector The type of the vector that provides access to the weights of individual - * statistics - */ - template - class LabelWiseWeightedStatistics final - : virtual public IWeightedStatistics, - public AbstractLabelWiseImmutableWeightedStatistics { - private: - - /** - * Provides access to a subset of the gradients and Hessians that are stored by an instance of the class - * `LabelWiseWeightedStatistics`. - * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels that are - * included in the subset - */ - template - class WeightedStatisticsSubset final - : public AbstractLabelWiseImmutableWeightedStatistics< - StatisticVector, StatisticView, RuleEvaluationFactory, - WeightVector>::template AbstractWeightedStatisticsSubset { - private: - - std::unique_ptr totalCoverableSumVectorPtr_; - - public: - - /** - * @param statistics A reference to an object of type `LabelWiseWeightedStatistics` that - * stores the gradients and Hessians - * @param totalSumVector A reference to an object of template type `StatisticVector` that stores - * the total sums of gradients and Hessians - * @param labelIndices A reference to an object of template type `IndexVector` that provides - * access to the indices of the labels that are included in the subset - */ - WeightedStatisticsSubset(const LabelWiseWeightedStatistics& statistics, - const StatisticVector& totalSumVector, const IndexVector& labelIndices) - : 
AbstractLabelWiseImmutableWeightedStatistics< - StatisticVector, StatisticView, RuleEvaluationFactory, - WeightVector>::template AbstractWeightedStatisticsSubset(statistics, - totalSumVector, - labelIndices) {} - - /** - * @see `IWeightedStatisticsSubset::addToMissing` - */ - void addToMissing(uint32 statisticIndex) override { - // Create a vector for storing the totals sums of gradients and Hessians, if necessary... - if (!totalCoverableSumVectorPtr_) { - totalCoverableSumVectorPtr_ = std::make_unique(*this->totalSumVector_); - this->totalSumVector_ = totalCoverableSumVectorPtr_.get(); - } - - // Subtract the gradients and Hessians of the example at the given index (weighted by the given - // weight) from the total sums of gradients and Hessians... - removeLabelWiseStatistic(this->weights_, this->statisticView_, *totalCoverableSumVectorPtr_, - statisticIndex); - } - }; - - const std::unique_ptr totalSumVectorPtr_; - - public: - - /** - * @param statisticView A reference to an object of template type `StatisticView` that provides - * access to the gradients and Hessians - * @param ruleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` that - * allows to create instances of the class that should be used for calculating - * the predictions of rules, as well as their overall quality - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual statistics - */ - LabelWiseWeightedStatistics(const StatisticView& statisticView, - const RuleEvaluationFactory& ruleEvaluationFactory, const WeightVector& weights) - : AbstractLabelWiseImmutableWeightedStatistics(statisticView, ruleEvaluationFactory, - weights), - totalSumVectorPtr_(std::make_unique(statisticView.getNumCols(), true)) { - uint32 numStatistics = weights.getNumElements(); - - for (uint32 i = 0; i < numStatistics; i++) { - addLabelWiseStatistic(weights, statisticView, *totalSumVectorPtr_, i); - } - } - - /** - * 
@param statistics A reference to an object of type `LabelWiseWeightedStatistics` to be copied - */ - LabelWiseWeightedStatistics(const LabelWiseWeightedStatistics& statistics) - : AbstractLabelWiseImmutableWeightedStatistics( - statistics.statisticView_, statistics.ruleEvaluationFactory_, statistics.weights_), - totalSumVectorPtr_(std::make_unique(*statistics.totalSumVectorPtr_)) {} - - /** - * @see `IWeightedStatistics::copy` - */ - std::unique_ptr copy() const override { - return std::make_unique>(*this); - } - - /** - * @see `IWeightedStatistics::resetCoveredStatistics` - */ - void resetCoveredStatistics() override { - totalSumVectorPtr_->clear(); - } - - /** - * @see `IWeightedStatistics::addCoveredStatistic` - */ - void addCoveredStatistic(uint32 statisticIndex) override { - addLabelWiseStatistic(this->weights_, this->statisticView_, *totalSumVectorPtr_, statisticIndex); - } - - /** - * @see `IWeightedStatistics::removeCoveredStatistic` - */ - void removeCoveredStatistic(uint32 statisticIndex) override { - removeLabelWiseStatistic(this->weights_, this->statisticView_, *totalSumVectorPtr_, statisticIndex); - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createLabelWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - - /** - * @see `IWeightedStatistics::createHistogram` - */ - std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const override { - return createLabelWiseHistogramInternally( - binIndexVector, this->statisticView_, this->weights_, *totalSumVectorPtr_, - this->ruleEvaluationFactory_, numBins); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const override { - return 
std::make_unique>(*this, *totalSumVectorPtr_, - labelIndices); - } - - /** - * @see `IImmutableWeightedStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const override { - return std::make_unique>(*this, *totalSumVectorPtr_, - labelIndices); - } - }; - - template - static inline void applyPredictionInternally(uint32 statisticIndex, const Prediction& prediction, - ScoreMatrix& scoreMatrix) { - scoreMatrix.addToRowFromSubset(statisticIndex, prediction.scores_cbegin(), prediction.scores_cend(), - prediction.indices_cbegin(), prediction.indices_cend()); - } - - template - static inline void revertPredictionInternally(uint32 statisticIndex, const Prediction& prediction, - ScoreMatrix& scoreMatrix) { - scoreMatrix.removeFromRowFromSubset(statisticIndex, prediction.scores_cbegin(), prediction.scores_cend(), - prediction.indices_cbegin(), prediction.indices_cend()); - } - - template - static inline void updateLabelWiseStatisticsInternally(uint32 statisticIndex, const Prediction& prediction, - const LabelMatrix& labelMatrix, StatisticView& statisticView, - ScoreMatrix& scoreMatrix, const LossFunction& lossFunction) { - lossFunction.updateLabelWiseStatistics(statisticIndex, labelMatrix, scoreMatrix, prediction.indices_cbegin(), - prediction.indices_cend(), statisticView); - } - - /** - * An abstract base class for all statistics that provide access to gradients and Hessians that are calculated - * according to a differentiable loss function that is applied label-wise. 
- * - * @tparam LabelMatrix The type of the matrix that provides access to the labels of the training - * examples - * @tparam StatisticVector The type of the vectors that are used to store gradients and Hessians - * @tparam StatisticView The type of the view that provides access to the gradients and Hessians - * @tparam Histogram The type of a histogram that stores aggregated gradients and Hessians - * @tparam ScoreMatrix The type of the matrices that are used to store predicted scores - * @tparam LossFunction The type of the loss function that is used to calculate gradients and Hessians - * @tparam EvaluationMeasure The type of the evaluation measure that is used to assess the quality of - * predictions for a specific statistic - * @tparam RuleEvaluationFactory The type of the factory that allows to create instances of the class that is - * used for calculating the predictions of rules, as well as corresponding quality - * scores - */ - template - class AbstractLabelWiseStatistics : virtual public ILabelWiseStatistics { - private: - - const std::unique_ptr lossPtr_; - - const std::unique_ptr evaluationMeasurePtr_; - - const RuleEvaluationFactory* ruleEvaluationFactory_; - - const LabelMatrix& labelMatrix_; - - const std::unique_ptr statisticViewPtr_; - - protected: - - /** - * An unique pointer to an object of template type `ScoreMatrix` that stores the currently predicted scores. 
- */ - const std::unique_ptr scoreMatrixPtr_; - - public: - - /** - * @param lossPtr An unique pointer to an object of template type `LossFunction` that - * implements the loss function that should be used for calculating gradients - * and Hessians - * @param evaluationMeasurePtr An unique pointer to an object of template type `EvaluationMeasure` that - * implements the evaluation measure that should be used to assess the quality - * of predictions for a specific statistic - * @param ruleEvaluationFactory A reference to an object of type `RuleEvaluationFactory` that allows to - * create instances of the class that should be used for calculating the - * predictions of rules, as well as their overall quality - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides access - * to the labels of the training examples - * @param statisticViewPtr An unique pointer to an object of template type `StatisticView` that - * provides access to the gradients and Hessians - * @param scoreMatrixPtr An unique pointer to an object of template type `ScoreMatrix` that stores - * the currently predicted scores - */ - AbstractLabelWiseStatistics(std::unique_ptr lossPtr, - std::unique_ptr evaluationMeasurePtr, - const RuleEvaluationFactory& ruleEvaluationFactory, - const LabelMatrix& labelMatrix, std::unique_ptr statisticViewPtr, - std::unique_ptr scoreMatrixPtr) - : lossPtr_(std::move(lossPtr)), evaluationMeasurePtr_(std::move(evaluationMeasurePtr)), - ruleEvaluationFactory_(&ruleEvaluationFactory), labelMatrix_(labelMatrix), - statisticViewPtr_(std::move(statisticViewPtr)), scoreMatrixPtr_(std::move(scoreMatrixPtr)) {} - - /** - * @see `ILabelWiseStatistics::setRuleEvaluationFactory` - */ - void setRuleEvaluationFactory(const RuleEvaluationFactory& ruleEvaluationFactory) override final { - this->ruleEvaluationFactory_ = &ruleEvaluationFactory; - } - - /** - * @see `IStatistics::getNumStatistics` - */ - uint32 getNumStatistics() const override final { - 
return statisticViewPtr_->getNumRows(); - } - - /** - * @see `IStatistics::getNumLabels` - */ - uint32 getNumLabels() const override final { - return statisticViewPtr_->getNumCols(); - } - - /** - * @see `IStatistics::applyPrediction` - */ - void applyPrediction(uint32 statisticIndex, const CompletePrediction& prediction) override final { - applyPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateLabelWiseStatisticsInternally(statisticIndex, prediction, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::applyPrediction` - */ - void applyPrediction(uint32 statisticIndex, const PartialPrediction& prediction) override final { - applyPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateLabelWiseStatisticsInternally(statisticIndex, prediction, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::revertPrediction` - */ - void revertPrediction(uint32 statisticIndex, const CompletePrediction& prediction) override final { - revertPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateLabelWiseStatisticsInternally(statisticIndex, prediction, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::revertPrediction` - */ - void revertPrediction(uint32 statisticIndex, const PartialPrediction& prediction) override final { - revertPredictionInternally(statisticIndex, prediction, *scoreMatrixPtr_); - updateLabelWiseStatisticsInternally(statisticIndex, prediction, labelMatrix_, *this->statisticViewPtr_, - *scoreMatrixPtr_, *lossPtr_); - } - - /** - * @see `IStatistics::evaluatePrediction` - */ - float64 evaluatePrediction(uint32 statisticIndex) const override final { - return evaluationMeasurePtr_->evaluate(statisticIndex, labelMatrix_, *scoreMatrixPtr_); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const 
CompleteIndexVector& labelIndices, - const EqualWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const EqualWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const CompleteIndexVector& labelIndices, - const BitWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const BitWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, const DenseWeightVector& weights) const override final { - return std::make_unique, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, const DenseWeightVector& weights) const override final { - return std::make_unique, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - LabelWiseStatisticsSubset, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } 
- - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - LabelWiseStatisticsSubset, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - LabelWiseStatisticsSubset, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - const OutOfSampleWeightVector& weights) const override final { - return std::make_unique< - LabelWiseStatisticsSubset, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector>& weights) const override final { - return std::make_unique< - LabelWiseStatisticsSubset>, CompleteIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createSubset` - */ - std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - const OutOfSampleWeightVector>& weights) const override final { - return std::make_unique< - LabelWiseStatisticsSubset>, PartialIndexVector>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights, labelIndices); - } - - /** - * @see `IStatistics::createWeightedStatistics` - */ - std::unique_ptr createWeightedStatistics( - const EqualWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights); - } - - /** - * @see 
`IStatistics::createWeightedStatistics` - */ - std::unique_ptr createWeightedStatistics( - const BitWeightVector& weights) const override final { - return std::make_unique>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights); - } - - /** - * @see `IStatistics::createWeightedStatistics` - */ - std::unique_ptr createWeightedStatistics( - const DenseWeightVector& weights) const override final { - return std::make_unique>>( - *statisticViewPtr_, *ruleEvaluationFactory_, weights); - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_dense.hpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_dense.hpp deleted file mode 100644 index 66024d33..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_label_wise_dense.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/data/matrix_c_contiguous_numeric.hpp" -#include "boosting/data/statistic_vector_label_wise_dense.hpp" -#include "boosting/data/statistic_view_label_wise_dense.hpp" -#include "boosting/losses/loss_label_wise.hpp" -#include "common/measures/measure_evaluation.hpp" -#include "statistics_label_wise_common.hpp" - -#include - -namespace boosting { - - /** - * A matrix that stores gradients and Hessians that have been calculated using a label-wise decomposable loss - * function using C-contiguous arrays. 
- */ - class DenseLabelWiseStatisticMatrix final : public DenseLabelWiseStatisticView { - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - DenseLabelWiseStatisticMatrix(uint32 numRows, uint32 numCols) - : DenseLabelWiseStatisticView(numRows, numCols, - (Tuple*) malloc(numRows * numCols * sizeof(Tuple))) {} - - ~DenseLabelWiseStatisticMatrix() override { - free(statistics_); - } - }; - - /** - * Provides access to gradients and Hessians that have been calculated according to a differentiable loss function - * that is applied label-wise and are stored using dense data structures. - * - * @tparam LabelMatrix The type of the matrix that provides access to the labels of the training examples - */ - template - class DenseLabelWiseStatistics final - : public AbstractLabelWiseStatistics, - ILabelWiseLoss, IEvaluationMeasure, ILabelWiseRuleEvaluationFactory> { - public: - - /** - * @param lossPtr An unique pointer to an object of type `ILabelWiseLoss` that implements the - * loss function that should be used for calculating gradients and Hessians - * @param evaluationMeasurePtr An unique pointer to an object of type `IEvaluationMeasure` that implements - * the evaluation measure that should be used to assess the quality of - * predictions for a specific statistic - * @param ruleEvaluationFactory A reference to an object of type `ILabelWiseRuleEvaluationFactory`, that - * allows to create instances of the class that is used for calculating the - * predictions of rules, as well as their overall quality - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides access - * to the labels of the training examples - * @param statisticViewPtr An unique pointer to an object of type `DenseLabelWiseStatisticView` that - * provides access to the gradients and Hessians - * @param scoreMatrixPtr An unique pointer to an object of type `NumericCContiguousMatrix` that - * 
stores the currently predicted scores - */ - DenseLabelWiseStatistics(std::unique_ptr lossPtr, - std::unique_ptr evaluationMeasurePtr, - const ILabelWiseRuleEvaluationFactory& ruleEvaluationFactory, - const LabelMatrix& labelMatrix, - std::unique_ptr statisticViewPtr, - std::unique_ptr> scoreMatrixPtr) - : AbstractLabelWiseStatistics, - ILabelWiseLoss, IEvaluationMeasure, ILabelWiseRuleEvaluationFactory>( - std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, - std::move(statisticViewPtr), std::move(scoreMatrixPtr)) {} - - /** - * @see `IBoostingStatistics::visitScoreMatrix` - */ - void visitScoreMatrix(IBoostingStatistics::DenseScoreMatrixVisitor denseVisitor, - IBoostingStatistics::SparseScoreMatrixVisitor sparseVisitor) const override { - denseVisitor(*this->scoreMatrixPtr_); - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise.hpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise.hpp deleted file mode 100644 index 55fa996e..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise.hpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/statistics/statistics_example_wise.hpp" -#include "boosting/statistics/statistics_label_wise.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - * Provides access to an object of type `IExampleWiseStatistics`. 
- * - * @tparam LabelWiseRuleEvaluationFactory The type of the classes that may be used for calculating the label-wise - * predictions of rules, as well as their overall quality - * @tparam ExampleWiseRuleEvaluationFactory The type of the classes that may be used for calculating the - * example-wise predictions of rules, as well as their overall quality - */ - template - class ExampleWiseStatisticsProvider final : public IStatisticsProvider { - private: - - typedef IExampleWiseStatistics - ExampleWiseStatistics; - - const ExampleWiseRuleEvaluationFactory& regularRuleEvaluationFactory_; - - const ExampleWiseRuleEvaluationFactory& pruningRuleEvaluationFactory_; - - const std::unique_ptr statisticsPtr_; - - public: - - /** - * @param regularRuleEvaluationFactory A reference to an object of template type - * `ExampleWiseRuleEvaluationFactory` to switch to when invoking the - * function `switchToRegularRuleEvaluation` - * @param pruningRuleEvaluationFactory A reference to an object of template type - * `ExampleWiseRuleEvaluationFactory` to switch to when invoking the - * function `switchToPruningRuleEvaluation` - * @param statisticsPtr An unique pointer to an object of type `IExampleWiseStatistics` to - * provide access to - */ - ExampleWiseStatisticsProvider(const ExampleWiseRuleEvaluationFactory& regularRuleEvaluationFactory, - const ExampleWiseRuleEvaluationFactory& pruningRuleEvaluationFactory, - std::unique_ptr statisticsPtr) - : regularRuleEvaluationFactory_(regularRuleEvaluationFactory), - pruningRuleEvaluationFactory_(pruningRuleEvaluationFactory), - statisticsPtr_(std::move(statisticsPtr)) {} - - /** - * @see `IStatisticsProvider::get` - */ - IStatistics& get() const override { - return *statisticsPtr_; - } - - /** - * @see `IStatisticsProvider::switchToRegularRuleEvaluation` - */ - void switchToRegularRuleEvaluation() override { - statisticsPtr_->setRuleEvaluationFactory(regularRuleEvaluationFactory_); - } - - /** - * @see 
`IStatisticsProvider::switchToPruningRuleEvaluation` - */ - void switchToPruningRuleEvaluation() override { - statisticsPtr_->setRuleEvaluationFactory(pruningRuleEvaluationFactory_); - } - }; - - /** - * Provides access to an object of type `IExampleWiseStatistics` that can be converted into an object of type - * `ILabelWiseStatistics`. - * - * @tparam LabelWiseRuleEvaluationFactory The type of the classes that may be used for calculating the label-wise - * predictions of rules, as well as their overall quality - * @tparam ExampleWiseRuleEvaluationFactory The type of the classes that may be used for calculating the - * example-wise predictions of rules, as well as their overall quality - */ - template - class ConvertibleExampleWiseStatisticsProvider final : public IStatisticsProvider { - private: - - typedef IExampleWiseStatistics - ExampleWiseStatistics; - - const LabelWiseRuleEvaluationFactory& regularRuleEvaluationFactory_; - - const LabelWiseRuleEvaluationFactory& pruningRuleEvaluationFactory_; - - std::unique_ptr exampleWiseStatisticsPtr_; - - std::unique_ptr> labelWiseStatisticsPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param regularRuleEvaluationFactory A reference to an object of template type - * `LabelWiseRuleEvaluationFactory` to switch to when invoking the - * function `switchToRegularRuleEvaluation` - * @param pruningRuleEvaluationFactory A reference to an object of template type - * `LabelWiseRuleEvaluationFactory` to switch to when invoking the - * function `switchToPruningRuleEvaluation` - * @param statisticsPtr An unique pointer to an object of type `IExampleWiseStatistics` to - * provide access to - * @param numThreads The number of threads that should be used to convert the statistics - * for individual examples in parallel - */ - ConvertibleExampleWiseStatisticsProvider(const LabelWiseRuleEvaluationFactory& regularRuleEvaluationFactory, - const LabelWiseRuleEvaluationFactory& pruningRuleEvaluationFactory, - std::unique_ptr 
statisticsPtr, - uint32 numThreads) - : regularRuleEvaluationFactory_(regularRuleEvaluationFactory), - pruningRuleEvaluationFactory_(pruningRuleEvaluationFactory), - exampleWiseStatisticsPtr_(std::move(statisticsPtr)), numThreads_(numThreads) {} - - /** - * @see `IStatisticsProvider::get` - */ - IStatistics& get() const override { - ExampleWiseStatistics* exampleWiseStatistics = exampleWiseStatisticsPtr_.get(); - - if (exampleWiseStatistics) { - return *exampleWiseStatistics; - } else { - return *labelWiseStatisticsPtr_; - } - } - - /** - * @see `IStatisticsProvider::switchToRegularRuleEvaluation` - */ - void switchToRegularRuleEvaluation() override { - ExampleWiseStatistics* exampleWiseStatistics = exampleWiseStatisticsPtr_.get(); - - if (exampleWiseStatistics) { - labelWiseStatisticsPtr_ = - exampleWiseStatistics->toLabelWiseStatistics(regularRuleEvaluationFactory_, numThreads_); - exampleWiseStatisticsPtr_.reset(); - } else { - labelWiseStatisticsPtr_->setRuleEvaluationFactory(regularRuleEvaluationFactory_); - } - } - - /** - * @see `IStatisticsProvider::switchToPruningRuleEvaluation` - */ - void switchToPruningRuleEvaluation() override { - ExampleWiseStatistics* exampleWiseStatistics = exampleWiseStatisticsPtr_.get(); - - if (exampleWiseStatistics) { - labelWiseStatisticsPtr_ = - exampleWiseStatistics->toLabelWiseStatistics(pruningRuleEvaluationFactory_, numThreads_); - exampleWiseStatisticsPtr_.reset(); - } else { - labelWiseStatisticsPtr_->setRuleEvaluationFactory(pruningRuleEvaluationFactory_); - } - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise_dense.cpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise_dense.cpp deleted file mode 100644 index a7059646..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_example_wise_dense.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) 
-#endif - -#include "boosting/statistics/statistics_provider_example_wise_dense.hpp" - -#include "boosting/data/matrix_c_contiguous_numeric.hpp" -#include "boosting/data/statistic_vector_example_wise_dense.hpp" -#include "boosting/data/statistic_view_example_wise_dense.hpp" -#include "boosting/math/math.hpp" -#include "omp.h" -#include "statistics_example_wise_common.hpp" -#include "statistics_label_wise_dense.hpp" -#include "statistics_provider_example_wise.hpp" - -#include - -namespace boosting { - - /** - * A matrix that stores gradients and Hessians that have been calculated using a non-decomposable loss function - * using C-contiguous arrays. - */ - class DenseExampleWiseStatisticMatrix final : public DenseExampleWiseStatisticView { - public: - - /** - * @param numRows The number of rows in the matrix - * @param numGradients The number of gradients per row - */ - DenseExampleWiseStatisticMatrix(uint32 numRows, uint32 numGradients) - : DenseExampleWiseStatisticView( - numRows, numGradients, triangularNumber(numGradients), - (float64*) malloc(numRows * numGradients * sizeof(float64)), - (float64*) malloc(numRows * triangularNumber(numGradients) * sizeof(float64))) {} - - ~DenseExampleWiseStatisticMatrix() { - free(gradients_); - free(hessians_); - } - }; - - /** - * Provides access to gradients and Hessians that have been calculated according to a differentiable loss function - * that is applied example-wise and are stored using dense data structures. 
- * - * @tparam LabelMatrix The type of the matrix that provides access to the labels of the training examples - */ - template - class DenseExampleWiseStatistics final - : public AbstractExampleWiseStatistics, IExampleWiseLoss, IEvaluationMeasure, - IExampleWiseRuleEvaluationFactory, ILabelWiseRuleEvaluationFactory> { - public: - - /** - * @param lossPtr An unique pointer to an object of type `IExampleWiseLoss` that implements - * the loss function to be used for calculating gradients and Hessians - * @param evaluationMeasurePtr An unique pointer to an object of type `IEvaluationMeasure` that implements - * the evaluation measure that should be used to assess the quality of - * predictions - * @param ruleEvaluationFactory A reference to an object of type `IExampleWiseRuleEvaluationFactory`, to be - * used for calculating the predictions, as well as corresponding quality - * scores, of rules - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides access - * to the labels of the training examples - * @param statisticViewPtr An unique pointer to an object of type `DenseExampleWiseStatisticView` that - * provides access to the gradients and Hessians - * @param scoreMatrixPtr An unique pointer to an object of type `NumericCContiguousMatrix` that - * stores the currently predicted scores - */ - DenseExampleWiseStatistics(std::unique_ptr lossPtr, - std::unique_ptr evaluationMeasurePtr, - const IExampleWiseRuleEvaluationFactory& ruleEvaluationFactory, - const LabelMatrix& labelMatrix, - std::unique_ptr statisticViewPtr, - std::unique_ptr> scoreMatrixPtr) - : AbstractExampleWiseStatistics, IExampleWiseLoss, IEvaluationMeasure, - IExampleWiseRuleEvaluationFactory, ILabelWiseRuleEvaluationFactory>( - std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, - std::move(statisticViewPtr), std::move(scoreMatrixPtr)) {} - - /** - * @see `IBoostingStatistics::visitScoreMatrix` - */ - void 
visitScoreMatrix(IBoostingStatistics::DenseScoreMatrixVisitor denseVisitor, - IBoostingStatistics::SparseScoreMatrixVisitor sparseVisitor) const override { - denseVisitor(*this->scoreMatrixPtr_); - } - - /** - * @see `IExampleWiseStatistics::toLabelWiseStatistics` - */ - std::unique_ptr> toLabelWiseStatistics( - const ILabelWiseRuleEvaluationFactory& ruleEvaluationFactory, uint32 numThreads) override final { - uint32 numRows = this->statisticViewPtr_->getNumRows(); - uint32 numCols = this->statisticViewPtr_->getNumCols(); - std::unique_ptr labelWiseStatisticMatrixPtr = - std::make_unique(numRows, numCols); - DenseLabelWiseStatisticView* labelWiseStatisticMatrixRawPtr = labelWiseStatisticMatrixPtr.get(); - DenseExampleWiseStatisticView* exampleWiseStatisticViewRawPtr = this->statisticViewPtr_.get(); - -#pragma omp parallel for firstprivate(numRows) firstprivate(numCols) firstprivate(labelWiseStatisticMatrixRawPtr) \ - firstprivate(exampleWiseStatisticViewRawPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numRows; i++) { - DenseLabelWiseStatisticView::iterator iterator = labelWiseStatisticMatrixRawPtr->begin(i); - DenseExampleWiseStatisticView::gradient_const_iterator gradientIterator = - exampleWiseStatisticViewRawPtr->gradients_cbegin(i); - DenseExampleWiseStatisticView::hessian_diagonal_const_iterator hessianIterator = - exampleWiseStatisticViewRawPtr->hessians_diagonal_cbegin(i); - - for (uint32 j = 0; j < numCols; j++) { - Tuple& tuple = iterator[j]; - tuple.first = gradientIterator[j]; - tuple.second = hessianIterator[j]; - } - } - - return std::make_unique>( - std::move(this->lossPtr_), std::move(this->evaluationMeasurePtr_), ruleEvaluationFactory, - this->labelMatrix_, std::move(labelWiseStatisticMatrixPtr), std::move(this->scoreMatrixPtr_)); - } - }; - - template - static inline std::unique_ptr< - IExampleWiseStatistics> - createStatistics(const IExampleWiseLossFactory& lossFactory, - const IEvaluationMeasureFactory& 
evaluationMeasureFactory, - const IExampleWiseRuleEvaluationFactory& ruleEvaluationFactory, uint32 numThreads, - const LabelMatrix& labelMatrix) { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numLabels = labelMatrix.getNumCols(); - std::unique_ptr lossPtr = lossFactory.createExampleWiseLoss(); - std::unique_ptr evaluationMeasurePtr = evaluationMeasureFactory.createEvaluationMeasure(); - std::unique_ptr statisticMatrixPtr = - std::make_unique(numExamples, numLabels); - std::unique_ptr> scoreMatrixPtr = - std::make_unique>(numExamples, numLabels, true); - const IExampleWiseLoss* lossRawPtr = lossPtr.get(); - const LabelMatrix* labelMatrixPtr = &labelMatrix; - const CContiguousConstView* scoreMatrixRawPtr = scoreMatrixPtr.get(); - DenseExampleWiseStatisticMatrix* statisticMatrixRawPtr = statisticMatrixPtr.get(); - -#pragma omp parallel for firstprivate(numExamples) firstprivate(lossRawPtr) firstprivate(labelMatrixPtr) \ - firstprivate(scoreMatrixRawPtr) firstprivate(statisticMatrixRawPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numExamples; i++) { - lossRawPtr->updateExampleWiseStatistics(i, *labelMatrixPtr, *scoreMatrixRawPtr, *statisticMatrixRawPtr); - } - - return std::make_unique>( - std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, - std::move(statisticMatrixPtr), std::move(scoreMatrixPtr)); - } - - DenseExampleWiseStatisticsProviderFactory::DenseExampleWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr defaultRuleEvaluationFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads) - : lossFactoryPtr_(std::move(lossFactoryPtr)), - evaluationMeasureFactoryPtr_(std::move(evaluationMeasureFactoryPtr)), - defaultRuleEvaluationFactoryPtr_(std::move(defaultRuleEvaluationFactoryPtr)), - 
regularRuleEvaluationFactoryPtr_(std::move(regularRuleEvaluationFactoryPtr)), - pruningRuleEvaluationFactoryPtr_(std::move(pruningRuleEvaluationFactoryPtr)), numThreads_(numThreads) {} - - std::unique_ptr DenseExampleWiseStatisticsProviderFactory::create( - const CContiguousConstView& labelMatrix) const { - std::unique_ptr> - statisticsPtr = createStatistics(*lossFactoryPtr_, *evaluationMeasureFactoryPtr_, - *defaultRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique< - ExampleWiseStatisticsProvider>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr)); - } - - std::unique_ptr DenseExampleWiseStatisticsProviderFactory::create( - const BinaryCsrConstView& labelMatrix) const { - std::unique_ptr> - statisticsPtr = createStatistics(*lossFactoryPtr_, *evaluationMeasureFactoryPtr_, - *defaultRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique< - ExampleWiseStatisticsProvider>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr)); - } - - DenseConvertibleExampleWiseStatisticsProviderFactory::DenseConvertibleExampleWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr defaultRuleEvaluationFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads) - : lossFactoryPtr_(std::move(lossFactoryPtr)), - evaluationMeasureFactoryPtr_(std::move(evaluationMeasureFactoryPtr)), - defaultRuleEvaluationFactoryPtr_(std::move(defaultRuleEvaluationFactoryPtr)), - regularRuleEvaluationFactoryPtr_(std::move(regularRuleEvaluationFactoryPtr)), - pruningRuleEvaluationFactoryPtr_(std::move(pruningRuleEvaluationFactoryPtr)), numThreads_(numThreads) {} - - std::unique_ptr DenseConvertibleExampleWiseStatisticsProviderFactory::create( - const CContiguousConstView& labelMatrix) const { - std::unique_ptr> - 
statisticsPtr = createStatistics(*lossFactoryPtr_, *evaluationMeasureFactoryPtr_, - *defaultRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique< - ConvertibleExampleWiseStatisticsProvider>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr), numThreads_); - } - - std::unique_ptr DenseConvertibleExampleWiseStatisticsProviderFactory::create( - const BinaryCsrConstView& labelMatrix) const { - std::unique_ptr> - statisticsPtr = createStatistics(*lossFactoryPtr_, *evaluationMeasureFactoryPtr_, - *defaultRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique< - ConvertibleExampleWiseStatisticsProvider>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr), numThreads_); - } - -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise.hpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise.hpp deleted file mode 100644 index c3484318..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "boosting/statistics/statistics_label_wise.hpp" -#include "common/statistics/statistics_provider.hpp" - -namespace boosting { - - /** - * Provides access to an object of type `ILabelWiseStatistics`. 
- * - * @tparam RuleEvaluationFactory The type of the classes that may be used for calculating the predictions of rules, - * as well as their overall quality - */ - template - class LabelWiseStatisticsProvider final : public IStatisticsProvider { - private: - - const RuleEvaluationFactory& regularRuleEvaluationFactory_; - - const RuleEvaluationFactory& pruningRuleEvaluationFactory_; - - const std::unique_ptr> statisticsPtr_; - - public: - - /** - * @param regularRuleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` to - * switch to when invoking the function `switchToRegularRuleEvaluation` - * @param pruningRuleEvaluationFactory A reference to an object of template type `RuleEvaluationFactory` to - * switch to when invoking the function `switchToPruningRuleEvaluation` - * @param statisticsPtr An unique pointer to an object of type `ILabelWiseStatistics` to - * provide access to - */ - LabelWiseStatisticsProvider(const RuleEvaluationFactory& regularRuleEvaluationFactory, - const RuleEvaluationFactory& pruningRuleEvaluationFactory, - std::unique_ptr> statisticsPtr) - : regularRuleEvaluationFactory_(regularRuleEvaluationFactory), - pruningRuleEvaluationFactory_(pruningRuleEvaluationFactory), - statisticsPtr_(std::move(statisticsPtr)) {} - - /** - * @see `IStatisticsProvider::get` - */ - IStatistics& get() const override { - return *statisticsPtr_; - } - - /** - * @see `IStatisticsProvider::switchToRegularRuleEvaluation` - */ - void switchToRegularRuleEvaluation() override { - statisticsPtr_->setRuleEvaluationFactory(regularRuleEvaluationFactory_); - } - - /** - * @see `IStatisticsProvider::switchToPruningRuleEvaluation` - */ - void switchToPruningRuleEvaluation() override { - statisticsPtr_->setRuleEvaluationFactory(pruningRuleEvaluationFactory_); - } - }; - -} diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_dense.cpp 
b/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_dense.cpp deleted file mode 100644 index 7c2a1777..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_dense.cpp +++ /dev/null @@ -1,75 +0,0 @@ -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "boosting/statistics/statistics_provider_label_wise_dense.hpp" - -#include "omp.h" -#include "statistics_label_wise_dense.hpp" -#include "statistics_provider_label_wise.hpp" - -namespace boosting { - - template - static inline std::unique_ptr> createStatistics( - const ILabelWiseLossFactory& lossFactory, const IEvaluationMeasureFactory& evaluationMeasureFactory, - const ILabelWiseRuleEvaluationFactory& ruleEvaluationFactory, uint32 numThreads, const LabelMatrix& labelMatrix) { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numLabels = labelMatrix.getNumCols(); - std::unique_ptr lossPtr = lossFactory.createLabelWiseLoss(); - std::unique_ptr evaluationMeasurePtr = evaluationMeasureFactory.createEvaluationMeasure(); - std::unique_ptr statisticMatrixPtr = - std::make_unique(numExamples, numLabels); - std::unique_ptr> scoreMatrixPtr = - std::make_unique>(numExamples, numLabels, true); - const ILabelWiseLoss* lossRawPtr = lossPtr.get(); - const LabelMatrix* labelMatrixPtr = &labelMatrix; - const CContiguousConstView* scoreMatrixRawPtr = scoreMatrixPtr.get(); - DenseLabelWiseStatisticMatrix* statisticMatrixRawPtr = statisticMatrixPtr.get(); - -#pragma omp parallel for firstprivate(numExamples) firstprivate(lossRawPtr) firstprivate(labelMatrixPtr) \ - firstprivate(scoreMatrixRawPtr) firstprivate(statisticMatrixRawPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numExamples; i++) { - lossRawPtr->updateLabelWiseStatistics(i, *labelMatrixPtr, *scoreMatrixRawPtr, IndexIterator(), - IndexIterator(labelMatrixPtr->getNumCols()), *statisticMatrixRawPtr); - } - - return std::make_unique>( - 
std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, - std::move(statisticMatrixPtr), std::move(scoreMatrixPtr)); - } - - DenseLabelWiseStatisticsProviderFactory::DenseLabelWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr defaultRuleEvaluationFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads) - : lossFactoryPtr_(std::move(lossFactoryPtr)), - evaluationMeasureFactoryPtr_(std::move(evaluationMeasureFactoryPtr)), - defaultRuleEvaluationFactoryPtr_(std::move(defaultRuleEvaluationFactoryPtr)), - regularRuleEvaluationFactoryPtr_(std::move(regularRuleEvaluationFactoryPtr)), - pruningRuleEvaluationFactoryPtr_(std::move(pruningRuleEvaluationFactoryPtr)), numThreads_(numThreads) {} - - std::unique_ptr DenseLabelWiseStatisticsProviderFactory::create( - const CContiguousConstView& labelMatrix) const { - std::unique_ptr> statisticsPtr = createStatistics( - *lossFactoryPtr_, *evaluationMeasureFactoryPtr_, *defaultRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr)); - } - - std::unique_ptr DenseLabelWiseStatisticsProviderFactory::create( - const BinaryCsrConstView& labelMatrix) const { - std::unique_ptr> statisticsPtr = createStatistics( - *lossFactoryPtr_, *evaluationMeasureFactoryPtr_, *defaultRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr)); - } - -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_sparse.cpp b/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_sparse.cpp deleted file mode 100644 index 
76bc8147..00000000 --- a/cpp/subprojects/boosting/src/boosting/statistics/statistics_provider_label_wise_sparse.cpp +++ /dev/null @@ -1,170 +0,0 @@ -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "boosting/statistics/statistics_provider_label_wise_sparse.hpp" - -#include "boosting/data/histogram_view_label_wise_sparse.hpp" -#include "boosting/data/matrix_sparse_set_numeric.hpp" -#include "omp.h" -#include "statistics_label_wise_common.hpp" -#include "statistics_provider_label_wise.hpp" - -namespace boosting { - - /** - * A matrix that stores gradients and Hessians that have been calculated using a label-wise decomposable loss - * function in the list of lists (LIL) format. - */ - class SparseLabelWiseStatisticMatrix final : public SparseLabelWiseStatisticView { - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - SparseLabelWiseStatisticMatrix(uint32 numRows, uint32 numCols) - : SparseLabelWiseStatisticView(numCols, new SparseSetMatrix>(numRows, numCols)) {} - - ~SparseLabelWiseStatisticMatrix() override { - delete statistics_; - } - }; - - /** - * A histogram that stores gradients and Hessians that have been calculated using a label-wise decomposable loss - * function in the list of lists (LIL) format. 
- */ - class SparseLabelWiseHistogram final : public SparseLabelWiseHistogramView { - public: - - /** - * @param numBins The number of bins in the histogram - * @param numCols The number of columns in the histogram - */ - SparseLabelWiseHistogram(uint32 numBins, uint32 numCols) - : SparseLabelWiseHistogramView(numBins, numCols, new Triple[numBins * numCols], - new float64[numBins]) {} - - ~SparseLabelWiseHistogram() override { - delete[] statistics_; - delete[] weights_; - } - }; - - /** - * Provides access to gradients and Hessians that have been calculated according to a differentiable loss function - * that is applied label-wise and are stored using sparse data structures. - * - * @tparam LabelMatrix The type of the matrix that provides access to the labels of the training examples - */ - template - class SparseLabelWiseStatistics final - : public AbstractLabelWiseStatistics, - ISparseLabelWiseLoss, ISparseEvaluationMeasure, - ISparseLabelWiseRuleEvaluationFactory> { - public: - - /** - * @param lossPtr An unique pointer to an object of template type `LossFunction` that - * implements the loss function that should be used for calculating gradients - * and Hessians - * @param evaluationMeasurePtr An unique pointer to an object of type `ISparseEvaluationMeasure` that - * implements the evaluation measure that should be used to assess the quality - * of predictions for a specific statistic - * @param ruleEvaluationFactory A reference to an object of type `ISparseLabelWiseRuleEvaluationFactory`, - * that allows to create instances of the class that is used for calculating - * the predictions of rules, as well as their overall quality - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides access - * to the labels of the training examples - * @param statisticViewPtr An unique pointer to an object of type `SparseLabelWiseStatisticView` that - * provides access to the gradients and Hessians - * @param scoreMatrixPtr An unique 
pointer to an object of type `NumericSparseSetMatrix` that stores - * the currently predicted scores - */ - SparseLabelWiseStatistics(std::unique_ptr lossPtr, - std::unique_ptr evaluationMeasurePtr, - const ISparseLabelWiseRuleEvaluationFactory& ruleEvaluationFactory, - const LabelMatrix& labelMatrix, - std::unique_ptr statisticViewPtr, - std::unique_ptr> scoreMatrixPtr) - : AbstractLabelWiseStatistics, - ISparseLabelWiseLoss, ISparseEvaluationMeasure, - ISparseLabelWiseRuleEvaluationFactory>( - std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, - std::move(statisticViewPtr), std::move(scoreMatrixPtr)) {} - - /** - * @see `IBoostingStatistics::visitScoreMatrix` - */ - void visitScoreMatrix(IBoostingStatistics::DenseScoreMatrixVisitor denseVisitor, - IBoostingStatistics::SparseScoreMatrixVisitor sparseVisitor) const override { - sparseVisitor(*this->scoreMatrixPtr_); - } - }; - - template - static inline std::unique_ptr> createStatistics( - const ISparseLabelWiseLossFactory& lossFactory, const ISparseEvaluationMeasureFactory& evaluationMeasureFactory, - const ISparseLabelWiseRuleEvaluationFactory& ruleEvaluationFactory, uint32 numThreads, - const LabelMatrix& labelMatrix) { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numLabels = labelMatrix.getNumCols(); - std::unique_ptr lossPtr = lossFactory.createSparseLabelWiseLoss(); - std::unique_ptr evaluationMeasurePtr = - evaluationMeasureFactory.createSparseEvaluationMeasure(); - std::unique_ptr statisticMatrixPtr = - std::make_unique(numExamples, numLabels); - std::unique_ptr> scoreMatrixPtr = - std::make_unique>(numExamples, numLabels); - const ISparseLabelWiseLoss* lossRawPtr = lossPtr.get(); - const LabelMatrix* labelMatrixPtr = &labelMatrix; - const SparseSetMatrix* scoreMatrixRawPtr = scoreMatrixPtr.get(); - SparseLabelWiseStatisticMatrix* statisticMatrixRawPtr = statisticMatrixPtr.get(); - -#pragma omp parallel for firstprivate(numExamples) firstprivate(lossRawPtr) 
firstprivate(labelMatrixPtr) \ - firstprivate(scoreMatrixRawPtr) firstprivate(statisticMatrixRawPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numExamples; i++) { - lossRawPtr->updateLabelWiseStatistics(i, *labelMatrixPtr, *scoreMatrixRawPtr, IndexIterator(), - IndexIterator(labelMatrixPtr->getNumCols()), *statisticMatrixRawPtr); - } - - return std::make_unique>( - std::move(lossPtr), std::move(evaluationMeasurePtr), ruleEvaluationFactory, labelMatrix, - std::move(statisticMatrixPtr), std::move(scoreMatrixPtr)); - } - - SparseLabelWiseStatisticsProviderFactory::SparseLabelWiseStatisticsProviderFactory( - std::unique_ptr lossFactoryPtr, - std::unique_ptr evaluationMeasureFactoryPtr, - std::unique_ptr regularRuleEvaluationFactoryPtr, - std::unique_ptr pruningRuleEvaluationFactoryPtr, uint32 numThreads) - : lossFactoryPtr_(std::move(lossFactoryPtr)), - evaluationMeasureFactoryPtr_(std::move(evaluationMeasureFactoryPtr)), - regularRuleEvaluationFactoryPtr_(std::move(regularRuleEvaluationFactoryPtr)), - pruningRuleEvaluationFactoryPtr_(std::move(pruningRuleEvaluationFactoryPtr)), numThreads_(numThreads) {} - - std::unique_ptr SparseLabelWiseStatisticsProviderFactory::create( - const CContiguousConstView& labelMatrix) const { - std::unique_ptr> statisticsPtr = createStatistics( - *lossFactoryPtr_, *evaluationMeasureFactoryPtr_, *regularRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr)); - } - - std::unique_ptr SparseLabelWiseStatisticsProviderFactory::create( - const BinaryCsrConstView& labelMatrix) const { - std::unique_ptr> statisticsPtr = createStatistics( - *lossFactoryPtr_, *evaluationMeasureFactoryPtr_, *regularRuleEvaluationFactoryPtr_, numThreads_, labelMatrix); - return std::make_unique>( - *regularRuleEvaluationFactoryPtr_, *pruningRuleEvaluationFactoryPtr_, std::move(statisticsPtr)); - } - -} - -#ifdef _WIN32 
- #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/include/common/binning/bin_index_vector.hpp b/cpp/subprojects/common/include/common/binning/bin_index_vector.hpp deleted file mode 100644 index 41fa27e3..00000000 --- a/cpp/subprojects/common/include/common/binning/bin_index_vector.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -// Forward declarations -class IHistogram; -class IWeightedStatistics; - -/** - * Defines an interface for all classes that provide access to the indices of the bins, individual examples have been - * assigned to. - */ -class IBinIndexVector { - public: - - /** - * The index of the bin that contains sparse values. - */ - static const uint32 BIN_INDEX_SPARSE = std::numeric_limits::max(); - - virtual ~IBinIndexVector() {}; - - /** - * Returns the index of the bin, the example at a specific index has been assigned to. - * - * @param exampleIndex The index of the example - * @return The index of the bin, the example has been assigned to - */ - virtual uint32 getBinIndex(uint32 exampleIndex) const = 0; - - /** - * Sets the index of the bin, the examples at a specific index should be assigned to. - * - * @param exampleIndex The index of the example - * @param binIndex The index of the bin, the example should be assigned to - */ - virtual void setBinIndex(uint32 exampleIndex, uint32 binIndex) = 0; - - /** - * Creates and returns a new histogram based on given statistics and the indices that are stored by this vector. 
- * - * @param statistics A reference to an object of type `IWeightedStatistics` that should be used - * @param numBins The number of bins in the histogram - * @return An unique pointer to an object of type `IHistogram` that has been created - */ - virtual std::unique_ptr createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/binning/bin_index_vector_dense.hpp b/cpp/subprojects/common/include/common/binning/bin_index_vector_dense.hpp deleted file mode 100644 index 28ba8dbc..00000000 --- a/cpp/subprojects/common/include/common/binning/bin_index_vector_dense.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/bin_index_vector.hpp" -#include "common/data/vector_dense.hpp" - -/** - * Stores the indices of the bins, individual examples have been assigned to, using a C-contiguous array. - */ -class DenseBinIndexVector final : public IBinIndexVector { - private: - - DenseVector vector_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - DenseBinIndexVector(uint32 numElements); - - uint32 getBinIndex(uint32 exampleIndex) const override; - - void setBinIndex(uint32 exampleIndex, uint32 binIndex) override; - - std::unique_ptr createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const override; -}; diff --git a/cpp/subprojects/common/include/common/binning/bin_index_vector_dok.hpp b/cpp/subprojects/common/include/common/binning/bin_index_vector_dok.hpp deleted file mode 100644 index 84d05b8e..00000000 --- a/cpp/subprojects/common/include/common/binning/bin_index_vector_dok.hpp +++ /dev/null @@ -1,47 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/bin_index_vector.hpp" -#include "common/data/vector_dok.hpp" - -/** - * Stores the indices of the bins, individual examples have been assigned to, 
using the dictionaries of keys (DOK) - * format. - */ -class DokBinIndexVector final : public IBinIndexVector { - private: - - DokVector vector_; - - public: - - DokBinIndexVector(); - - /** - * An iterator that provides access to the elements in the vector. - */ - typedef DokVector::iterator iterator; - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end(); - - uint32 getBinIndex(uint32 exampleIndex) const override; - - void setBinIndex(uint32 exampleIndex, uint32 binIndex) override; - - std::unique_ptr createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const override; -}; diff --git a/cpp/subprojects/common/include/common/binning/bin_weight_vector.hpp b/cpp/subprojects/common/include/common/binning/bin_weight_vector.hpp deleted file mode 100644 index 7682af21..00000000 --- a/cpp/subprojects/common/include/common/binning/bin_weight_vector.hpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" - -/** - * A vector that stores the weights of individual bins, i.e., how many examples have been assigned to them. - */ -class BinWeightVector final { - private: - - DenseVector vector_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - BinWeightVector(uint32 numElements); - - /** - * Resets all weights to zero. - */ - void clear(); - - /** - * Increases the weight at a specific position by one. - * - * @param pos The position - */ - void increaseWeight(uint32 pos); - - /** - * Returns whether the weight at a specific position is non-zero or not. 
- * - * @param pos The position - * @return True, if the weight is non-zero, false otherwise - */ - bool operator[](uint32 pos) const; - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements - */ - uint32 getNumElements() const; -}; diff --git a/cpp/subprojects/common/include/common/binning/feature_binning.hpp b/cpp/subprojects/common/include/common/binning/feature_binning.hpp deleted file mode 100644 index e5b55b7e..00000000 --- a/cpp/subprojects/common/include/common/binning/feature_binning.hpp +++ /dev/null @@ -1,92 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/bin_index_vector.hpp" -#include "common/binning/threshold_vector.hpp" -#include "common/input/feature_matrix.hpp" -#include "common/input/feature_vector.hpp" -#include "common/input/label_matrix.hpp" -#include "common/thresholds/thresholds.hpp" - -#include - -/** - * Defines an interface for methods that assign feature values to bins. - */ -class IFeatureBinning { - public: - - /** - * The result that is returned by a binning method. It contains an unique pointer to a vector that stores the - * thresholds that result from the boundaries of the bins, as well as to a vector that stores the indices of the - * bins, individual values have been assigned to. - */ - struct Result final { - public: - - /** - * An unique pointer to an object of type `ThresholdVector` that provides access to the thresholds that - * result from the boundaries of the bins. - */ - std::unique_ptr thresholdVectorPtr; - - /** - * An unique pointer to an object of type `IBinIndexVector` that provides access to the indices of the - * bins, individual values have been assigned to. - */ - std::unique_ptr binIndicesPtr; - }; - - virtual ~IFeatureBinning() {}; - - /** - * Assigns the values in a given `FeatureVector` to bins. 
- * - * @param featureVector A reference to an object of type `FeatureVector` whose values should be assigned to bins - * @param numExamples The total number of available training examples - * @return An object of type `Result` that contains a vector, which stores thresholds that result - * from the boundaries between the bins, as well as a vector that stores the indices of the - * bins, individual values have been assigned to - */ - virtual Result createBins(FeatureVector& featureVector, uint32 numExamples) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IFeatureBinning`. - */ -class IFeatureBinningFactory { - public: - - virtual ~IFeatureBinningFactory() {}; - - /** - * Creates and returns a new object of type `IFeatureBinning`. - * - * @return An unique pointer to an object of type `IFeatureBinning` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method that assigns feature values to bins. - */ -class IFeatureBinningConfig { - public: - - virtual ~IFeatureBinningConfig() {}; - - /** - * Creates and returns a new object of type `IThresholdsFactory` according to the specified configuration. 
- * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature - * values of the training examples - * @param labelMatrix A reference to an object of type `ILabelMatrix` that provides access to the labels of - * the training examples - * @return An unique pointer to an object of type `IThresholdsFactory` that has been created - */ - virtual std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/binning/feature_binning_equal_frequency.hpp b/cpp/subprojects/common/include/common/binning/feature_binning_equal_frequency.hpp deleted file mode 100644 index 5855664f..00000000 --- a/cpp/subprojects/common/include/common/binning/feature_binning_equal_frequency.hpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/feature_binning.hpp" -#include "common/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" - -/** - * Defines an interface for all classes that allow to configure a method that assigns numerical feature values to bins, - * such that each bins contains approximately the same number of values. - */ -class MLRLCOMMON_API IEqualFrequencyFeatureBinningConfig { - public: - - virtual ~IEqualFrequencyFeatureBinningConfig() {}; - - /** - * Returns the percentage that specifies how many bins are used. - * - * @return The percentage that specifies how many bins are used - */ - virtual float32 getBinRatio() const = 0; - - /** - * Sets the percentage that specifies how many bins should be used. - * - * @param binRatio The percentage that specifies how many bins should be used, e.g., if 100 values are - * available, a percentage of 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. 
Must - * be in (0, 1) - * @return A reference to an object of type `IEqualFrequencyFeatureBinningConfig` that allows further - * configuration of the method that assigns numerical feature values to bins - */ - virtual IEqualFrequencyFeatureBinningConfig& setBinRatio(float32 binRatio) = 0; - - /** - * Returns the minimum number of bins that is used. - * - * @return The minimum number of bins that is used - */ - virtual uint32 getMinBins() const = 0; - - /** - * Sets the minimum number of bins that should be used. - * - * @param minBins The minimum number of bins that should be used. Must be at least 2 - * @return A reference to an object of type `IEqualFrequencyFeatureBinningConfig` that allows further - * configuration of the method that assigns numerical feature values to bins - */ - virtual IEqualFrequencyFeatureBinningConfig& setMinBins(uint32 minBins) = 0; - - /** - * Returns the maximum number of bins that is used. - * - * @return The maximum number of bins that is used - */ - virtual uint32 getMaxBins() const = 0; - - /** - * Sets the maximum number of bins that should be used. - * - * @param maxBins The maximum number of bins that should be used. Must be at least the minimum number of bins - * or 0, if the maximum number of bins should not be restricted - * @return A reference to an object of type `IEqualFrequencyFeatureBinningConfig` that allows further - * configuration of the method that assigns numerical feature values to bins - */ - virtual IEqualFrequencyFeatureBinningConfig& setMaxBins(uint32 maxBins) = 0; -}; - -/** - * Allows to configure a method that assigns numerical feature values to bins, such that each bins contains - * approximately the same number of values. 
- */ -class EqualFrequencyFeatureBinningConfig final : public IFeatureBinningConfig, - public IEqualFrequencyFeatureBinningConfig { - private: - - float32 binRatio_; - - uint32 minBins_; - - uint32 maxBins_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - EqualFrequencyFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); - - float32 getBinRatio() const override; - - IEqualFrequencyFeatureBinningConfig& setBinRatio(float32 binRatio) override; - - uint32 getMinBins() const override; - - IEqualFrequencyFeatureBinningConfig& setMinBins(uint32 minBins) override; - - uint32 getMaxBins() const override; - - IEqualFrequencyFeatureBinningConfig& setMaxBins(uint32 maxBins) override; - - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/binning/feature_binning_equal_width.hpp b/cpp/subprojects/common/include/common/binning/feature_binning_equal_width.hpp deleted file mode 100644 index 2c48c9fd..00000000 --- a/cpp/subprojects/common/include/common/binning/feature_binning_equal_width.hpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/feature_binning.hpp" -#include "common/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" - -/** - * Defines an interface for all classes that allow to configure a method that assigns numerical feature values to bins, - * such that each bin contains values from equally sized value ranges. 
- */ -class MLRLCOMMON_API IEqualWidthFeatureBinningConfig { - public: - - virtual ~IEqualWidthFeatureBinningConfig() {}; - - /** - * Returns the percentage that specifies how many bins are used. - * - * @return The percentage that specifies how many bins are used - */ - virtual float32 getBinRatio() const = 0; - - /** - * Sets the percentage that specifies how many bins should be used. - * - * @param binRatio The percentage that specifies how many bins should be used, e.g., if 100 values are - * available, a percentage of 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. Must - * be in (0, 1) - * @return A reference to an object of type `IEqualWidthFeatureBinningConfig` that allows further - * configuration of the method that assigns numerical feature values to bins - */ - virtual IEqualWidthFeatureBinningConfig& setBinRatio(float32 binRatio) = 0; - - /** - * Returns the minimum number of bins that is used. - * - * @return The minimum number of bins that is used - */ - virtual uint32 getMinBins() const = 0; - - /** - * Sets the minimum number of bins that should be used. - * - * @param minBins The minimum number of bins that should be used. Must be at least 2 - * @return A reference to an object of type `IEqualWidthFeatureBinningConfig` that allows further - * configuration of the method that assigns numerical feature values to bins - */ - virtual IEqualWidthFeatureBinningConfig& setMinBins(uint32 minBins) = 0; - - /** - * Returns the maximum number of bins that is used. - * - * @return The maximum number of bins that is used - */ - virtual uint32 getMaxBins() const = 0; - - /** - * Sets the maximum number of bins that should be used. - * - * @param maxBins The maximum number of bins that should be used. 
Must be at least the minimum number of bins - * or 0, if the maximum number of bins should not be restricted - * @return A reference to an object of type `IEqualWidthFeatureBinningConfig` that allows further - * configuration of the method that assigns numerical feature values to bins - */ - virtual IEqualWidthFeatureBinningConfig& setMaxBins(uint32 maxBins) = 0; -}; - -/** - * Allows to configure a method that assigns numerical feature values to bins, such that each bin contains values from - * equally sized value ranges. - */ -class EqualWidthFeatureBinningConfig final : public IFeatureBinningConfig, - public IEqualWidthFeatureBinningConfig { - private: - - float32 binRatio_; - - uint32 minBins_; - - uint32 maxBins_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - EqualWidthFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); - - float32 getBinRatio() const override; - - IEqualWidthFeatureBinningConfig& setBinRatio(float32 binRatio) override; - - uint32 getMinBins() const override; - - IEqualWidthFeatureBinningConfig& setMinBins(uint32 minBins) override; - - uint32 getMaxBins() const override; - - IEqualWidthFeatureBinningConfig& setMaxBins(uint32 maxBins) override; - - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/binning/feature_binning_no.hpp b/cpp/subprojects/common/include/common/binning/feature_binning_no.hpp deleted file mode 100644 index 793f7f75..00000000 --- a/cpp/subprojects/common/include/common/binning/feature_binning_no.hpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include 
"common/binning/feature_binning.hpp" -#include "common/multi_threading/multi_threading.hpp" - -/** - * Allows to configure a method that does not actually perform any feature binning. - */ -class NoFeatureBinningConfig final : public IFeatureBinningConfig { - private: - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel update of - * statistics - */ - NoFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr); - - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/binning/threshold_vector.hpp b/cpp/subprojects/common/include/common/binning/threshold_vector.hpp deleted file mode 100644 index 1d830581..00000000 --- a/cpp/subprojects/common/include/common/binning/threshold_vector.hpp +++ /dev/null @@ -1,103 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" -#include "common/input/missing_feature_vector.hpp" - -/** - * An one-dimensional vector that stores thresholds that may be used by conditions. 
- */ -class ThresholdVector final : public MissingFeatureVector { - private: - - DenseVector vector_; - - uint32 sparseBinIndex_; - - public: - - /** - * @param missingFeatureVector A reference to an object of type `MissingFeatureVector` the missing indices - * should be taken from - * @param numElements The number of elements in the vector - */ - ThresholdVector(MissingFeatureVector& missingFeatureVector, uint32 numElements); - - /** - * @param missingFeatureVector A reference to an object of type `MissingFeatureVector` the missing indices - * should be taken from - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - ThresholdVector(MissingFeatureVector& missingFeatureVector, uint32 numElements, bool init); - - /** - * An iterator that provides access to the thresholds in the vector and allows to modify them. - */ - typedef DenseVector::iterator iterator; - - /** - * An iterator that provides read-only access to the thresholds in the vector. - */ - typedef const DenseVector::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements in the vector - */ - uint32 getNumElements() const; - - /** - * Sets the number of elements in the vector. 
- * - * @param numElements The number of elements to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumElements(uint32 numElements, bool freeMemory); - - /** - * Returns the index of the bin, sparse values have been assigned to. - * - * @return The index of the bin, sparse values have been assigned to. If there is no such bin, the returned - * index is equal to `getNumElements()` - */ - uint32 getSparseBinIndex() const; - - /** - * Sets the index of the bin, sparse values have been assigned to. - * - * @param sparseBinIndex The index to be set - */ - void setSparseBinIndex(uint32 sparseBinIndex); -}; diff --git a/cpp/subprojects/common/include/common/data/arrays.hpp b/cpp/subprojects/common/include/common/data/arrays.hpp deleted file mode 100644 index 27df41e9..00000000 --- a/cpp/subprojects/common/include/common/data/arrays.hpp +++ /dev/null @@ -1,164 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include -#include - -/** - * Sets all elements in an array to zero. - * - * @tparam T The type of the array - * @param a A pointer to an array of template type `T` - * @param numElements The number of elements in the array - */ -template -static inline void setArrayToZeros(T* a, uint32 numElements) { - std::fill(a, a + numElements, 0); -} - -/** - * Sets all elements in an array to a specific value. - * - * @tparam T The type of the array - * @param a A pointer to an array of template type `T` - * @param numElements The number of elements in the array - * @param value The value to be set - */ -template -static inline void setArrayToValue(T* a, uint32 numElements, T value) { - std::fill(a, a + numElements, value); -} - -/** - * Sets the elements in an array to increasing values. 
- * - * @tparam T The type of the array - * @param a A pointer to an array of template type `T` - * @param numElements The number of elements in the array - * @param start The value to start at - * @param increment The difference between the values - */ -template -static inline void setArrayToIncreasingValues(T* a, uint32 numElements, T start, T increment) { - T nextValue = start; - - for (uint32 i = 0; i < numElements; i++) { - a[i] = nextValue; - nextValue += increment; - } -} - -/** - * Copy all elements from one array another one. - * - * @tparam T The type of the arrays - * @param from A pointer to an array of template type `T` to be copied - * @param to A pointer to an array of template type `T`, the elements should be copied to - * @param numElements The number of elements to be copied - */ -template -static inline void copyArray(const T* from, T* to, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - to[i] = from[i]; - } -} - -/** - * Copy all elements from an iterator to an array. - * - * @tparam FromIterator The type of the iterator to copy from - * @tparam T The type of the array to copy to - * @param from The iterator to copy from - * @param to The array to copy to - * @param numElements The number of elements to be copied - */ -template -static inline void copyArray(FromIterator from, T* to, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - to[i] = from[i]; - } -} - -/** - * Sets all elements in an array `a` to the difference between the elements in two other arrays `b` and `c`, such that - * `a = b - c`. 
- * - * @tparam T The type of the arrays `a`, `b` and `c` - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param c A pointer to an array of template type `T` - * @param numElements The number of elements in the arrays `a`, `b` and `c` - */ -template -static inline void setArrayToDifference(T* a, const T* b, const T* c, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - a[i] = b[i] - c[i]; - } -} - -/** - * Sets all elements in an array `a` to the difference between the elements in two other array `b` and `c`, such that - * `a = b - c`. The indices of elements in the array `b` that correspond to the elements in arrays `a` and `c` are given - * as an additional array. - * - * @tparam T The type of the arrays `a`, `b` and `c` - * @param a A pointer to an array of template type `T` to be updated - * @param b A pointer to an array of template type `T` - * @param c A pointer to an array of template type `T` - * @param indices A pointer to an array of type `uint32` that stores the indices of the elements in the array `b` - * that correspond to the elements in arrays `a` and `c` - * @param numElements The number of elements in the array `a` - */ -template -static inline void setArrayToDifference(T* a, const T* b, const T* c, const uint32* indices, uint32 numElements) { - for (uint32 i = 0; i < numElements; i++) { - uint32 index = indices[i]; - a[i] = b[index] - c[i]; - } -} - -/** - * Calculates and returns a hash value from an array of type `uint32`. 
- * - * @param a A pointer to an array of type `uint32` - * @param numElements The number of elements in the array - * @return The hash value - */ -static inline constexpr std::size_t hashArray(const uint32* a, uint32 numElements) { - std::size_t hashValue = (std::size_t) numElements; - - for (uint32 i = 0; i < numElements; i++) { - hashValue ^= a[i] + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); - } - - return hashValue; -} - -/** - * Returns whether two arrays are equal or not. - * - * @tparam T The type of the arrays - * @param first A pointer to an array of template type `T` - * @param numFirst The number of elements in the array `first` - * @param second A pointer to another array of template type `T` - * @param numSecond The number of elements in the array `second` - * @return True, if both arrays are equal, false otherwise - */ -template -static inline constexpr bool compareArrays(const T* first, uint32 numFirst, const T* second, uint32 numSecond) { - if (numFirst != numSecond) { - return false; - } - - for (uint32 i = 0; i < numFirst; i++) { - if (first[i] != second[i]) { - return false; - } - } - - return true; -} diff --git a/cpp/subprojects/common/include/common/data/indexed_value.hpp b/cpp/subprojects/common/include/common/data/indexed_value.hpp deleted file mode 100644 index c13058a1..00000000 --- a/cpp/subprojects/common/include/common/data/indexed_value.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * A tuple that consists of an index and a value. - * - * @tparam T The type of the value - */ -template -struct IndexedValue final { - public: - - /** - * Allows to compare two objects of type `IndexedValue` by their index. - */ - struct CompareIndex final { - public: - - /** - * Returns whether the a given object of type `IndexedValue` should go before a second one. 
- * - * @param lhs A reference to a first object of type `IndexedValue` - * @param rhs A reference to a second object of type `IndexedValue` - * @return True, if the first object should go before the second one, false otherwise - */ - inline bool operator()(const IndexedValue& lhs, const IndexedValue& rhs) const { - return lhs.index < rhs.index; - } - }; - - /** - * Allows to compare two objects of type `IndexedValue` by their value. - */ - struct CompareValue final { - public: - - /** - * Returns whether the a given object of type `IndexedValue` should go before a second one. - * - * @param lhs A reference to a first object of type `IndexedValue` - * @param rhs A reference to a second object of type `IndexedValue` - * @return True, if the first object should go before the second one, false otherwise - */ - inline bool operator()(const IndexedValue& lhs, const IndexedValue& rhs) const { - return lhs.value < rhs.value; - } - }; - - IndexedValue() {} - - /** - * @param i The index - */ - IndexedValue(uint32 i) : index(i) {} - - /** - * @param i The index - * @param v The value - */ - IndexedValue(uint32 i, T v) : index(i), value(v) {} - - /** - * The index. - */ - uint32 index; - - /** - * The value. - */ - T value; -}; diff --git a/cpp/subprojects/common/include/common/data/list_of_lists.hpp b/cpp/subprojects/common/include/common/data/list_of_lists.hpp deleted file mode 100644 index e6e21cb2..00000000 --- a/cpp/subprojects/common/include/common/data/list_of_lists.hpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -/** - * A two-dimensional matrix that provides row-wise access to data stored in a lists of lists. 
- * - * @tparam T The type of the data that is stored by the matrix - */ -template -class ListOfLists final { - private: - - const uint32 numRows_; - - std::vector* array_; - - public: - - /** - * @param numRows The number of rows in the matrix - */ - ListOfLists(uint32 numRows); - - ~ListOfLists(); - - /** - * Provides access to a row and allows to modify its elements. - */ - typedef typename std::vector& row; - - /** - * Provides read-only access to a row. - */ - typedef const typename std::vector& const_row; - - /** - * An iterator that provides access to the elements at a row and allows to modify them. - */ - typedef typename std::vector::iterator iterator; - - /** - * An iterator that provides read-only access to the elements at a row. - */ - typedef typename std::vector::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of a specific row. - * - * @param row The row - * @return An `iterator` to the beginning - */ - iterator begin(uint32 row); - - /** - * Returns an `iterator` to the end of a specific row. - * - * @param row The row - * @return An `iterator` to the end - */ - iterator end(uint32 row); - - /** - * Returns a `const_iterator` to the beginning of a specific row. - * - * @param row The row - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin(uint32 row) const; - - /** - * Returns a `const_iterator` to the end of a specific row. - * - * @param row The row - * @return A `const_iterator` to the end - */ - const_iterator cend(uint32 row) const; - - /** - * Provides access to a specific row and allows to modify its elements. - * - * @param row The index of the row - * @return A `row` - */ - row operator[](uint32 row); - - /** - * Provides read-only access to a specific row. - * - * @param row The index of the row - * @return A `const_row` - */ - const_row operator[](uint32 row) const; - - /** - * Returns the number of rows in the matrix. 
- * - * @return The number of rows - */ - uint32 getNumRows() const; - - /** - * Sets the values of all elements to zero. - */ - void clear(); -}; diff --git a/cpp/subprojects/common/include/common/data/matrix_c_contiguous.hpp b/cpp/subprojects/common/include/common/data/matrix_c_contiguous.hpp deleted file mode 100644 index 308a6206..00000000 --- a/cpp/subprojects/common/include/common/data/matrix_c_contiguous.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" - -/** - * A two-dimensional matrix that provides random access to a fixed number of elements stored in a C-contiguous array. - * - * @tparam T The type of the data that is stored in the matrix - */ -template -class CContiguousMatrix : public CContiguousView { - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - CContiguousMatrix(uint32 numRows, uint32 numCols); - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - * @param init True, if all elements in the matrix should be value-initialized, false otherwise - */ - CContiguousMatrix(uint32 numRows, uint32 numCols, bool init); - - virtual ~CContiguousMatrix() override; -}; diff --git a/cpp/subprojects/common/include/common/data/matrix_lil.hpp b/cpp/subprojects/common/include/common/data/matrix_lil.hpp deleted file mode 100644 index a47691b8..00000000 --- a/cpp/subprojects/common/include/common/data/matrix_lil.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/indexed_value.hpp" -#include "common/data/list_of_lists.hpp" - -/** - * A two-dimensional matrix that provides row-wise access to data that is stored in the list of lists (LIL) format. 
- * - * @tparam T The type of the data that is stored by the matrix - */ -template -using LilMatrix = ListOfLists>; diff --git a/cpp/subprojects/common/include/common/data/matrix_lil_binary.hpp b/cpp/subprojects/common/include/common/data/matrix_lil_binary.hpp deleted file mode 100644 index a2c07c2f..00000000 --- a/cpp/subprojects/common/include/common/data/matrix_lil_binary.hpp +++ /dev/null @@ -1,12 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/list_of_lists.hpp" - -/** - * A two-dimensional matrix that provides row-wise access to binary values that are stored in the list of lists (LIL) - * format. - */ -typedef ListOfLists BinaryLilMatrix; diff --git a/cpp/subprojects/common/include/common/data/matrix_sparse_set.hpp b/cpp/subprojects/common/include/common/data/matrix_sparse_set.hpp deleted file mode 100644 index 017d926d..00000000 --- a/cpp/subprojects/common/include/common/data/matrix_sparse_set.hpp +++ /dev/null @@ -1,274 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_c_contiguous.hpp" -#include "common/data/matrix_lil.hpp" - -/** - * A two-dimensional matrix that provides row-wise access to data that is stored in the list of lists (LIL) format. In - * contrast to a `LilMatrix`, this matrix does also provide random access to its elements. This additional functionality - * comes at the expense of memory efficiency, as it requires to not only maintain a sparse matrix that stores the - * non-zero elements, but also a dense matrix that stores for each element the corresponding position in the sparse - * matrix, if available. - * - * The data structure that is used for the representation of a single row is often referred to as an "unordered sparse - * set". It was originally proposed in "An efficient representation for sparse sets", Briggs, Torczon, 1993 (see - * https://dl.acm.org/doi/pdf/10.1145/176454.176484). 
- * - * @tparam T The type of the values that are stored in the matrix - */ -template -class SparseSetMatrix : virtual public ITwoDimensionalView { - private: - - /** - * Provides read-only access to a single row in the matrix. - */ - class ConstRow final { - private: - - const typename LilMatrix::const_row row_; - - typename CContiguousView::value_const_iterator indexIterator_; - - public: - - /** - * @param row A `LilMatrix::const_row` that provides access to the non-zero elements at the - * row - * @param indexIterator An iterator that provides access to the indices in `row` that correspond to - * individual columns - */ - ConstRow(const typename LilMatrix::const_row row, - CContiguousView::value_const_iterator indexIterator); - - /** - * An iterator that provides read-only access to the elements in the row. - */ - typedef typename LilMatrix::const_iterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the row. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the row. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of non-zero elements in the row. - * - * @return The number of non-zero elements in the row - */ - uint32 getNumElements() const; - - /** - * Returns a pointer to the element that corresponds to a specific index. - * - * @param index The index of the element to be returned - * @return A pointer to the element that corresponds to the given index or a null pointer, if no - * such element is available - */ - const IndexedValue* operator[](uint32 index) const; - }; - - /** - * Provides access to a single row in the matrix and allows to modify its elements. 
- */ - class Row final { - private: - - const typename LilMatrix::row row_; - - typename CContiguousView::value_iterator indexIterator_; - - public: - - /** - * @param row A `LilMatrix::row` that provides access to the the non-zero elements at the row - * @param indexIterator An iterator that provides access to the indices in `row` that correspond to - * individual columns - */ - Row(const typename LilMatrix::row row, CContiguousView::value_iterator indexIterator); - - /** - * Returns a `LilMatrix::iterator` to the beginning of the row. - * - * @return A `LilMatrix::iterator` to the beginning - */ - typename LilMatrix::iterator begin(); - - /** - * Returns a `LilMatrix::iterator` to the end of the row. - * - * @return A `LilMatrix::iterator` to the end - */ - typename LilMatrix::iterator end(); - - /** - * Returns a `LilMatrix::const_iterator` to the beginning of the row. - * - * @return A `LilMatrix::const_iterator` to the beginning - */ - typename LilMatrix::const_iterator cbegin() const; - - /** - * Returns a `LilMatrix::const_iterator` to the end of the row. - * - * @return A `LilMatrix::const_iterator` to the end - */ - typename LilMatrix::const_iterator cend() const; - - /** - * Returns the number of non-zero elements in the row. - * - * @return The number of non-zero elements in the row - */ - uint32 getNumElements() const; - - /** - * Returns a pointer to the element that corresponds to a specific index. - * - * @param index The index of the element to be returned - * @return A pointer to the element that corresponds to the given index or a null pointer, if no - * such element is available - */ - const IndexedValue* operator[](uint32 index) const; - - /** - * Returns a reference to the element that corresponds to a specific index. If no such element is - * available, it is inserted into the vector. 
- * - * @param index The index of the element to be returned - * @return A reference to the element that corresponds to the given index - */ - IndexedValue& emplace(uint32 index); - - /** - * Returns a reference to the element that corresponds to a specific index. If no such element is - * available, it is inserted into the vector using a specific default value. - * - * @param index The index of the element to be returned - * @param defaultValue The default value to be used - * @return A reference to the element that corresponds to the given index - */ - IndexedValue& emplace(uint32 index, const T& defaultValue); - - /** - * Removes the element that corresponds to a specific index, if available. - * - * @param index The index of the element to be removed - */ - void erase(uint32 index); - - /** - * Removes all elements from the row. - */ - void clear(); - }; - - LilMatrix lilMatrix_; - - CContiguousMatrix indexMatrix_; - - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - SparseSetMatrix(uint32 numRows, uint32 numCols); - - virtual ~SparseSetMatrix() override {}; - - /** - * Provides access to a row and allows to modify its elements. - */ - typedef typename SparseSetMatrix::Row row; - - /** - * Provides read-only access to a row. - */ - typedef typename SparseSetMatrix::ConstRow const_row; - - /** - * An iterator that provides access to the elements at a row and allows to modify them. - */ - typedef typename LilMatrix::iterator iterator; - - /** - * An iterator that provides read-only access to the elements at a row. - */ - typedef typename LilMatrix::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of a specific row. - * - * @param row The row - * @return An `iterator` to the beginning - */ - iterator begin(uint32 row); - - /** - * Returns an `iterator` to the end of a specific row. 
- * - * @param row The row - * @return An `iterator` to the end - */ - iterator end(uint32 row); - - /** - * Returns a `const_iterator` to the beginning of a specific row. - * - * @param row The row - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin(uint32 row) const; - - /** - * Returns a `const_iterator` to the end of a specific row. - * - * @param row The row - * @return A `const_iterator` to the end - */ - const_iterator cend(uint32 row) const; - - /** - * Provides access to a specific row and allows to modify its elements. - * - * @param row The index of the row - * @return A `row` - */ - row operator[](uint32 row); - - /** - * Provides read-only access to a specific row. - * - * @param row The index of the row - * @return A `const_row` - */ - const_row operator[](uint32 row) const; - - /** - * Sets the values of all elements to zero. - */ - void clear(); - - /** - * @see `ITwoDimensionalView::getNumRows` - */ - uint32 getNumRows() const override; - - /** - * @see `ITwoDimensionalView::getNumCols` - */ - uint32 getNumCols() const override; -}; diff --git a/cpp/subprojects/common/include/common/data/ring_buffer.hpp b/cpp/subprojects/common/include/common/data/ring_buffer.hpp deleted file mode 100644 index 6c6433ce..00000000 --- a/cpp/subprojects/common/include/common/data/ring_buffer.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -/** - * A ring buffer with fixed capacity. - * - * @tparam T The type of the values that are stored in the buffer - */ -template -class RingBuffer final { - private: - - T* array_; - - const uint32 capacity_; - - uint32 pos_; - - bool full_; - - public: - - /** - * @param capacity The maximum capacity of the buffer. Must be at least 1 - */ - RingBuffer(uint32 capacity); - - ~RingBuffer(); - - /** - * An iterator that provides read-only access to the elements in the buffer. 
- */ - typedef const T* const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the buffer. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the buffer. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the maximum capacity of the buffer. - * - * @return The maximum capacity - */ - uint32 getCapacity() const; - - /** - * Returns the number of values in the buffer. - * - * @return The number of values - */ - uint32 getNumElements() const; - - /** - * Returns whether the maximum capacity of the buffer has been reached or not. - * - * @return True, if the maximum capacity has been reached, false otherwise - */ - bool isFull() const; - - /** - * Adds a new value to the buffer. If the maximum capacity of the buffer has been reached, the oldest value will - * be overwritten. - * - * @param value The value to be added - * @return A `std::pair`, whose first value indicates whether a value has been overwritten or not. If a - * value has been overwritten, the pair's second value is set to the overwritten value, otherwise - * it is undefined - */ - std::pair push(T value); -}; diff --git a/cpp/subprojects/common/include/common/data/triple.hpp b/cpp/subprojects/common/include/common/data/triple.hpp deleted file mode 100644 index c1729ad9..00000000 --- a/cpp/subprojects/common/include/common/data/triple.hpp +++ /dev/null @@ -1,185 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * A triple that consists of three values of the same type. - * - * @tparam T The type of the values - */ -template -struct Triple final { - public: - - Triple() {} - - /** - * @param f The first value - * @param s The second value - * @param t The third value - */ - Triple(T f, T s, T t) : first(f), second(s), third(t) {} - - /** - * The first value. 
- */ - T first; - - /** - * The second value. - */ - T second; - - /** - * The third value. - */ - T third; - - /** - * Assigns a specific value to the first, second and third value of this triple. - * - * @param rhs A reference to the value to be assigned - * @return A reference to the modified triple - */ - Triple& operator=(const T& rhs) { - first = rhs; - second = rhs; - third = rhs; - return *this; - } - - /** - * Adds a specific value to the first, second and third value of this triple. - * - * @param rhs A reference to the value to be added - * @return A reference to the modified triple - */ - Triple& operator+=(const T& rhs) { - first += rhs; - second += rhs; - third += rhs; - return *this; - } - - /** - * Creates and returns a new triple that results from adding a specific value to the first, second and third - * value of an existing triple. - * - * @param lhs The original triple - * @param rhs A reference to the value to be added - * @return The triple that has been created - */ - friend Triple operator+(Triple lhs, const T& rhs) { - lhs += rhs; - return lhs; - } - - /** - * Adds the first, second and third value of a given triple to the first, second and third value of this triple, - * respectively, - * - * @param rhs A reference to the triple, whose first, second and third value should be added - * @return A reference to the modified triple - */ - Triple& operator+=(const Triple& rhs) { - first += rhs.first; - second += rhs.second; - third += rhs.third; - return *this; - } - - /** - * Creates and returns a new triple that results from adding the first, second and third value of a specific - * triple to the first, second and third value of an existing triple, respectively. 
- * - * @param lhs The original triple - * @param rhs A reference to the triple, whose first, second and third value should be added - * @return The triple that has been created - */ - friend Triple operator+(Triple lhs, const Triple& rhs) { - lhs += rhs; - return lhs; - } - - /** - * Subtracts a specific value from the first, second and third value of this triple. - * - * @param rhs A reference to the value to be subtracted - * @return A reference to the modified triple - */ - Triple& operator-=(const T& rhs) { - first -= rhs; - second -= rhs; - third -= rhs; - return *this; - } - - /** - * Creates and returns a new triple that results from subtracting a specific value from the first, second and - * third value of an existing triple, respectively. - * - * @param lhs The original triple - * @param rhs A reference to the value to be subtracted - * @return The triple that has been created - */ - friend Triple operator-(Triple lhs, const T& rhs) { - lhs -= rhs; - return lhs; - } - - /** - * Subtracts the first, second and third value of a given triple from the first, second and third value of this - * triple, respectively. - * - * @param rhs A reference to the triple, whose first, second and third value should be subtracted - * @return A reference to the modified triple - */ - Triple& operator-=(const Triple& rhs) { - first -= rhs.first; - second -= rhs.second; - third -= rhs.third; - return *this; - } - - /** - * Creates and returns a new triple that results from subtracting the first, second and third value of a - * specific triple from the first, second and third value of an existing triple, respectively. - * - * @param lhs The original triple - * @param rhs A reference to the value to be subtracted - * @return The triple that has been created - */ - friend Triple operator-(Triple lhs, const Triple& rhs) { - lhs -= rhs; - return lhs; - } - - /** - * Multiplies the first, second and third value of this triple with a specific value. 
- * - * @param rhs A reference to the value to be multiplied by - * @return A reference to the modified triple - */ - Triple& operator*=(const T& rhs) { - first *= rhs; - second *= rhs; - third *= rhs; - return *this; - } - - /** - * Creates and returns a new triple that results from multiplying the first, second and third value of an - * existing triple with a specific value. - * - * @param lhs The original triple - * @param rhs A reference to the value to be multiplied by - * @return The triple that has been created - */ - friend Triple operator*(Triple lhs, const T& rhs) { - lhs *= rhs; - return lhs; - } -}; diff --git a/cpp/subprojects/common/include/common/data/tuple.hpp b/cpp/subprojects/common/include/common/data/tuple.hpp deleted file mode 100644 index fb757988..00000000 --- a/cpp/subprojects/common/include/common/data/tuple.hpp +++ /dev/null @@ -1,172 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * A tuple that consists of two values of the same type. - * - * @tparam T The type of the values - */ -template -struct Tuple final { - public: - - Tuple() {} - - /** - * @param f The first value - * @param s The second value - */ - Tuple(T f, T s) : first(f), second(s) {} - - /** - * The first value. - */ - T first; - - /** - * The second value. - */ - T second; - - /** - * Assigns a specific value to the first and second value of this tuple. - * - * @param rhs A reference to the value to be assigned - * @return A reference to the modified tuple - */ - Tuple& operator=(const T& rhs) { - first = rhs; - second = rhs; - return *this; - } - - /** - * Adds a specific value to the first and second value of this tuple. 
- * - * @param rhs A reference to the value to be added - * @return A reference to the modified tuple - */ - Tuple& operator+=(const T& rhs) { - first += rhs; - second += rhs; - return *this; - } - - /** - * Creates and returns a new tuple that results from adding a specific value to the first and second value of an - * existing tuple. - * - * @param lhs The original tuple - * @param rhs A reference to the value to be added - * @return The tuple that has been created - */ - friend Tuple operator+(Tuple lhs, const T& rhs) { - lhs += rhs; - return lhs; - } - - /** - * Adds the first and second value of a given tuple to the first and second value of this tuple, respectively, - * - * @param rhs A reference to the tuple, whose first and second value should be added - * @return A reference to the modified tuple - */ - Tuple& operator+=(const Tuple& rhs) { - first += rhs.first; - second += rhs.second; - return *this; - } - - /** - * Creates and returns a new tuple that results from adding the first and second value of a specific tuple to - * the first and second value of an existing tuple, respectively. - * - * @param lhs The original tuple - * @param rhs A reference to the tuple, whose first and second value should be added - * @return The tuple that has been created - */ - friend Tuple operator+(Tuple lhs, const Tuple& rhs) { - lhs += rhs; - return lhs; - } - - /** - * Subtracts a specific value from the first and second value of this tuple. - * - * @param rhs A reference to the value to be subtracted - * @return A reference to the modified tuple - */ - Tuple& operator-=(const T& rhs) { - first -= rhs; - second -= rhs; - return *this; - } - - /** - * Creates and returns a new tuple that results from subtracting a specific value from the first and second - * value of an existing tuple, respectively. 
- * - * @param lhs The original tuple - * @param rhs A reference to the value to be subtracted - * @return The tuple that has been created - */ - friend Tuple operator-(Tuple lhs, const T& rhs) { - lhs -= rhs; - return lhs; - } - - /** - * Subtracts the first and second value of a given tuple from the first and second value of this tuple, - * respectively. - * - * @param rhs A reference to the tuple, whose first and second value should be subtracted - * @return A reference to the modified tuple - */ - Tuple& operator-=(const Tuple& rhs) { - first -= rhs.first; - second -= rhs.second; - return *this; - } - - /** - * Creates and returns a new tuple that results from subtracting the first and second value of a specific tuple - * from the first and second value of an existing tuple, respectively. - * - * @param lhs The original tuple - * @param rhs A reference to the value to be subtracted - * @return The tuple that has been created - */ - friend Tuple operator-(Tuple lhs, const Tuple& rhs) { - lhs -= rhs; - return lhs; - } - - /** - * Multiplies the first and second value of this tuple with a specific value. - * - * @param rhs A reference to the value to be multiplied by - * @return A reference to the modified tuple - */ - Tuple& operator*=(const T& rhs) { - first *= rhs; - second *= rhs; - return *this; - } - - /** - * Creates and returns a new tuple that results from multiplying the first and second value of an existing tuple - * with a specific value. 
- * - * @param lhs The original tuple - * @param rhs A reference to the value to be multiplied by - * @return The tuple that has been created - */ - friend Tuple operator*(Tuple lhs, const T& rhs) { - lhs *= rhs; - return lhs; - } -}; diff --git a/cpp/subprojects/common/include/common/data/types.hpp b/cpp/subprojects/common/include/common/data/types.hpp deleted file mode 100644 index bf2ea96f..00000000 --- a/cpp/subprojects/common/include/common/data/types.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include -#include - -typedef long int int64; -typedef unsigned char uint8; -typedef unsigned int uint32; -typedef float float32; -typedef double float64; - -/** - * Returns whether two floating point values `a` and `b` are (approximately) equal. - * - * @tparam T The type of the floating point values to be compared - * @param a The first floating point value - * @param b The second floating point value - * @return True if the given floating point values are equal, false otherwise - */ -template -static inline constexpr bool isEqual(T a, T b) { - return std::fabs(a - b) <= std::numeric_limits::epsilon() * std::fmax(1, std::fmax(std::fabs(a), std::fabs(b))); -} diff --git a/cpp/subprojects/common/include/common/data/vector_binned_dense.hpp b/cpp/subprojects/common/include/common/data/vector_binned_dense.hpp deleted file mode 100644 index 7f3c6206..00000000 --- a/cpp/subprojects/common/include/common/data/vector_binned_dense.hpp +++ /dev/null @@ -1,264 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" - -#include - -/** - * An one-dimensional vector that provides random access to a fixed number of elements, corresponding to bins, stored in - * a C-contiguous array. 
- * - * @tparam T The type of the data that is stored in the vector - */ -template -class DenseBinnedVector : virtual public IOneDimensionalView { - private: - - DenseVector binIndices_; - - DenseVector values_; - - public: - - /** - * An iterator that provides read-only access to the values of all elements in a `DenseBinnedVector`. - */ - class ValueConstIterator final { - private: - - DenseVector::const_iterator binIndexIterator_; - - typename DenseVector::const_iterator valueIterator_; - - public: - - /** - * @param binIndexIterator An iterator to the bin indices of individual elements - * @param valueIterator An iterator to the values of individual bins - */ - ValueConstIterator(DenseVector::const_iterator binIndexIterator, - typename DenseVector::const_iterator valueIterator); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef const T value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const T* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef const T& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator to the next element - */ - ValueConstIterator& operator++(); - - /** - * Returns an iterator to the next element. 
- * - * @return A reference to an iterator to the next element - */ - ValueConstIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator to the previous element - */ - ValueConstIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator to the previous element - */ - ValueConstIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const ValueConstIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const ValueConstIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. - * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const ValueConstIterator& rhs) const; - }; - - /** - * @param numElements The number of elements in the vector - * @param numBins The number of bins - */ - DenseBinnedVector(uint32 numElements, uint32 numBins); - - virtual ~DenseBinnedVector() override {}; - - /** - * An iterator that provides access to the indices that correspond to individual bins and allows to modify them. - */ - typedef typename DenseVector::iterator index_iterator; - - /** - * An iterator that provides read-only access to the indices that correspond to individual bins. - */ - typedef typename DenseVector::const_iterator index_const_iterator; - - /** - * An iterator that provides access to the elements that correspond to individual bins and allows to modify - * them. 
- */ - typedef typename DenseVector::iterator value_iterator; - - /** - * An iterator that provides read-only access to the elements that correspond to individual bins. - */ - typedef typename DenseVector::const_iterator value_const_iterator; - - /** - * An iterator that provides read-only access to the elements in the vector. - */ - typedef ValueConstIterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns an `index_iterator` to the beginning of the bin indices individual elements in the vector correspond - * to. - * - * @return An `index_iterator` to the beginning - */ - index_iterator indices_begin(); - - /** - * Returns an `index_iterator` to the end of the bin indices individual elements in the vector correspond to. - * - * @return An `index_iterator` to the end - */ - index_iterator indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the bin indices individual elements in the vector - * correspond to. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the bin indices individual elements in the vector correspond - * to. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - /** - * Returns a `value_iterator` to the beginning of the values that correspond to individual bins. - * - * @return A `value_iterator` to the beginning - */ - value_iterator values_begin(); - - /** - * Returns a `value_iterator` to the end of the values that correspond to individual bins. 
- * - * @return A `value_iterator` to the end - */ - value_iterator values_end(); - - /** - * Returns a `value_const_iterator` to the beginning of the values that correspond to individual bins. - * - * @return A `value_const_iterator` to the beginning - */ - value_const_iterator values_cbegin() const; - - /** - * Returns a `value_const_iterator` to the end of the values that correspond to individual bins. - * - * @return A `value_const_iterator` to the end - */ - value_const_iterator values_cend() const; - - /** - * Returns the number of bins. - * - * @return The number of bins - */ - uint32 getNumBins() const; - - /** - * Sets the number of bins. - * - * @param numBins The number of bins to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumBins(uint32 numBins, bool freeMemory); - - /** - * @see `IOneDimensionalView::getNumElements` - */ - uint32 getNumElements() const override; -}; diff --git a/cpp/subprojects/common/include/common/data/vector_bit.hpp b/cpp/subprojects/common/include/common/data/vector_bit.hpp deleted file mode 100644 index a772034a..00000000 --- a/cpp/subprojects/common/include/common/data/vector_bit.hpp +++ /dev/null @@ -1,60 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * An one-dimension vector that stores binary data in a space-efficient way. - */ -class BitVector final { - private: - - const uint32 numElements_; - - uint32* array_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - BitVector(uint32 numElements); - - /** - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - BitVector(uint32 numElements, bool init); - - ~BitVector(); - - /** - * Returns the value of the element at a specific position. 
- * - * @param pos The position of the element - * @return The value of the specified element - */ - bool operator[](uint32 pos) const; - - /** - * Sets a value to the element at a specific position. - * - * @param pos The position of the element - * @param value The value to be set - */ - void set(uint32 pos, bool value); - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements in the vector - */ - uint32 getNumElements() const; - - /** - * Sets the values of all elements to zero. - */ - void clear(); -}; diff --git a/cpp/subprojects/common/include/common/data/vector_dense.hpp b/cpp/subprojects/common/include/common/data/vector_dense.hpp deleted file mode 100644 index 18645348..00000000 --- a/cpp/subprojects/common/include/common/data/vector_dense.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_vector.hpp" - -/** - * An one-dimensional vector that provides random access to a fixed number of elements stored in a C-contiguous array. - * - * @tparam T The type of the data that is stored in the vector - */ -template -class MLRLCOMMON_API DenseVector : public VectorView { - private: - - uint32 maxCapacity_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - DenseVector(uint32 numElements); - - /** - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - DenseVector(uint32 numElements, bool init); - - virtual ~DenseVector() override; - - /** - * Sets the number of elements in the vector. 
- * - * @param numElements The number of elements to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumElements(uint32 numElements, bool freeMemory); -}; diff --git a/cpp/subprojects/common/include/common/data/vector_dok.hpp b/cpp/subprojects/common/include/common/data/vector_dok.hpp deleted file mode 100644 index 0f810e3e..00000000 --- a/cpp/subprojects/common/include/common/data/vector_dok.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -/** - * An one-dimensional sparse vector that stores data using the dictionary of keys (DOK) format. - * - * @tparam T The type of the data that is stored in the vector - */ -template -class DokVector final { - private: - - std::unordered_map data_; - - const T sparseValue_; - - public: - - /** - * @param sparseValue The value of sparse elements - */ - DokVector(T sparseValue); - - /** - * An iterator that provides access to the elements in the vector and allows to modify them. - */ - typedef typename std::unordered_map::iterator iterator; - - /** - * An iterator that provides read-only access to the elements in the vector. - */ - typedef typename std::unordered_map::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the value of the element at a specific position. 
- * - * @param pos The position of the element - * @return The value of the specified element - */ - const T& operator[](uint32 pos) const; - - /** - * Sets a value to the element at a specific position. - * - * @param pos The position of the element - * @param value The value to be set - */ - void set(uint32 pos, T value); - - /** - * Sets the values of all elements to zero. - */ - void clear(); -}; diff --git a/cpp/subprojects/common/include/common/data/vector_dok_binary.hpp b/cpp/subprojects/common/include/common/data/vector_dok_binary.hpp deleted file mode 100644 index 597bacb6..00000000 --- a/cpp/subprojects/common/include/common/data/vector_dok_binary.hpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -/** - * An one-dimensional sparse vector that stores binary data using the dictionary of keys (DOK) format. - */ -class BinaryDokVector final { - private: - - std::unordered_set data_; - - public: - - /** - * An iterator that provides read-only access to the elements in the vector. - */ - typedef std::unordered_set::const_iterator index_const_iterator; - - /** - * Returns an `index_const_iterator` to the beginning of the indices. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - /** - * Returns the value of the element at a specific position. - * - * @param pos The position of the element - * @return The value of the specified element - */ - bool operator[](uint32 pos) const; - - /** - * Sets a value to the element at a specific position. - * - * @param pos The position of the element - * @param value The value to be set - */ - void set(uint32 pos, bool value); - - /** - * Sets the values of all elements to zero. 
- */ - void clear(); -}; diff --git a/cpp/subprojects/common/include/common/data/vector_sparse_array.hpp b/cpp/subprojects/common/include/common/data/vector_sparse_array.hpp deleted file mode 100644 index 48277b06..00000000 --- a/cpp/subprojects/common/include/common/data/vector_sparse_array.hpp +++ /dev/null @@ -1,532 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/indexed_value.hpp" -#include "common/data/vector_dense.hpp" - -#include - -/** - * An one-dimensional sparse vector that stores a fixed number of elements, consisting of an index and a value, in a - * C-contiguous array. - * - * @tparam T The type of the data that is stored in the vector - */ -template -class SparseArrayVector final : public DenseVector> { - private: - - /** - * An iterator that provides random read-only access to the indices in a `SparseArrayVector`. - */ - class IndexConstIterator final { - private: - - typename VectorConstView>::const_iterator iterator_; - - public: - - /** - * @param iterator An iterator that provides access to the elements in the `SparseArrayVector` - */ - IndexConstIterator(typename VectorConstView>::const_iterator iterator); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef uint32 value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const uint32* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef const uint32& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. 
- * - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - IndexConstIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - IndexConstIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - IndexConstIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - IndexConstIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const IndexConstIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const IndexConstIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. - * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const IndexConstIterator& rhs) const; - }; - - /** - * An iterator that provides random access to the indices in a `SparseArrayVector` and allows to modify them. 
- */ - class IndexIterator final { - private: - - typename VectorView>::iterator iterator_; - - public: - - /** - * @param iterator An iterator that provides access to the elements in the `SparseArrayVector` - */ - IndexIterator(typename VectorView>::iterator iterator); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef uint32 value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef uint32* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef uint32& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - IndexIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - IndexIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - IndexIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - IndexIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. 
- * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const IndexIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const IndexIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. - * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const IndexIterator& rhs) const; - }; - - /** - * An iterator that provides random access to the values in a `SparseArrayVector` and allows to modify them. - */ - class ValueConstIterator final { - private: - - typename VectorConstView>::const_iterator iterator_; - - public: - - /** - * @param iterator An iterator that provides access to the elements in the `SparseArrayVector` - */ - ValueConstIterator(typename VectorConstView>::const_iterator iterator); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef const T value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const T* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef const T& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. 
- * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ValueConstIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ValueConstIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - ValueConstIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - ValueConstIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const ValueConstIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const ValueConstIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. - * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const ValueConstIterator& rhs) const; - }; - - /** - * An iterator that provides random access to the values in a `SparseArrayVector` and allows to modify them. 
- */ - class ValueIterator final { - private: - - typename VectorView>::iterator iterator_; - - public: - - /** - * @param iterator An iterator that provides access to the elements in the `SparseArrayVector` - */ - ValueIterator(typename VectorView>::iterator iterator); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef T value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef T* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef T& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ValueIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ValueIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - ValueIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - ValueIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. 
- * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const ValueIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const ValueIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. - * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const ValueIterator& rhs) const; - }; - - public: - - /** - * @param numElements The number of elements in the vector - */ - SparseArrayVector(uint32 numElements); - - /** - * An iterator that provides access to the indices in the vector and allows to modify them. - */ - typedef IndexIterator index_iterator; - - /** - * An iterator that provides read-only access to the indices in the vector. - */ - typedef IndexConstIterator index_const_iterator; - - /** - * An iterator that provides access to the values in the vector and allows to modify them. - */ - typedef ValueIterator value_iterator; - - /** - * An iterator that provides read-only access to the values in the vector. - */ - typedef ValueConstIterator value_const_iterator; - - /** - * Returns an `index_iterator` to the beginning of the indices in the vector. - * - * @return An `index_iterator` to the beginning - */ - index_iterator indices_begin(); - - /** - * Returns an `index_iterator` to the end of the indices in the vector. - * - * @return An `index_iterator` to the end - */ - index_iterator indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the indices in the vector. 
- * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices in the vector. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - /** - * Returns a `value_iterator` to the beginning of the values in the vector. - * - * @return A `value_iterator` to the beginning - */ - value_iterator values_begin(); - - /** - * Returns a `value_iterator` to the end of the values in the vector. - * - * @return A `value_iterator` to the end - */ - value_iterator values_end(); - - /** - * Returns a `value_const_iterator` to the beginning of the values in the vector. - * - * @return A `value_const_iterator` to the beginning - */ - value_const_iterator values_cbegin() const; - - /** - * Returns a `value_const_iterator` to the end of the values in the vector. - * - * @return A `value_const_iterator` to the end - */ - value_const_iterator values_cend() const; -}; diff --git a/cpp/subprojects/common/include/common/data/vector_sparse_array_binary.hpp b/cpp/subprojects/common/include/common/data/vector_sparse_array_binary.hpp deleted file mode 100644 index 495a2c9f..00000000 --- a/cpp/subprojects/common/include/common/data/vector_sparse_array_binary.hpp +++ /dev/null @@ -1,11 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" - -/** - * An one-dimensional sparse vector that stores a fixed number of indices in a C-contiguous array. 
- */ -typedef DenseVector BinarySparseArrayVector; diff --git a/cpp/subprojects/common/include/common/data/view_c_contiguous.hpp b/cpp/subprojects/common/include/common/data/view_c_contiguous.hpp deleted file mode 100644 index 27981dba..00000000 --- a/cpp/subprojects/common/include/common/data/view_c_contiguous.hpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Implements row-wise read-only access to the values that are stored in a pre-allocated C-contiguous array. - * - * @tparam T The type of the values - */ -template -class MLRLCOMMON_API CContiguousConstView : virtual public ITwoDimensionalView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to the array that stores the values, the view provides access to. - */ - T* array_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param array A pointer to a C-contiguous array of template type `T` that stores the values, the view - * provides access to - */ - CContiguousConstView(uint32 numRows, uint32 numCols, T* array); - - virtual ~CContiguousConstView() override {}; - - /** - * An iterator that provides read-only access to the elements in the view. - */ - typedef const T* value_const_iterator; - - /** - * Returns a `value_const_iterator` to the beginning of a specific row. - * - * @param row The row - * @return A `value_const_iterator` to the beginning of the given row - */ - value_const_iterator values_cbegin(uint32 row) const; - - /** - * Returns a `value_const_iterator` to the end of a specific row. 
- * - * @param row The row - * @return A `value_const_iterator` to the end of the given row - */ - value_const_iterator values_cend(uint32 row) const; - - /** - * @see `ITwoDimensionalView::getNumRows` - */ - uint32 getNumRows() const override final; - - /** - * @see `ITwoDimensionalView::getNumCols` - */ - uint32 getNumCols() const override final; -}; - -/** - * Implements row-wise read and write access to the values that are stored in a pre-allocated C-contiguous array. - * - * @tparam T The type of the values - */ -template -class MLRLCOMMON_API CContiguousView : public CContiguousConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param array A pointer to a C-contiguous array of template type `T` that stores the values, the view - * provides access to - */ - CContiguousView(uint32 numRows, uint32 numCols, T* array); - - virtual ~CContiguousView() override {}; - - /** - * An iterator that provides access to the elements in the view and allows to modify them. - */ - typedef T* value_iterator; - - /** - * Returns a `value_iterator` to the beginning of a specific row. - * - * @param row The row - * @return A `value_iterator` to the beginning of the given row - */ - value_iterator values_begin(uint32 row); - - /** - * Returns a `value_iterator` to the end of a specific row. 
- * - * @param row The row - * @return A `value_iterator` to the end of the given row - */ - value_iterator values_end(uint32 row); -}; diff --git a/cpp/subprojects/common/include/common/data/view_csc.hpp b/cpp/subprojects/common/include/common/data/view_csc.hpp deleted file mode 100644 index 15257c83..00000000 --- a/cpp/subprojects/common/include/common/data/view_csc.hpp +++ /dev/null @@ -1,187 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Implements column-wise read-only access to the values that are stored in a pre-allocated matrix in the compressed - * sparse column (CSC) format. - * - * @tparam T The type of the values - */ -template -class CscConstView : virtual public ITwoDimensionalView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores all non-zero values. - */ - T* data_; - - /** - * A pointer to an array that stores the row-indices, the values in `data_` correspond to. - */ - uint32* rowIndices_; - - /** - * A pointer to an array that stores the indices of the first element in `data_` and `rowIndices_` that - * corresponds to a certain column. - */ - uint32* colIndices_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param data A pointer to an array of template type `T`, shape `(num_non_zero_values)`, that stores - * all non-zero values - * @param rowIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * row-indices, the values in `data` correspond to - * @param colIndices A pointer to an array of type `uint32`, shape `(numCols + 1)`, that stores the indices - * of the first element in `data` and `rowIndices` that corresponds to a certain column. 
- * The index at the last position is equal to `num_non_zero_values` - */ - CscConstView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices); - - virtual ~CscConstView() override {}; - - /** - * An iterator that provides read-only access to the values in the view. - */ - typedef const T* value_const_iterator; - - /** - * An iterator that provides read-only access to the indices in the view. - */ - typedef const uint32* index_const_iterator; - - /** - * Returns a `value_const_iterator` to the beginning of the values at a specific column. - * - * @param col The column - * @return A `value_const_iterator` to the beginning of the values - */ - value_const_iterator values_cbegin(uint32 col) const; - - /** - * Returns a `value_const_iterator` to the end of the values at a specific column. - * - * @param col The column - * @return A `value_const_iterator` to the end of the values - */ - value_const_iterator values_cend(uint32 col) const; - - /** - * Returns an `index_const_iterator` to the beginning of the indices at a specific column. - * - * @param col The column - * @return An `index_const_iterator` to the beginning of the indices - */ - index_const_iterator indices_cbegin(uint32 col) const; - - /** - * Returns an `index_const_iterator` to the end of the indices at a specific column. - * - * @param col The column - * @return An `index_const_iterator` to the end of the indices - */ - index_const_iterator indices_cend(uint32 col) const; - - /** - * Returns the number of non-zero elements in the view. 
- * - * @return The number of non-zero elements - */ - uint32 getNumNonZeroElements() const; - - /** - * @see `ITwoDimensionalView::getNumRows` - */ - uint32 getNumRows() const override final; - - /** - * @see `ITwoDimensionalView::getNumCols` - */ - uint32 getNumCols() const override final; -}; - -/** - * Implements column-wise read and write access to the values that are stored in a pre-allocated matrix in the - * compressed sparse column (CSC) format. - * - * @tparam T The type of the values - */ -template -class CscView : public CscConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param data A pointer to an array of template type `T`, shape `(num_non_zero_values)`, that stores - * all non-zero values - * @param rowIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * row-indices, the values in `data` correspond to - * @param colIndices A pointer to an array of type `uint32`, shape `(numCols + 1)`, that stores the indices - * of the first element in `data` and `rowIndices` that corresponds to a certain column. - * The index at the last position is equal to `num_non_zero_values` - */ - CscView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices); - - virtual ~CscView() override {}; - - /** - * An iterator that provides access to the values in the view and allows to modify them. - */ - typedef T* value_iterator; - - /** - * An iterator that provides access to the indices in the view and allows to modify them. - */ - typedef uint32* index_iterator; - - /** - * Returns a `value_iterator` to the beginning of the values at a specific column. - * - * @param col The column - * @return A `value_iterator` to the beginning of the values - */ - value_iterator values_begin(uint32 col); - - /** - * Returns a `value_iterator` to the end of the values at a specific column. 
- * - * @param col The column - * @return A `value_iterator` to the end of the values - */ - value_iterator values_end(uint32 col); - - /** - * Returns an `index_iterator` to the beginning of the indices at a specific column. - * - * @param col The column - * @return An `index_iterator` to the beginning of the indices - */ - index_iterator indices_begin(uint32 col); - - /** - * Returns an `index_iterator` to the end of the indices at a specific column. - * - * @param col The column - * @return An `index_iterator` to the end of the indices - */ - index_iterator indices_end(uint32 col); -}; diff --git a/cpp/subprojects/common/include/common/data/view_csc_binary.hpp b/cpp/subprojects/common/include/common/data/view_csc_binary.hpp deleted file mode 100644 index 09b25b92..00000000 --- a/cpp/subprojects/common/include/common/data/view_csc_binary.hpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Implements column-wise read-only access to binary values that are stored in a pre-allocated matrix in the compressed - * sparse column (CSC) format. - */ -class BinaryCscConstView : virtual public ITwoDimensionalView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores the row-indices, the non-zero elements correspond to. - */ - uint32* rowIndices_; - - /** - * A pointer to an array that stores the indices of the first element in `rowIndices_` that corresponds to a - * certain column. 
- */ - uint32* colIndices_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param rowIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * row-indices, the non-zero elements correspond to - * @param colIndices A pointer to an array of type `uint32`, shape `(numCols + 1)`, that stores the indices - * of the first element in `rowIndices` that corresponds to a certain column. The index at - * the last position is equal to `num_non_zero_values` - */ - BinaryCscConstView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices); - - virtual ~BinaryCscConstView() override {}; - - /** - * An iterator that provides read-only access to the indices in the view. - */ - typedef const uint32* index_const_iterator; - - /** - * Returns an `index_const_iterator` to the beginning of the indices at a specific column. - * - * @param col The column - * @return An `index_const_iterator` to the beginning of the indices - */ - index_const_iterator indices_cbegin(uint32 col) const; - - /** - * Returns an `index_const_iterator` to the end of the indices at a specific column. - * - * @param col The column - * @return An `index_const_iterator` to the end of the indices - */ - index_const_iterator indices_cend(uint32 col) const; - - /** - * Returns the number of non-zero elements in the view. - * - * @return The number of non-zero elements - */ - uint32 getNumNonZeroElements() const; - - uint32 getNumRows() const override final; - - uint32 getNumCols() const override final; -}; - -/** - * Implements column-wise read and write access to binary values that are stored in a pre-allocated matrix in the - * compressed sparse column (CSC) format. 
- */ -class BinaryCscView : public BinaryCscConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param rowIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * row-indices, the non-zero elements correspond to - * @param colIndices A pointer to an array of type `uint32`, shape `(numCols + 1)`, that stores the indices - * of the first element in `rowIndices` that corresponds to a certain column. The index at - * the last position is equal to `num_non_zero_values` - */ - BinaryCscView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices); - - virtual ~BinaryCscView() override {}; - - /** - * An iterator that provides access to the indices in the view and allows to modify them. - */ - typedef uint32* index_iterator; - - /** - * Returns an `index_iterator` to the beginning of the indices at a specific column. - * - * @param col The column - * @return An `index_iterator` to the beginning of the indices - */ - index_iterator indices_begin(uint32 col); - - /** - * Returns an `index_iterator` to the end of the indices at a specific column. - * - * @param col The column - * @return An `index_iterator` to the end of the indices - */ - index_iterator indices_end(uint32 col); -}; diff --git a/cpp/subprojects/common/include/common/data/view_csr.hpp b/cpp/subprojects/common/include/common/data/view_csr.hpp deleted file mode 100644 index 1dbf3941..00000000 --- a/cpp/subprojects/common/include/common/data/view_csr.hpp +++ /dev/null @@ -1,187 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Implements row-wise read-only access to the values that are stored in a pre-allocated matrix in the compressed sparse - * row (CSR) format. 
- * - * @tparam T The type of the values - */ -template -class CsrConstView : virtual public ITwoDimensionalView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores all non-zero values. - */ - T* data_; - - /** - * A pointer to an array that stores the indices of the first element in `data_` and `colIndices_` that - * corresponds to a certain row. - */ - uint32* rowIndices_; - - /** - * A pointer to an array that stores the column-indices, the values in `data_` correspond to. - */ - uint32* colIndices_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param data A pointer to an array of template type `T`, shape `(num_non_zero_values)`, that stores - * all non-zero values - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `data` and `colIndices` that corresponds to a certain row. The - * index at the last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the values in `data` correspond to - */ - CsrConstView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices); - - virtual ~CsrConstView() override {}; - - /** - * An iterator that provides read-only access to the values in the view. - */ - typedef const T* value_const_iterator; - - /** - * An iterator that provides read-only access to the indices in the view. - */ - typedef const uint32* index_const_iterator; - - /** - * Returns a `value_const_iterator` to the beginning of the values at a specific row. 
- * - * @param row The row - * @return A `value_const_iterator` to the beginning of the values - */ - value_const_iterator values_cbegin(uint32 row) const; - - /** - * Returns a `value_const_iterator` to the end of the values at a specific row. - * - * @param row The row - * @return A `value_const_iterator` to the end of the values - */ - value_const_iterator values_cend(uint32 row) const; - - /** - * Returns an `index_const_iterator` to the beginning of the indices at a specific row. - * - * @param row The row - * @return An `index_const_iterator` to the beginning of the indices - */ - index_const_iterator indices_cbegin(uint32 row) const; - - /** - * Returns an `index_const_iterator` to the end of the indices at a specific row. - * - * @param row The row - * @return An `index_const_iterator` to the end of the indices - */ - index_const_iterator indices_cend(uint32 row) const; - - /** - * Returns the number of non-zero elements in the view. - * - * @return The number of non-zero elements - */ - uint32 getNumNonZeroElements() const; - - /** - * @see `ITwoDimensionalView::getNumRows` - */ - uint32 getNumRows() const override final; - - /** - * @see `ITwoDimensionalView::getNumCols` - */ - uint32 getNumCols() const override final; -}; - -/** - * Implements row-wise read and write access to the values that are stored in a pre-allocated matrix in the compressed - * sparse row (CSR) format. - * - * @tparam T The type of the values - */ -template -class CsrView : public CsrConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param data A pointer to an array of template type `T`, shape `(num_non_zero_values)`, that stores - * all non-zero values - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `data` and `colIndices` that corresponds to a certain row. 
The - * index at the last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the values in `data` correspond to - */ - CsrView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices); - - virtual ~CsrView() override {}; - - /** - * An iterator that provides access to the values in the view and allows to modify them. - */ - typedef T* value_iterator; - - /** - * An iterator that provides access to the indices in the view and allows to modify them. - */ - typedef uint32* index_iterator; - - /** - * Returns a `value_iterator` to the beginning of the values at a specific row. - * - * @param row The row - * @return A `value_iterator` to the beginning of the values - */ - value_iterator values_begin(uint32 row); - - /** - * Returns a `value_iterator` to the end of the values at a specific row. - * - * @param row The row - * @return A `value_iterator` to the end of the values - */ - value_iterator values_end(uint32 row); - - /** - * Returns an `index_iterator` to the beginning of the indices at a specific row. - * - * @param row The row - * @return An `index_iterator` to the beginning of the indices - */ - index_iterator indices_begin(uint32 row); - - /** - * Returns an `index_iterator` to the end of the indices at a specific row. 
- * - * @param row The row - * @return An `index_iterator` to the end of the indices - */ - index_iterator indices_end(uint32 row); -}; diff --git a/cpp/subprojects/common/include/common/data/view_csr_binary.hpp b/cpp/subprojects/common/include/common/data/view_csr_binary.hpp deleted file mode 100644 index fa6e85d0..00000000 --- a/cpp/subprojects/common/include/common/data/view_csr_binary.hpp +++ /dev/null @@ -1,124 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Implements row-wise read-only access to binary values that are stored in a pre-allocated matrix in the compressed - * sparse row (CSR) format. - */ -class MLRLCOMMON_API BinaryCsrConstView : virtual public ITwoDimensionalView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores the indices of the first element in `colIndices_` that corresponds to a - * certain row. - */ - uint32* rowIndices_; - - /** - * A pointer to an array that stores the column-indices, the non-zero elements correspond to. - */ - uint32* colIndices_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `colIndices` that corresponds to a certain row. 
The index at the - * last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the non-zero elements correspond to - */ - BinaryCsrConstView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices); - - virtual ~BinaryCsrConstView() override {}; - - /** - * An iterator that provides read-only access to the indices in the view. - */ - typedef const uint32* index_const_iterator; - - /** - * Returns an `index_const_iterator` to the beginning of the indices at a specific row. - * - * @param row The row - * @return An `index_const_iterator` to the beginning of the indices - */ - index_const_iterator indices_cbegin(uint32 row) const; - - /** - * Returns an `index_const_iterator` to the end of the indices at a specific row. - * - * @param row The row - * @return An `index_const_iterator` to the end of the indices - */ - index_const_iterator indices_cend(uint32 row) const; - - /** - * Returns the number of non-zero elements in the view. - * - * @return The number of non-zero elements - */ - uint32 getNumNonZeroElements() const; - - uint32 getNumRows() const override final; - - uint32 getNumCols() const override final; -}; - -/** - * Implements row-wise read and write access to binary values that are stored in a pre-allocated matrix in the - * compressed sparse row (CSR) format. - */ -class BinaryCsrView : public BinaryCsrConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `colIndices` that corresponds to a certain row. 
The index at the - * last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the non-zero elements correspond to - */ - BinaryCsrView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices); - - virtual ~BinaryCsrView() override {}; - - /** - * An iterator that provides access to the indices of the view and allows to modify them. - */ - typedef uint32* index_iterator; - - /** - * Returns an `index_iterator` to the beginning of the indices at a specific row. - * - * @param row The row - * @return An `index_iterator` to the beginning of the indices - */ - index_iterator indices_begin(uint32 row); - - /** - * Returns an `index_iterator` to the end of the indices at a specific row. - * - * @param row The row - * @return An `index_iterator` to the end of the indices - */ - index_iterator indices_end(uint32 row); -}; diff --git a/cpp/subprojects/common/include/common/data/view_fortran_contiguous.hpp b/cpp/subprojects/common/include/common/data/view_fortran_contiguous.hpp deleted file mode 100644 index 5c06821b..00000000 --- a/cpp/subprojects/common/include/common/data/view_fortran_contiguous.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Implements column-wise read-only access to the values that are stored in a pre-allocated Fortran-contiguous array. - * - * @tparam T The type of the values - */ -template -class FortranContiguousConstView : virtual public ITwoDimensionalView { - protected: - - /** - * The number of rows in the view. - */ - const uint32 numRows_; - - /** - * The number of columns in the view. - */ - const uint32 numCols_; - - /** - * A pointer to an array that stores the values. 
- */ - T* array_; - - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param array A pointer to a Fortran-contiguous array of template type `T` that stores the values, the - * view provides access to - */ - FortranContiguousConstView(uint32 numRows, uint32 numCols, T* array); - - virtual ~FortranContiguousConstView() override {}; - - /** - * An iterator that provides read-only access to the values in the view. - */ - typedef const T* value_const_iterator; - - /** - * Returns a `value_const_iterator` to the beginning of a specific column. - * - * @param col The column - * @return A `value_const_iterator` to the beginning - */ - value_const_iterator values_cbegin(uint32 col) const; - - /** - * Returns a `value_const_iterator` to the end of a specific column. - * - * @param col The column - * @return A `value_const_iterator` to the end - */ - value_const_iterator values_cend(uint32 col) const; - - /** - * @see `ITwoDimensionalView::getNumRows` - */ - uint32 getNumRows() const override final; - - /** - * @see `ITwoDimensionalView::getNumCols` - */ - uint32 getNumCols() const override final; -}; - -/** - * Implements column-wise read and write access to the values that are stored in a pre-allocated Fortran-contiguous - * array. - * - * @tparam T The type of the values - */ -template -class FortranContiguousView : public FortranContiguousConstView { - public: - - /** - * @param numRows The number of rows in the view - * @param numCols The number of columns in the view - * @param array A pointer to a Fortran-contiguous array of template type `T` that stores the values, the - * view provides access to - */ - FortranContiguousView(uint32 numRows, uint32 numCols, T* array); - - virtual ~FortranContiguousView() override {}; - - /** - * An iterator that provides access to the values in the view and allows to modify them. 
- */ - typedef T* value_iterator; - - /** - * Returns a `value_iterator` to the beginning of a specific column. - * - * @param col The column - * @return A `value_iterator` to the beginning - */ - value_iterator values_begin(uint32 col); - - /** - * Returns a `value_iterator` to the end of a specific column. - * - * @param col The column - * @return A `value_iterator` to the end - */ - value_iterator values_end(uint32 col); -}; diff --git a/cpp/subprojects/common/include/common/data/view_one_dimensional.hpp b/cpp/subprojects/common/include/common/data/view_one_dimensional.hpp deleted file mode 100644 index 38a30078..00000000 --- a/cpp/subprojects/common/include/common/data/view_one_dimensional.hpp +++ /dev/null @@ -1,23 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/macros.hpp" - -/** - * Defines an interface for all one-dimensional views. - */ -class MLRLCOMMON_API IOneDimensionalView { - public: - - virtual ~IOneDimensionalView() {}; - - /** - * Returns the number of elements in the view. - * - * @return The number of elements - */ - virtual uint32 getNumElements() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/data/view_two_dimensional.hpp b/cpp/subprojects/common/include/common/data/view_two_dimensional.hpp deleted file mode 100644 index 5394d8a2..00000000 --- a/cpp/subprojects/common/include/common/data/view_two_dimensional.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/macros.hpp" - -/** - * Defines an interface for all two-dimensional views. - */ -class MLRLCOMMON_API ITwoDimensionalView { - public: - - virtual ~ITwoDimensionalView() {}; - - /** - * Returns the number of rows in the view. - * - * @return The number of rows - */ - virtual uint32 getNumRows() const = 0; - - /** - * Returns the number of columns in the view. 
- * - * @return The number of columns - */ - virtual uint32 getNumCols() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/data/view_vector.hpp b/cpp/subprojects/common/include/common/data/view_vector.hpp deleted file mode 100644 index 764db0d8..00000000 --- a/cpp/subprojects/common/include/common/data/view_vector.hpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_one_dimensional.hpp" - -/** - * Implements read-only access to the values that are stored in a pre-allocated C-contiguous array. - * - * @tparam T The type of the values - */ -template -class MLRLCOMMON_API VectorConstView : public IOneDimensionalView { - protected: - - /** - * The number of elements in the view. - */ - uint32 numElements_; - - /** - * A pointer to the array that stores the values, the view provides access to. - */ - T* array_; - - public: - - /** - * @param numElements The number of elements in the view - * @param array A pointer to a C-contiguous array of template type `T` that stores the values, the view - * provides access to - */ - VectorConstView(uint32 numElements, T* array); - - virtual ~VectorConstView() override {}; - - /** - * An iterator that provides read-only access to the elements in the view. - */ - typedef const T* const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the view. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the view. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns a const reference to the element at a specific position. 
- * - * @param pos The position of the element - * @return A const reference to the specified element - */ - const T& operator[](uint32 pos) const; - - /** - * @see `IOneDimensionalView::getNumElements` - */ - uint32 getNumElements() const override final; -}; - -/** - * Implements read and write access to the values that are stored in a pre-allocated C-contiguous array. - * - * @tparam T The type of the values - */ -template -class MLRLCOMMON_API VectorView : public VectorConstView { - public: - - /** - * @param numElements The number of elements in the view - * @param array A pointer to a C-contiguous array of template type `T` that stores the values, the view - * provides access to - */ - VectorView(uint32 numElements, T* array); - - virtual ~VectorView() override {}; - - /** - * An iterator that provides access to the elements in the view and allows to modify them. - */ - typedef T* iterator; - - /** - * Returns an `iterator` to the beginning of the view. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the view. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a const reference to the element at a specific position. - * - * @param pos The position of the element - * @return A const reference to the specified element - */ - const T& operator[](uint32 pos) const; - - /** - * Returns a reference to the element at a specific position. 
- * - * @param pos The position of the element - * @return A reference to the specified element - */ - T& operator[](uint32 pos); -}; diff --git a/cpp/subprojects/common/include/common/indices/index_vector.hpp b/cpp/subprojects/common/include/common/indices/index_vector.hpp deleted file mode 100644 index 078d0389..00000000 --- a/cpp/subprojects/common/include/common/indices/index_vector.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -// Forward declarations -class IRuleRefinement; -class IThresholdsSubset; - -/** - * Defines an interface for all classes that provide random access to indices. - */ -class IIndexVector { - public: - - virtual ~IIndexVector() {}; - - /** - * Returns the number of indices. - * - * @return The number of indices - */ - virtual uint32 getNumElements() const = 0; - - /** - * Returns whether the indices are partial, i.e., some indices in the range [0, getNumElements()) are missing, - * or not. - * - * @return True, if the indices are partial, false otherwise - */ - virtual bool isPartial() const = 0; - - /** - * Returns the index at a specific position. - * - * @param pos The position of the index. Must be in [0, getNumElements()) - * @return The index at the given position - */ - virtual uint32 getIndex(uint32 pos) const = 0; - - /** - * Creates and return a new instance of type `IRuleRefinement` that allows to search for the best refinement of - * an existing rule that predicts only for the labels whose indices are stored in this vector. 
- * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to create - * the instance - * @param featureIndex The index of the feature that should be considered when searching for the refinement - * @return An unique pointer to an object of type `IRuleRefinement` that has been created - */ - virtual std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/indices/index_vector_complete.hpp b/cpp/subprojects/common/include/common/indices/index_vector_complete.hpp deleted file mode 100644 index b55eb2fe..00000000 --- a/cpp/subprojects/common/include/common/indices/index_vector_complete.hpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector.hpp" -#include "common/iterator/index_iterator.hpp" - -/** - * Provides random access to all indices within a continuous range [0, numIndices). - */ -class CompleteIndexVector final : public IIndexVector { - private: - - uint32 numElements_; - - public: - - /** - * @param numElements The number of indices, the vector provides access to - */ - CompleteIndexVector(uint32 numElements); - - /** - * An iterator that provides read-only access to the indices in the vector. - */ - typedef IndexIterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the indices. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the indices. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Sets the number of indices. 
- * - * @param numElements The number of indices to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumElements(uint32 numElements, bool freeMemory); - - uint32 getNumElements() const override; - - bool isPartial() const override; - - uint32 getIndex(uint32 pos) const override; - - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const override; -}; diff --git a/cpp/subprojects/common/include/common/indices/index_vector_partial.hpp b/cpp/subprojects/common/include/common/indices/index_vector_partial.hpp deleted file mode 100644 index f59ef63d..00000000 --- a/cpp/subprojects/common/include/common/indices/index_vector_partial.hpp +++ /dev/null @@ -1,84 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" -#include "common/indices/index_vector.hpp" - -/** - * Provides random access to a fixed number of indices stored in a C-contiguous array. - */ -class PartialIndexVector final : public IIndexVector { - private: - - DenseVector vector_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - PartialIndexVector(uint32 numElements); - - /** - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - PartialIndexVector(uint32 numElements, bool init); - - /** - * An iterator that provides access to the indices in the vector and allows to modify them. - */ - typedef DenseVector::iterator iterator; - - /** - * An iterator that provides read-only access to the indices in the vector. - */ - typedef DenseVector::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of the indices. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the indices. 
- * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the indices. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the indices. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Sets the number of indices. - * - * @param numElements The number of indices to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumElements(uint32 numElements, bool freeMemory); - - uint32 getNumElements() const override; - - bool isPartial() const override; - - uint32 getIndex(uint32 pos) const override; - - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const override; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_info.hpp b/cpp/subprojects/common/include/common/input/feature_info.hpp deleted file mode 100644 index 7319c651..00000000 --- a/cpp/subprojects/common/include/common/input/feature_info.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/input/feature_type.hpp" -#include "common/macros.hpp" - -#include - -/** - * Defines an interface for all classes that provide information about the types of individual features. - */ -class MLRLCOMMON_API IFeatureInfo { - public: - - virtual ~IFeatureInfo() {}; - - /** - * Creates and returns a new object of type `IFeatureType` that corresponds to the type of the feature at a - * specific index. 
- * - * @return An unique pointer to an object of the type `IFeatureType` that has been created - */ - virtual std::unique_ptr createFeatureType(uint32 featureIndex) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_info_equal.hpp b/cpp/subprojects/common/include/common/input/feature_info_equal.hpp deleted file mode 100644 index 741da14e..00000000 --- a/cpp/subprojects/common/include/common/input/feature_info_equal.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_info.hpp" - -/** - * Defines an interface for all classes that provide information about the types of individual features in cases where - * all features are of the same type, i.e., where all features are either binary, nominal or numerical/ordinal. - */ -class MLRLCOMMON_API IEqualFeatureInfo : public IFeatureInfo { - public: - - virtual ~IEqualFeatureInfo() override {}; -}; - -/** - * Creates and returns a new object of type `IEqualFeatureInfo` in cases where all features are ordinal. - * - * @return An unique pointer to an object of type `IEqualFeatureInfo` that has been created - */ -MLRLCOMMON_API std::unique_ptr createOrdinalFeatureInfo(); - -/** - * Creates and returns a new object of type `IEqualFeatureInfo` in cases where all features are nominal. - * - * @return An unique pointer to an object of type `IEqualFeatureInfo` that has been created - */ -MLRLCOMMON_API std::unique_ptr createNominalFeatureInfo(); - -/** - * Creates and returns a new object of type `IEqualFeatureInfo` in cases where all features are numerical. 
- * - * @return An unique pointer to an object of type `IEqualFeatureInfo` that has been created - */ -MLRLCOMMON_API std::unique_ptr createNumericalFeatureInfo(); diff --git a/cpp/subprojects/common/include/common/input/feature_info_mixed.hpp b/cpp/subprojects/common/include/common/input/feature_info_mixed.hpp deleted file mode 100644 index 807cccec..00000000 --- a/cpp/subprojects/common/include/common/input/feature_info_mixed.hpp +++ /dev/null @@ -1,45 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_info.hpp" - -/** - * Defines an interface for all classes that provide information about the types of individual features in cases where - * different types of features, i.e., ordinal, nominal and numerical ones, are available. - */ -class MLRLCOMMON_API IMixedFeatureInfo : public IFeatureInfo { - public: - - virtual ~IMixedFeatureInfo() override {}; - - /** - * Marks the feature at a specific index as numerical. - * - * @param featureIndex The index of the feature - */ - virtual void setNumerical(uint32 featureIndex) = 0; - - /** - * Marks the feature at a specific index as ordinal. - * - * @param featureIndex The index of the feature - */ - virtual void setOrdinal(uint32 featureIndex) = 0; - - /** - * Marks the feature at a specific index as nominal. - * - * @param featureIndex The index of the feature - */ - virtual void setNominal(uint32 featureIndex) = 0; -}; - -/** - * Creates and returns a new object of type `IMixedFeatureInfo`. 
- * - * @param numFeatures The total number of available features - * @return An unique pointer to an object of type `IMixedFeatureInfo` that has been created - */ -MLRLCOMMON_API std::unique_ptr createMixedFeatureInfo(uint32 numFeatures); diff --git a/cpp/subprojects/common/include/common/input/feature_matrix.hpp b/cpp/subprojects/common/include/common/input/feature_matrix.hpp deleted file mode 100644 index 690a9b76..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Defines an interface for all feature matrices. - */ -class MLRLCOMMON_API IFeatureMatrix : virtual public ITwoDimensionalView { - public: - - virtual ~IFeatureMatrix() override {}; - - /** - * Returns whether the feature matrix is sparse or not. - * - * @return True, if the feature matrix is sparse, false otherwise - */ - virtual bool isSparse() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_matrix_c_contiguous.hpp b/cpp/subprojects/common/include/common/input/feature_matrix_c_contiguous.hpp deleted file mode 100644 index 051d2e11..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix_c_contiguous.hpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "common/data/view_c_contiguous.hpp" -#include "common/input/feature_matrix_row_wise.hpp" - -/** - * Defines an interface for all feature matrices that provide row-wise access to the feature values of examples that are - * stored in a C-contiguous array. 
- */ -class MLRLCOMMON_API ICContiguousFeatureMatrix : virtual public IRowWiseFeatureMatrix { - public: - - virtual ~ICContiguousFeatureMatrix() override {}; -}; - -/** - * An implementation of the type `ICContiguousFeatureMatrix` that provides row-wise read-only access to the feature - * values of examples that are stored in a C-contiguous array. - */ -class CContiguousFeatureMatrix final : public CContiguousConstView, - virtual public ICContiguousFeatureMatrix { - public: - - /** - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param array A pointer to a C-contiguous array of type `float32` that stores the values, the feature - * matrix provides access to - */ - CContiguousFeatureMatrix(uint32 numRows, uint32 numCols, const float32* array); - - bool isSparse() const override; - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& 
marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; -}; - -/** - * Creates and returns a new object of the type `ICContiguousFeatureMatrix`. - * - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param array A pointer to a C-contiguous array of type `float32` that stores the values, the feature matrix - * provides access to - * @return An unique pointer to an object of type `ICContiguousFeatureMatrix` that has been created - */ -MLRLCOMMON_API std::unique_ptr createCContiguousFeatureMatrix(uint32 numRows, uint32 numCols, - const float32* array); - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/include/common/input/feature_matrix_column_wise.hpp b/cpp/subprojects/common/include/common/input/feature_matrix_column_wise.hpp deleted file mode 100644 index 44117edf..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix_column_wise.hpp +++ /dev/null @@ -1,29 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix.hpp" -#include "common/input/feature_vector.hpp" - -#include - -/** - * Defines an interface for all feature matrices that provide column-wise access to the feature values of examples. - */ -class MLRLCOMMON_API IColumnWiseFeatureMatrix : virtual public IFeatureMatrix { - public: - - virtual ~IColumnWiseFeatureMatrix() override {}; - - /** - * Fetches a feature vector that stores the indices of the training examples, as well as their feature values, - * for a specific feature and stores it in a given unique pointer. 
- * - * @param featureIndex The index of the feature - * @param featureVectorPtr An unique pointer to an object of type `FeatureVector` that should be used to store - * the feature vector - */ - virtual void fetchFeatureVector(uint32 featureIndex, - std::unique_ptr& featureVectorPtr) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_matrix_csc.hpp b/cpp/subprojects/common/include/common/input/feature_matrix_csc.hpp deleted file mode 100644 index bea55c12..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix_csc.hpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix_column_wise.hpp" - -/** - * Defines an interface for all feature matrices that provide column-wise access to the feature values of examples that - * are stored in a sparse matrix in the compressed sparse column (CSC) format. - */ -class MLRLCOMMON_API ICscFeatureMatrix : virtual public IColumnWiseFeatureMatrix { - public: - - virtual ~ICscFeatureMatrix() override {}; -}; - -/** - * Creates and returns a new object of the type `ICscFeatureMatrix`. - * - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param data A pointer to an array of type `float32`, shape `(num_non_zero_values)`, that stores all - * non-zero feature values - * @param rowIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * row-indices, the values in `data` correspond to - * @param colIndices A pointer to an array of type `uint32`, shape `(numCols + 1)`, that stores the indices - * of the first element in `data` and `rowIndices` that corresponds to a certain column. 
- * The index at the last position is equal to `num_non_zero_values` - * @return An unique pointer to an object of type `ICscFeatureMatrix` that has been created - */ -MLRLCOMMON_API std::unique_ptr createCscFeatureMatrix(uint32 numRows, uint32 numCols, - const float32* data, uint32* rowIndices, - uint32* colIndices); diff --git a/cpp/subprojects/common/include/common/input/feature_matrix_csr.hpp b/cpp/subprojects/common/include/common/input/feature_matrix_csr.hpp deleted file mode 100644 index c152b59c..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix_csr.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "common/data/view_csr.hpp" -#include "common/input/feature_matrix_row_wise.hpp" - -/** - * Defines an interface for all feature matrices that provide row-wise access to the feature values of examples that are - * stored in a sparse matrix in the compressed sparse row (CSR) format. - */ -class MLRLCOMMON_API ICsrFeatureMatrix : virtual public IRowWiseFeatureMatrix { - public: - - virtual ~ICsrFeatureMatrix() override {}; -}; - -/** - * An implementation of the type `ICsrFeatureMatrix` that provides row-wise read-only access to the feature values of - * examples that are stored in a sparse matrix in the compressed sparse row (CSR) format. - */ -class CsrFeatureMatrix final : public CsrConstView, - virtual public ICsrFeatureMatrix { - public: - - /** - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param data A pointer to an array of type `float32`, shape `(num_non_zero_values)`, that stores all - * non-zero values - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `data` and `colIndices` that corresponds to a certain row. 
The - * index at the last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the values in `data` correspond to - */ - CsrFeatureMatrix(uint32 numRows, uint32 numCols, const float32* data, uint32* rowIndices, uint32* colIndices); - - bool isSparse() const override; - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; -}; - -/** - * Creates and returns a new object of the type `ICsrFeatureMatrix`. 
- * - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param data A pointer to an array of type `float32`, shape `(num_non_zero_values)`, that stores all non-zero - * values - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices of the - * first element in `data` and `colIndices` that corresponds to a certain row. The index at the - * last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the values in `data` correspond to - * @return An unique pointer to an object of type `ICsrFeatureMatrix` that has been created - */ -MLRLCOMMON_API std::unique_ptr createCsrFeatureMatrix(uint32 numRows, uint32 numCols, - const float32* data, uint32* rowIndices, - uint32* colIndices); - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/include/common/input/feature_matrix_fortran_contiguous.hpp b/cpp/subprojects/common/include/common/input/feature_matrix_fortran_contiguous.hpp deleted file mode 100644 index 31a7212b..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix_fortran_contiguous.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix_column_wise.hpp" - -/** - * Defines an interface for all feature matrices that provide column-wise access to the feature values of examples that - * are stored in a Fortran-contiguous array. - */ -class MLRLCOMMON_API IFortranContiguousFeatureMatrix : virtual public IColumnWiseFeatureMatrix { - public: - - virtual ~IFortranContiguousFeatureMatrix() override {}; -}; - -/** - * Creates and returns a new object of type `IFortranContiguousFeatureMatrix`. 
- * - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param array A pointer to a Fortran-contiguous array of type `float32` that stores the feature values - * @return An unique pointer to an object of type `IFortranContiguousFeatureMatrix` that has been created - */ -MLRLCOMMON_API std::unique_ptr createFortranContiguousFeatureMatrix( - uint32 numRows, uint32 numCols, const float32* array); diff --git a/cpp/subprojects/common/include/common/input/feature_matrix_row_wise.hpp b/cpp/subprojects/common/include/common/input/feature_matrix_row_wise.hpp deleted file mode 100644 index a5aba9ed..00000000 --- a/cpp/subprojects/common/include/common/input/feature_matrix_row_wise.hpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix.hpp" - -#include - -// Forward declarations -class IRuleModel; -class ILabelSpaceInfo; -class IMarginalProbabilityCalibrationModel; -class IJointProbabilityCalibrationModel; -class IBinaryPredictor; -class IBinaryPredictorFactory; -class ISparseBinaryPredictor; -class ISparseBinaryPredictorFactory; -class IScorePredictor; -class IScorePredictorFactory; -class IProbabilityPredictor; -class IProbabilityPredictorFactory; - -/** - * Defines an interface for all feature matrices that provide row-wise access to the feature values of examples. - */ -class MLRLCOMMON_API IRowWiseFeatureMatrix : virtual public IFeatureMatrix { - public: - - virtual ~IRowWiseFeatureMatrix() override {}; - - /** - * Creates and returns a new instance of the class `IBinaryPredictor`, based on the type of this feature matrix. 
- * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param ruleModel A reference to an object of type `IRuleModel` that should be used - * to obtain predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that has - * been created - */ - virtual std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `ISparseBinaryPredictor`, based on the type of this feature - * matrix. 
- * - * @param factory A reference to an object of type `ISparseBinaryPredictorFactory` - * that should be used to create the instance - * @param ruleModel A reference to an object of type `IRuleModel` that should be used - * to obtain predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IScorePredictor`, based on the type of this feature matrix. 
- * - * @param factory A reference to an object of type `IScorePredictorFactory` that should be used to - * create the instance - * @param ruleModel A reference to an object of type `IRuleModel` that should be used to obtain - * predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides information about the - * label space that may be used as a basis for making predictions - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IProbabilityPredictor`, based on the type of this feature - * matrix. - * - * @param factory A reference to an object of type `IProbabilityPredictorFactory` - * that should be used to create the instance - * @param ruleModel A reference to an object of type `IRuleModel` that should be used - * to obtain predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const IRuleModel& ruleModel, - const 
ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_type.hpp b/cpp/subprojects/common/include/common/input/feature_type.hpp deleted file mode 100644 index e1960f75..00000000 --- a/cpp/subprojects/common/include/common/input/feature_type.hpp +++ /dev/null @@ -1,20 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -/** - * Defines an interface for all classes that represent the type of a feature. - */ -class IFeatureType { - public: - - virtual ~IFeatureType() {}; - - /** - * Returns whether the feature is nominal or not. - * - * @return True, if the feature is nominal, false otherwise - */ - virtual bool isNominal() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_type_nominal.hpp b/cpp/subprojects/common/include/common/input/feature_type_nominal.hpp deleted file mode 100644 index 867a61b7..00000000 --- a/cpp/subprojects/common/include/common/input/feature_type_nominal.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_type.hpp" - -/** - * Represents a nominal feature. - */ -class NominalFeatureType final : public IFeatureType { - public: - - bool isNominal() const override; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_type_numerical.hpp b/cpp/subprojects/common/include/common/input/feature_type_numerical.hpp deleted file mode 100644 index 81c247f0..00000000 --- a/cpp/subprojects/common/include/common/input/feature_type_numerical.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_type.hpp" - -/** - * Represents a numerical/ordinal feature. 
- */ -class NumericalFeatureType final : public IFeatureType { - public: - - bool isNominal() const override; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_type_ordinal.hpp b/cpp/subprojects/common/include/common/input/feature_type_ordinal.hpp deleted file mode 100644 index 5482fcc7..00000000 --- a/cpp/subprojects/common/include/common/input/feature_type_ordinal.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_type.hpp" - -/** - * Represents an ordinal feature. - */ -class OrdinalFeatureType final : public IFeatureType { - public: - - bool isNominal() const override; -}; diff --git a/cpp/subprojects/common/include/common/input/feature_vector.hpp b/cpp/subprojects/common/include/common/input/feature_vector.hpp deleted file mode 100644 index 9ad813bd..00000000 --- a/cpp/subprojects/common/include/common/input/feature_vector.hpp +++ /dev/null @@ -1,82 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_sparse_array.hpp" -#include "common/input/missing_feature_vector.hpp" - -/** - * An one-dimensional sparse vector that stores the values of training examples for a certain feature, as well as the - * indices of examples with missing feature values. - */ -class FeatureVector final : public MissingFeatureVector { - private: - - SparseArrayVector vector_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - FeatureVector(uint32 numElements); - - /** - * An iterator that provides access to the feature values in the vector and allows to modify them. - */ - typedef SparseArrayVector::iterator iterator; - - /** - * An iterator that provides read-only access to the feature values in the vector. - */ - typedef SparseArrayVector::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of the vector. 
- * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements in the vector - */ - uint32 getNumElements() const; - - /** - * Sets the number of elements in the vector. - * - * @param numElements The number of elements to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumElements(uint32 numElements, bool freeMemory); - - /** - * Sorts the elements in the vector in ascending order based on their values. - */ - void sortByValues(); -}; diff --git a/cpp/subprojects/common/include/common/input/label_matrix.hpp b/cpp/subprojects/common/include/common/input/label_matrix.hpp deleted file mode 100644 index d2dec16e..00000000 --- a/cpp/subprojects/common/include/common/input/label_matrix.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_two_dimensional.hpp" - -/** - * Defines an interface for all label matrices. - */ -class MLRLCOMMON_API ILabelMatrix : virtual public ITwoDimensionalView { - public: - - virtual ~ILabelMatrix() override {}; - - /** - * Returns whether the label matrix is sparse or not. 
- * - * @return True, if the label matrix is sparse, false otherwise - */ - virtual bool isSparse() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/label_matrix_c_contiguous.hpp b/cpp/subprojects/common/include/common/input/label_matrix_c_contiguous.hpp deleted file mode 100644 index e120e80f..00000000 --- a/cpp/subprojects/common/include/common/input/label_matrix_c_contiguous.hpp +++ /dev/null @@ -1,164 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "common/data/arrays.hpp" -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_vector.hpp" -#include "common/input/label_matrix_row_wise.hpp" - -/** - * Defines an interface for all label matrices that provide row-wise access to the labels of individual examples that - * are stored in a C-contiguous array. - */ -class MLRLCOMMON_API ICContiguousLabelMatrix : virtual public IRowWiseLabelMatrix { - public: - - virtual ~ICContiguousLabelMatrix() override {}; -}; - -/** - * Implements random read-only access to the labels of individual training examples that are stored in a pre-allocated - * C-contiguous array. - */ -class CContiguousLabelMatrix final : public CContiguousConstView, - virtual public ICContiguousLabelMatrix { - public: - - /** - * Provides access to the values that are stored in a single row of a `CContiguousLabelMatrix`. - */ - class View final : public VectorConstView { - public: - - /** - * Allows to compute hash values for objects of type `CContiguousLabelMatrix::View`. - */ - struct Hash final { - public: - - /** - * Computes and returns a hash value for an object of type `CContiguousLabelMatrix::View`. 
- * - * @param v A reference to an object of type `CContiguousLabelMatrix::View` - * @return The hash value - */ - inline std::size_t operator()(const View& v) const { - uint32 numElements = v.getNumElements(); - std::size_t hashValue = (std::size_t) numElements; - View::const_iterator it = v.cbegin(); - - for (uint32 i = 0; i < numElements; i++) { - if (it[i]) { - hashValue ^= i + 0x9e3779b9 + (hashValue << 6) + (hashValue >> 2); - } - } - - return hashValue; - } - }; - - /** - * Allows to check whether two objects of type `CContiguousLabelMatrix::View` are equal or not. - */ - struct Pred final { - public: - - /** - * Returns whether two objects of type `CContiguousLabelMatrix::View` are equal or not. - * - * @param lhs A reference to a first object of type `CContiguousLabelMatrix::View` - * @param rhs A reference to a second object of type `CContiguousLabelMatrix::View` - * @return True, if the given objects are equal, false otherwise - */ - inline bool operator()(const View& lhs, const View& rhs) const { - return compareArrays(lhs.cbegin(), lhs.getNumElements(), rhs.cbegin(), - rhs.getNumElements()); - } - }; - - /** - * @param labelMatrix A reference to an object of type `CContiguousLabelMatrix`, the view provides - * access to - * @param row The row, the view provides access to - */ - View(const CContiguousLabelMatrix& labelMatrix, uint32 row); - }; - - /** - * @param numRows The number of rows in the label matrix - * @param numCols The number of columns in the label matrix - * @param array A pointer to a C-contiguous array of type `uint8` that stores the labels - */ - CContiguousLabelMatrix(uint32 numRows, uint32 numCols, const uint8* array); - - /** - * The type of the view that provides access to the values that are stored in a single row of the label matrix. - */ - typedef const View view_type; - - /** - * Creates and returns a view that provides access to the values at a specific row of the label matrix. 
- * - * @param row The row - * @return An object of type `view_type` that has been created - */ - view_type createView(uint32 row) const; - - bool isSparse() const override; - - float32 calculateLabelCardinality() const override; - - std::unique_ptr createLabelVector(uint32 row) const override; - - std::unique_ptr createStatisticsProvider( - const IStatisticsProviderFactory& factory) const override; - - std::unique_ptr createPartitionSampling( - const IPartitionSamplingFactory& factory) const override; - - std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - const SinglePartition& partition, - IStatistics& statistics) const override; - - std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - BiPartition& partition, - IStatistics& statistics) const override; - - std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const override; - - std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const override; - - std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const override; - - std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const override; -}; - -/** - * Creates and returns a new object of the type `ICContiguousLabelMatrix`. 
- - * @param numRows The number of rows in the label matrix - * @param numCols The number of columns in the label matrix - * @param array A pointer to a C-contiguous array of type `uint8` that stores the labels - * @return An unique pointer to an object of type `ICContiguousLabelMatrix` that has been created - */ -MLRLCOMMON_API std::unique_ptr createCContiguousLabelMatrix(uint32 numRows, uint32 numCols, - const uint8* array); - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/include/common/input/label_matrix_csc.hpp b/cpp/subprojects/common/include/common/input/label_matrix_csc.hpp deleted file mode 100644 index 9c60a7c5..00000000 --- a/cpp/subprojects/common/include/common/input/label_matrix_csc.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csc_binary.hpp" -#include "common/data/view_csr_binary.hpp" -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" - -/** - * Implements column-wise read-only access to the labels of individual training examples that are stored in a matrix in - * the compressed sparse column (CSC) format. - * - * This class provides copy constructors for copying an existing `CContiguousConstView`, which provides random access, - * or a `BinaryCsrConstView`, which provides row-wise access to the labels of the training examples. These constructors - * expect the indices of the examples to be considered when copying the existing label matrix to be provided. 
- */ -class CscLabelMatrix final : public BinaryCscConstView { - public: - - /** - * @param labelMatrix A reference to an object of type `CContiguousConstView` to be copied - * @param indicesBegin A `CompleteIndexVector::const_iterator` to the beginning of the indices of the examples - * to be considered - * @param indicesEnd A `CompleteIndexVector::const_iterator` to the end of the indices of the examples to be - * considered - */ - CscLabelMatrix(const CContiguousConstView& labelMatrix, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd); - - /** - * @param labelMatrix A reference to an object of type `CContiguousConstView` to be copied - * @param indicesBegin A `PartialIndexVector::const_iterator` to the beginning of the indices of the examples - * to be considered - * @param indicesEnd A `PartialIndexVector::const_iterator` to the end of the indices of the examples to be - * considered - */ - CscLabelMatrix(const CContiguousConstView& labelMatrix, - PartialIndexVector::const_iterator indicesBegin, PartialIndexVector::const_iterator indicesEnd); - - /** - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` to be copied - * @param indicesBegin A `CompleteIndexVector::const_iterator` to the beginning of the indices of the examples - * to be considered - * @param indicesEnd A `CompleteIndexVector::const_iterator` to the end of the indices of the examples to be - * considered - */ - CscLabelMatrix(const BinaryCsrConstView& labelMatrix, CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd); - - /** - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` to be copied - * @param indicesBegin A `PartialIndexVector::const_iterator` to the beginning of the indices of the examples - * to be considered - * @param indicesEnd A `PartialIndexVector::const_iterator` to the end of the indices of the examples to be - * considered - */ - 
CscLabelMatrix(const BinaryCsrConstView& labelMatrix, PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd); - - ~CscLabelMatrix() override; -}; diff --git a/cpp/subprojects/common/include/common/input/label_matrix_csr.hpp b/cpp/subprojects/common/include/common/input/label_matrix_csr.hpp deleted file mode 100644 index c6a27fb4..00000000 --- a/cpp/subprojects/common/include/common/input/label_matrix_csr.hpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "common/data/arrays.hpp" -#include "common/data/view_csr_binary.hpp" -#include "common/data/view_vector.hpp" -#include "common/input/label_matrix_row_wise.hpp" - -/** - * Defines an interface for all label matrices that provide row-wise access to the labels of individual examples that - * are stored in a sparse matrix in the compressed sparse row (CSR) format. - */ -class MLRLCOMMON_API ICsrLabelMatrix : virtual public IRowWiseLabelMatrix { - public: - - virtual ~ICsrLabelMatrix() override {}; -}; - -/** - * Implements row-wise read-only access to the labels of individual training examples that are stored in a pre-allocated - * sparse matrix in the compressed sparse row (CSR) format. - */ -class CsrLabelMatrix final : public BinaryCsrConstView, - virtual public ICsrLabelMatrix { - public: - - /** - * Provides access to the values that are stored in a single row of a `CsrLabelMatrix``. - */ - class View final : public VectorConstView { - public: - - /** - * Allows to compute hash values for objects of type `CsrLabelMatrix::View`. - */ - struct Hash final { - public: - - /** - * Computes and returns a hash value for a given object of type `CsrLabelMatrix::View`. 
- * - * @param v A reference to an object of type `CsrLabelMatrix::View` - * @return The hash value - */ - inline std::size_t operator()(const View& v) const { - return hashArray(v.cbegin(), v.getNumElements()); - } - }; - - /** - * Allows to check whether two objects of type `CsrLabelMatrix::View` are equal or not. - */ - struct Pred final { - public: - - /** - * Returns whether two objects of type `CsrLabelMatrix::View` are equal or not. - * - * @param lhs A reference to a first object of type `CsrLabelMatrix::View` - * @param rhs A reference to a second object of type `CsrLabelMatrix::View` - * @return True, if the given objects are equal, false otherwise - */ - inline bool operator()(const View& lhs, const View& rhs) const { - return compareArrays(lhs.cbegin(), lhs.getNumElements(), rhs.cbegin(), - rhs.getNumElements()); - } - }; - - /** - * @param labelMatrix A reference to an object of type `CsrLabelMatrix`, the view provides access to - * @param row The row, the view provides access to - */ - View(const CsrLabelMatrix& labelMatrix, uint32 row); - }; - - /** - * @param numRows The number of rows in the label matrix - * @param numCols The number of columns in the label matrix - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `colIndices` that corresponds to a certain row. The index at the - * last position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the relevant labels correspond to - */ - CsrLabelMatrix(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices); - - /** - * The type of the view that provides access to the values that are stored in a single row of the label matrix. - */ - typedef const View view_type; - - /** - * Creates and returns a view that provides access to the values at a specific row of the label matrix. 
- * - * @param row The row - * @return An object of type `view_type` that has been created - */ - view_type createView(uint32 row) const; - - bool isSparse() const override; - - float32 calculateLabelCardinality() const override; - - std::unique_ptr createLabelVector(uint32 row) const override; - - std::unique_ptr createStatisticsProvider( - const IStatisticsProviderFactory& factory) const override; - - std::unique_ptr createPartitionSampling( - const IPartitionSamplingFactory& factory) const override; - - std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - const SinglePartition& partition, - IStatistics& statistics) const override; - - std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - BiPartition& partition, - IStatistics& statistics) const override; - - std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const override; - - std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const override; - - std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const override; - - std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const override; -}; - -/** - * Creates and returns a new object of the type `ICsrLabelMatrix`. 
- * - * @param numRows The number of rows in the label matrix - * @param numCols The number of columns in the label matrix - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `colIndices` that corresponds to a certain row. The index at the last - * position is equal to `num_non_zero_values` - * @param colIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * column-indices, the relevant labels correspond to - * @return An unique pointer to an object of type `ICsrLabelMatrix` that has been created - */ -MLRLCOMMON_API std::unique_ptr createCsrLabelMatrix(uint32 numRows, uint32 numCols, uint32* rowIndices, - uint32* colIndices); - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/include/common/input/label_matrix_row_wise.hpp b/cpp/subprojects/common/include/common/input/label_matrix_row_wise.hpp deleted file mode 100644 index aa108bcf..00000000 --- a/cpp/subprojects/common/include/common/input/label_matrix_row_wise.hpp +++ /dev/null @@ -1,168 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/label_matrix.hpp" -#include "common/input/label_vector.hpp" - -#include - -// Forward declarations -class IStatisticsProvider; -class IStatisticsProviderFactory; -class IPartitionSampling; -class IPartitionSamplingFactory; -class IInstanceSampling; -class IInstanceSamplingFactory; -class IStatistics; -class SinglePartition; -class BiPartition; -class IMarginalProbabilityCalibrator; -class IMarginalProbabilityCalibrationModel; -class IJointProbabilityCalibrator; -class IJointProbabilityCalibrationModel; - -/** - * Defines an interface for all label matrices that provide access to the labels of the training examples. 
- */ -class MLRLCOMMON_API IRowWiseLabelMatrix : virtual public ILabelMatrix { - public: - - virtual ~IRowWiseLabelMatrix() override {}; - - /** - * Calculates and returns the label cardinality, i.e., the average number of relevant labels per example. - * - * @return The label cardinality - */ - virtual float32 calculateLabelCardinality() const = 0; - - /** - * Creates and returns a label vector that corresponds to a specific row in the label matrix. - * - * @param row The row - * @return An unique pointer to an object of type `LabelVector` that has been created - */ - virtual std::unique_ptr createLabelVector(uint32 row) const = 0; - - /** - * Creates and returns a new instance of the class `IStatisticsProvider`, based on the type of this label - * matrix. - * - * @param factory A reference to an object of type `IStatisticsProviderFactory` that should be used to create - * the instance - * @return An unique pointer to an object of type `IStatisticsProvider` that has been created - */ - virtual std::unique_ptr createStatisticsProvider( - const IStatisticsProviderFactory& factory) const = 0; - - /** - * Creates and returns a new instance of the class `IPartitionSampling`, based on the type of this label matrix. - * - * @param factory A reference to an object of type `IPartitionSamplingFactory` that should be used to create - * the instance - * @return An unique pointer to an object of type `IPartitionSampling` that has been created - */ - virtual std::unique_ptr createPartitionSampling( - const IPartitionSamplingFactory& factory) const = 0; - - /** - * Creates and returns a new instance of the class `IInstanceSampling`, based on the type of this label matrix. 
- * - * @param factory A reference to an object of type `IInstanceSamplingFactory` that should be used to - * create the instance - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that are included in the training set - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - const SinglePartition& partition, - IStatistics& statistics) const = 0; - - /** - * Creates and returns a new instance of the class `IInstanceSampling`, based on the type of this label matrix. - * - * @param factory A reference to an object of type `IInstanceSamplingFactory` that should be used to - * create the instance - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that are included in the training set and the holdout set, - * respectively - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - BiPartition& partition, - IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of marginal probabilities, based on the type of this label - * matrix. 
- * - * @param probabilityCalibrator A reference to an object of type `IMarginalProbabilityCalibrator` that should be - * used to fit the calibration model - * @param partition A reference to an object of type `SinglePartition` that provides access to the - * indices of the training examples that are included in the training set - * @param statistics A reference to an object of type `IStatistics` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IMarginalProbabilityCalibrationModel` - * that has been fit - */ - virtual std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of marginal probabilities, based on the type of this label - * matrix. - * - * @param probabilityCalibrator A reference to an object of type `IMarginalProbabilityCalibrator` that should be - * used to fit the calibration model - * @param partition A reference to an object of type `BiPartition` that provides access to the - * indices of the training examples that are included in the training set and the - * holdout set, respectively - * @param statistics A reference to an object of type `IStatistics` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IMarginalProbabilityCalibrationModel` - * that has been fit - */ - virtual std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of joint probabilities, based on the type of this label matrix. 
- * - * @param probabilityCalibrator A reference to an object of type `IJointProbabilityCalibrator` that should be - * used to fit the calibration model - * @param partition A reference to an object of type `SinglePartition` that provides access to the - * indices of the training examples that are included in the training set - * @param statistics A reference to an object of type `IStatistics` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IJointProbabilityCalibrationModel` that - * has been fit - */ - virtual std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of joint probabilities, based on the type of this label matrix. - * - * @param probabilityCalibrator A reference to an object of type `IJointProbabilityCalibrator` that should be - * used to fit the calibration model - * @param partition A reference to an object of type `BiPartition` that provides access to the - * indices of the training examples that are included in the training set and the - * holdout set, respectively - * @param statistics A reference to an object of type `IStatistics` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IJointProbabilityCalibrationModel` that - * has been fit - */ - virtual std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/input/label_vector.hpp b/cpp/subprojects/common/include/common/input/label_vector.hpp deleted file mode 100644 index 07159969..00000000 --- a/cpp/subprojects/common/include/common/input/label_vector.hpp +++ 
/dev/null @@ -1,11 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_sparse_array_binary.hpp" - -/** - * An one-dimensional sparse vector that stores the indices of labels that are relevant to an example. - */ -typedef BinarySparseArrayVector LabelVector; diff --git a/cpp/subprojects/common/include/common/input/missing_feature_vector.hpp b/cpp/subprojects/common/include/common/input/missing_feature_vector.hpp deleted file mode 100644 index ad103096..00000000 --- a/cpp/subprojects/common/include/common/input/missing_feature_vector.hpp +++ /dev/null @@ -1,67 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dok_binary.hpp" - -#include - -/** - * An one-dimensional sparse vector that stores the indices of training examples with missing feature values using the - * dictionary of keys (DOK) format. - */ -class MissingFeatureVector { - private: - - std::unique_ptr missingIndicesPtr_; - - public: - - MissingFeatureVector(); - - /** - * @param missingFeatureVector A reference to an object of type `MissingFeatureVector`, the missing indices - * should be taken from - */ - MissingFeatureVector(MissingFeatureVector& missingFeatureVector); - - /** - * An iterator that provides read-only access to the missing indices. - */ - typedef BinaryDokVector::index_const_iterator missing_index_const_iterator; - - /** - * Returns a `missing_index_const_iterator` to the beginning of the missing indices. - * - * @return A `missing_index_const_iterator` to the beginning - */ - missing_index_const_iterator missing_indices_cbegin() const; - - /** - * Returns a `missing_index_const_iterator` to the end of the missing indices. - * - * @return A `missing_index_const_iterator` to the end - */ - missing_index_const_iterator missing_indices_cend() const; - - /** - * Adds the index of an example with missing feature value. 
- * - * @param index The index to be added - */ - void addMissingIndex(uint32 index); - - /** - * Returns whether the example at a specific index has a missing feature value. - * - * @param index The index of the example to be checked - * @return True, if the example at the given index has a missing feature value, false otherwise - */ - bool isMissing(uint32 index) const; - - /** - * Removes all indices of examples with missing feature values. - */ - void clearMissingIndices(); -}; diff --git a/cpp/subprojects/common/include/common/iterator/binary_forward_iterator.hpp b/cpp/subprojects/common/include/common/iterator/binary_forward_iterator.hpp deleted file mode 100644 index 72bbb757..00000000 --- a/cpp/subprojects/common/include/common/iterator/binary_forward_iterator.hpp +++ /dev/null @@ -1,145 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -/** - * An iterator adaptor that adapts an iterator, which provides access to a fixed number of indices in increasing order, - * such that it acts as a forward iterator that returns a boolean value for each possible index, indicating whether the - * respective index is present in the original iterator or not. - * - * @tparam Iterator The type of the iterator to be adapted - */ -template -class BinaryForwardIterator final { - private: - - Iterator iterator_; - - Iterator end_; - - uint32 index_; - - uint32 iteratorIndex_; - - public: - - /** - * @param begin An iterator to the beginning of the indices - * @param end An iterator to the end of the indices - * @param index The index to start at - */ - BinaryForwardIterator(Iterator begin, Iterator end, uint32 index) - : iterator_(begin), end_(end), index_(index), iteratorIndex_(iterator_ != end_ ? *iterator_ : 0) {} - - /** - * The type that is used to represent the difference between two iterators. 
- */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef bool value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const bool* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef bool reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::forward_iterator_tag iterator_category; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const { - return iterator_ != end_ && iteratorIndex_ == index_; - } - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - BinaryForwardIterator& operator++() { - ++index_; - - if (iterator_ != end_ && iteratorIndex_ < index_) { - iterator_++; - - if (iterator_ != end_) { - iteratorIndex_ = *iterator_; - } - } - - return *this; - } - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - BinaryForwardIterator& operator++(int n) { - index_++; - - if (iterator_ != end_ && iteratorIndex_ < index_) { - iterator_++; - - if (iterator_ != end_) { - iteratorIndex_ = *iterator_; - } - } - - return *this; - } - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const BinaryForwardIterator& rhs) const { - return index_ != rhs.index_; - } - - /** - * Returns whether this iterator and another one refer to the same element. 
- * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const BinaryForwardIterator& rhs) const { - return index_ == rhs.index_; - } -}; - -/** - * Creates and returns a new `BinaryForwardIterator`. - * - * @tparam Iterator The type of the iterator to be adapted - * @param begin An iterator to the beginning of the indices - * @param end An iterator to the end of the indices - * @param index The index to start at - * @return A `BinaryForwardIterator` that has been created - */ -template -static inline BinaryForwardIterator make_binary_forward_iterator(Iterator begin, Iterator end, - uint32 index = 0) { - return BinaryForwardIterator(begin, end, index); -} diff --git a/cpp/subprojects/common/include/common/iterator/index_iterator.hpp b/cpp/subprojects/common/include/common/iterator/index_iterator.hpp deleted file mode 100644 index 004d87a4..00000000 --- a/cpp/subprojects/common/include/common/iterator/index_iterator.hpp +++ /dev/null @@ -1,118 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -/** - * An iterator that provides random read-only access to the indices in a continuous range. - */ -class IndexIterator final { - private: - - uint32 index_; - - public: - - IndexIterator(); - - /** - * @param index The index to start with - */ - IndexIterator(uint32 index); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef uint32 value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const uint32* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef uint32 reference; - - /** - * The tag that specifies the capabilities of the iterator. 
- */ - typedef std::random_access_iterator_tag iterator_category; - - /** - * Returns the element at a specific index. - * - * @param index The index of the element to be returned - * @return The element at the given index - */ - reference operator[](uint32 index) const; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - IndexIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - IndexIterator& operator++(int n); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - IndexIterator& operator--(); - - /** - * Returns an iterator to the previous element. - * - * @return A reference to an iterator that refers to the previous element - */ - IndexIterator& operator--(int n); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const IndexIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const IndexIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. 
- * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const IndexIterator& rhs) const; -}; diff --git a/cpp/subprojects/common/include/common/iterator/non_zero_index_forward_iterator.hpp b/cpp/subprojects/common/include/common/iterator/non_zero_index_forward_iterator.hpp deleted file mode 100644 index 6445c88d..00000000 --- a/cpp/subprojects/common/include/common/iterator/non_zero_index_forward_iterator.hpp +++ /dev/null @@ -1,155 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include -#include - -/** - * An iterator adaptor that adapts an iterator, which provides access to a fixed number of values, such that it acts as - * a forward iterator that returns the indices of all non-zero values. - * - * @tparam Iterator The type of the iterator to be adapted - */ -template -class NonZeroIndexForwardIterator { - private: - - Iterator iterator_; - - Iterator end_; - - uint32 index_; - - public: - - /** - * @param begin An iterator to the beginning of the values - * @param end An iterator to the end of the values - */ - NonZeroIndexForwardIterator(Iterator begin, Iterator end) : iterator_(begin), end_(end), index_(0) { - for (; iterator_ != end_; iterator_++) { - auto value = *iterator_; - - if (value != 0) { - break; - } - - index_++; - } - } - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef uint32 value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const uint32* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. - */ - typedef uint32 reference; - - /** - * The tag that specifies the capabilities of the iterator. 
- */ - typedef std::forward_iterator_tag iterator_category; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const { - return index_; - } - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - NonZeroIndexForwardIterator& operator++() { - iterator_++; - ++index_; - - for (; iterator_ != end_; iterator_++) { - auto value = *iterator_; - - if (value != 0) { - break; - } - - ++index_; - } - - return *this; - } - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - NonZeroIndexForwardIterator& operator++(int n) { - iterator_++; - index_++; - - for (; iterator_ != end_; iterator_++) { - auto value = *iterator_; - - if (value != 0) { - break; - } - - index_++; - } - - return *this; - } - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const NonZeroIndexForwardIterator& rhs) const { - return iterator_ != rhs.iterator_; - } - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const NonZeroIndexForwardIterator& rhs) const { - return iterator_ == rhs.iterator_; - } -}; - -/** - * Creates and returns a new `NonZeroIndexForwardIterator`. 
- * - * @tparam Iterator The type of the iterator to be adapted - * @param begin An iterator to the beginning of the values - * @param end An iterator to the end of the values - * @return A `NonZeroIndexForwardIterator` that has been created - */ -template -static inline NonZeroIndexForwardIterator make_non_zero_index_forward_iterator(Iterator begin, Iterator end) { - return NonZeroIndexForwardIterator(begin, end); -} diff --git a/cpp/subprojects/common/include/common/learner.hpp b/cpp/subprojects/common/include/common/learner.hpp deleted file mode 100644 index 32a9bd6e..00000000 --- a/cpp/subprojects/common/include/common/learner.hpp +++ /dev/null @@ -1,2015 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/feature_binning_equal_frequency.hpp" -#include "common/binning/feature_binning_equal_width.hpp" -#include "common/binning/feature_binning_no.hpp" -#include "common/input/feature_info.hpp" -#include "common/input/feature_matrix_column_wise.hpp" -#include "common/input/feature_matrix_row_wise.hpp" -#include "common/input/label_matrix_row_wise.hpp" -#include "common/multi_threading/multi_threading_manual.hpp" -#include "common/multi_threading/multi_threading_no.hpp" -#include "common/post_optimization/post_optimization_phase_list.hpp" -#include "common/post_optimization/post_optimization_sequential.hpp" -#include "common/post_optimization/post_optimization_unused_rule_removal.hpp" -#include "common/post_processing/post_processor_no.hpp" -#include "common/prediction/label_space_info.hpp" -#include "common/prediction/prediction_matrix_dense.hpp" -#include "common/prediction/prediction_matrix_sparse_binary.hpp" -#include "common/prediction/predictor_binary.hpp" -#include "common/prediction/predictor_probability.hpp" -#include "common/prediction/predictor_score.hpp" -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/prediction/probability_calibration_no.hpp" -#include 
"common/rule_induction/rule_induction_top_down_beam_search.hpp" -#include "common/rule_induction/rule_induction_top_down_greedy.hpp" -#include "common/rule_model_assemblage/default_rule.hpp" -#include "common/rule_model_assemblage/rule_model_assemblage.hpp" -#include "common/rule_model_assemblage/rule_model_assemblage_sequential.hpp" -#include "common/rule_pruning/rule_pruning_irep.hpp" -#include "common/rule_pruning/rule_pruning_no.hpp" -#include "common/sampling/feature_sampling_no.hpp" -#include "common/sampling/feature_sampling_without_replacement.hpp" -#include "common/sampling/instance_sampling_no.hpp" -#include "common/sampling/instance_sampling_stratified_example_wise.hpp" -#include "common/sampling/instance_sampling_stratified_label_wise.hpp" -#include "common/sampling/instance_sampling_with_replacement.hpp" -#include "common/sampling/instance_sampling_without_replacement.hpp" -#include "common/sampling/label_sampling_no.hpp" -#include "common/sampling/label_sampling_round_robin.hpp" -#include "common/sampling/label_sampling_without_replacement.hpp" -#include "common/sampling/partition_sampling_bi_random.hpp" -#include "common/sampling/partition_sampling_bi_stratified_example_wise.hpp" -#include "common/sampling/partition_sampling_bi_stratified_label_wise.hpp" -#include "common/sampling/partition_sampling_no.hpp" -#include "common/stopping/global_pruning_post.hpp" -#include "common/stopping/global_pruning_pre.hpp" -#include "common/stopping/stopping_criterion_list.hpp" -#include "common/stopping/stopping_criterion_size.hpp" -#include "common/stopping/stopping_criterion_time.hpp" - -/** - * Defines an interface for all classes that provide access to the results of fitting a rule learner to training data. - * It incorporates the model that has been trained, as well as additional information that is necessary for obtaining - * predictions for unseen data. 
- */ -class MLRLCOMMON_API ITrainingResult { - public: - - virtual ~ITrainingResult() {}; - - /** - * Returns the number of labels for which a model has been trained. - * - * @return The number of labels - */ - virtual uint32 getNumLabels() const = 0; - - /** - * Returns the model that has been trained. - * - * @return An unique pointer to an object of type `IRuleModel` that has been trained - */ - virtual std::unique_ptr& getRuleModel() = 0; - - /** - * Returns the model that has been trained. - * - * @return An unique pointer to an object of type `IRuleModel` that has been trained - */ - virtual const std::unique_ptr& getRuleModel() const = 0; - - /** - * Returns information about the label space that may be used as a basis for making predictions. - * - * @return An unique pointer to an object of type `ILabelSpaceInfo` that may be used as a basis for making - * predictions - */ - virtual std::unique_ptr& getLabelSpaceInfo() = 0; - - /** - * Returns information about the label space that may be used as a basis for making predictions. - * - * @return An unique pointer to an object of type `ILabelSpaceInfo` that may be used as a basis for making - * predictions - */ - virtual const std::unique_ptr& getLabelSpaceInfo() const = 0; - - /** - * Returns a model that may be used for the calibration of marginal probabilities. - * - * @return An unique pointer to an object of type `IMarginalProbabilityCalibrationModel` that may be used for - * the calibration of marginal probabilities - */ - virtual std::unique_ptr& getMarginalProbabilityCalibrationModel() = 0; - - /** - * Returns a model that may be used for the calibration of marginal probabilities. 
- * - * @return An unique pointer to an object of type `IMarginalProbabilityCalibrationModel` that may be used for - * the calibration of marginal probabilities - */ - virtual const std::unique_ptr& getMarginalProbabilityCalibrationModel() - const = 0; - - /** - * Returns a model that may be used for the calibration of joint probabilities. - * - * @return An unique pointer to an object of type `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - */ - virtual std::unique_ptr& getJointProbabilityCalibrationModel() = 0; - - /** - * Returns a model that may be used for the calibration of joint probabilities. - * - * @return An unique pointer to an object of type `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - */ - virtual const std::unique_ptr& getJointProbabilityCalibrationModel() - const = 0; -}; - -/** - * Defines an interface for all rule learners. - */ -class MLRLCOMMON_API IRuleLearner { - public: - - /** - * Defines an interface for all classes that allow to configure a rule learner. - */ - class IConfig { - friend class AbstractRuleLearner; - - protected: - - /** - * Returns the definition of the function that should be used for comparing the quality of different - * rules. - * - * @return An object of type `RuleCompareFunction` that defines the function that should be used for - * comparing the quality of different rules - */ - virtual RuleCompareFunction getRuleCompareFunction() const = 0; - - /** - * Returns an unique pointer to the configuration of the default that is included in a rule-based model. 
- * - * @return A reference to an unique pointer of type `IDefaultRuleConfig` that stores the configuration - * of the default rule that is included in a rule-based model - */ - virtual std::unique_ptr& getDefaultRuleConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the algorithm for the induction of several rules - * that are added to a rule-based model. - * - * @return A reference to an unique pointer of type `IRuleModelAssemblageConfig` that stores the - * configuration of the algorithm for the induction of several rules that are added to a - * rule-based model - */ - virtual std::unique_ptr& getRuleModelAssemblageConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the algorithm for the induction of individual - * rules. - * - * @return A reference to an unique pointer of type `IRuleInductionConfig` that stores the configuration - * of the algorithm for the induction of individual rules - */ - virtual std::unique_ptr& getRuleInductionConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for the assignment of numerical feature - * values to bins. - * - * @return A reference to an unique pointer of type `IFeatureBinningConfig` that stores the - * configuration of the method for the assignment of numerical feature values to bins - */ - virtual std::unique_ptr& getFeatureBinningConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for sampling labels. - * - * @return A reference to an unique pointer of type `ILabelSamplingConfig` that stores the configuration - * of the method for sampling labels - */ - virtual std::unique_ptr& getLabelSamplingConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for sampling instances. 
- * - * @return A reference to an unique pointer of type `IInstanceSamplingConfig` that stores the - * configuration of the method for sampling instances - */ - virtual std::unique_ptr& getInstanceSamplingConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for sampling features. - * - * @return A reference to an unique pointer of type `IFeatureSamplingConfig` that specifies the - * configuration of the method for sampling features - */ - virtual std::unique_ptr& getFeatureSamplingConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for partitioning the available training - * examples into a training set and a holdout set. - * - * @return A reference to an unique pointer of type `IPartitionSamplingConfig` that stores the - * configuration of the method for partitioning the available training examples into a training - * set and a holdout set - */ - virtual std::unique_ptr& getPartitionSamplingConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for pruning individual rules. - * - * @return A reference to an unique pointer of type `IRulePruningConfig` that stores the configuration - * of the method for pruning individual rules - */ - virtual std::unique_ptr& getRulePruningConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the method for post-processing the predictions of - * rules once they have been learned. - * - * @return A reference to an unique pointer of type `IPostProcessorConfig` that stores the configuration - * of the method that post-processes the predictions of rules once they have been learned - */ - virtual std::unique_ptr& getPostProcessorConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the multi-threading behavior that is used for the - * parallel refinement of rules. 
- * - * @return A reference to an unique pointer of type `IMultiThreadingConfig` that stores the - * configuration of the multi-threading behavior that is used for the parallel refinement of - * rules - */ - virtual std::unique_ptr& getParallelRuleRefinementConfigPtr() = 0; - - /** - * Returns an unique pointer to the the configuration of the multi-threading behavior that is used for - * the parallel update of statistics. - * - * @return A reference to an unique pointer of type `IMultiThreadingConfig` that stores the - * configuration of the multi-threading behavior that is used for the parallel update of - * statistics - */ - virtual std::unique_ptr& getParallelStatisticUpdateConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the multi-threading behavior that is used to - * predict for several query examples in parallel. - * - * @return A reference to an unique pointer of type `IMultiThreadingConfig` that stores the - * configuration of the multi-threading behavior that is used to predict for several query - * examples in parallel - */ - virtual std::unique_ptr& getParallelPredictionConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the stopping criterion that ensures that the number - * of rules does not exceed a certain maximum. - * - * @return A reference to an unique pointer of type `SizeStoppingCriterionConfig` that stores the - * configuration of the stopping criterion that ensures that the number of rules does not exceed - * a certain maximum or a null pointer, if no such stopping criterion should be used - */ - virtual std::unique_ptr& getSizeStoppingCriterionConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the stopping criterion that ensures that a certain - * time limit is not exceeded. 
- * - * @return A reference to an unique pointer of type `TimeStoppingCriterionConfig` that stores the - * configuration of the stopping criterion that ensures that a certain time limit is not - * exceeded or a null pointer, if no such stopping criterion should be used - */ - virtual std::unique_ptr& getTimeStoppingCriterionConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the stopping criterion that allows to decide how - * many rules should be included in a model, such that its performance is optimized globally. - * - * @return A reference to an unique pointer of type `IGlobalPruningConfig` that stores the configuration - * of the stopping criterion that allows to decide how many rules should be included in a model, - * such that its performance is optimized globally, or a null pointer, if no such stopping - * criterion should be used - */ - virtual std::unique_ptr& getGlobalPruningConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the post-optimization method that optimizes each - * rule in a model by relearning it in the context of the other rules. - * - * @return A reference to an unique pointer of type `SequentialPostOptimizationConfig` that stores the - * configuration of the post-optimization method that optimizes each rule in a model by - * relearning it in the context of the other rules or a null pointer, if no such - * post-optimization method should be used - */ - virtual std::unique_ptr& getSequentialPostOptimizationConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the post-optimization method that removes unused - * rules from a model. 
- * - * @return A reference to an unique pointer of type `UnusedRuleRemovalConfig` that stores the - * configuration of the post-optimization method that removes unused rules from a model or a - * null pointer, if no such post-optimization method should be used - */ - virtual std::unique_ptr& getUnusedRuleRemovalConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the calibrator that allows to fit a model for the - * calibration of marginal probabilities. - * - * @return A reference to an unique pointer of type `IMarginalProbabilityCalibratorConfig` that stores - * the configuration of the calibrator that allows to fit a model for the calibration of - * marginal probabilities - */ - virtual std::unique_ptr& - getMarginalProbabilityCalibratorConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the calibrator that allows to fit a model for the - * calibration of joint probabilities. - * - * @return A reference to an unique pointer of type `IJointProbabilityCalibratorConfig` that stores the - * configuration of the calibrator that allows to fit a model for the calibration of joint - * probabilities - */ - virtual std::unique_ptr& - getJointProbabilityCalibratorConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the predictor that allows to predict binary labels. - * - * @return A reference to an unique pointer of type `IBinaryPredictorConfig` that stores the - * configuration of the predictor that allows to predict binary labels or a null pointer if the - * prediction of binary labels is not supported - */ - virtual std::unique_ptr& getBinaryPredictorConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the predictor that allows to predict regression - * scores. 
- * - * @return A reference to an unique pointer of type `IScorePredictorConfig` that stores the - * configuration of the predictor that allows to predict regression scores or a null pointer, if - * the prediction of regression scores is not supported - */ - virtual std::unique_ptr& getScorePredictorConfigPtr() = 0; - - /** - * Returns an unique pointer to the configuration of the predictor that allows to predict probability - * estimates. - * - * @return A reference to an unique pointer of type `IProbabilityPredictorConfig` that stores the - * configuration of the predictor that allows to predict probability estimates or a null - * pointer, if the prediction of probability estimates is not supported - */ - virtual std::unique_ptr& getProbabilityPredictorConfigPtr() = 0; - - public: - - virtual ~IConfig() {}; - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use an algorithm that - * sequentially induces several rules. - */ - class ISequentialRuleModelAssemblageMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ISequentialRuleModelAssemblageMixin() override {}; - - /** - * Configures the rule learner to use an algorithm that sequentially induces several rules, optionally - * starting with a default rule, that are added to a rule-based model. - */ - virtual void useSequentialRuleModelAssemblage() { - std::unique_ptr& ruleModelAssemblageConfigPtr = - this->getRuleModelAssemblageConfigPtr(); - ruleModelAssemblageConfigPtr = - std::make_unique(this->getDefaultRuleConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to induce a default rule. - */ - class IDefaultRuleMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IDefaultRuleMixin() override {}; - - /** - * Configures the rule learner to induce a default rule. 
- */ - virtual void useDefaultRule() { - std::unique_ptr& defaultRuleConfigPtr = this->getDefaultRuleConfigPtr(); - defaultRuleConfigPtr = std::make_unique(true); - }; - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a greedy top-down search - * for the induction of individual rules. - */ - class IGreedyTopDownRuleInductionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IGreedyTopDownRuleInductionMixin() override {}; - - /** - * Configures the rule learner to use a greedy top-down search for the induction of individual rules. - * - * @return A reference to an object of type `IGreedyTopDownRuleInductionConfig` that allows further - * configuration of the algorithm for the induction of individual rules - */ - virtual IGreedyTopDownRuleInductionConfig& useGreedyTopDownRuleInduction() { - std::unique_ptr& ruleInductionConfigPtr = this->getRuleInductionConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getRuleCompareFunction(), - this->getParallelRuleRefinementConfigPtr()); - IGreedyTopDownRuleInductionConfig& ref = *ptr; - ruleInductionConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a top-down beam search. - */ - class IBeamSearchTopDownRuleInductionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IBeamSearchTopDownRuleInductionMixin() override {}; - - /** - * Configures the rule learner to use a top-down beam search for the induction of individual rules. 
- * - * @return A reference to an object of type `IBeamSearchTopDownRuleInduction` that allows further - * configuration of the algorithm for the induction of individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& useBeamSearchTopDownRuleInduction() { - std::unique_ptr& ruleInductionConfigPtr = this->getRuleInductionConfigPtr(); - std::unique_ptr ptr = - std::make_unique( - this->getRuleCompareFunction(), this->getParallelRuleRefinementConfigPtr()); - IBeamSearchTopDownRuleInductionConfig& ref = *ptr; - ruleInductionConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use any post processor. - */ - class INoPostProcessorMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoPostProcessorMixin() override {}; - - /** - * Configures the rule learner to not use any post processor. - */ - virtual void useNoPostProcessor() { - std::unique_ptr& postProcessorConfigPtr = this->getPostProcessorConfigPtr(); - postProcessorConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use any method for the - * assignment of numerical features values to bins. - */ - class INoFeatureBinningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoFeatureBinningMixin() override {}; - - /** - * Configures the rule learner to not use any method for the assignment of numerical feature values to - * bins. - */ - virtual void useNoFeatureBinning() { - std::unique_ptr& featureBinningConfigPtr = - this->getFeatureBinningConfigPtr(); - featureBinningConfigPtr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use equal-width feature - * binning. 
- */ - class IEqualWidthFeatureBinningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IEqualWidthFeatureBinningMixin() override {}; - - /** - * Configures the rule learner to use a method for the assignment of numerical feature values to bins, - * such that each bin contains values from equally sized value ranges. - * - * @return A reference to an object of type `IEqualWidthFeatureBinningConfig` that allows further - * configuration of the method for the assignment of numerical feature values to bins - */ - virtual IEqualWidthFeatureBinningConfig& useEqualWidthFeatureBinning() { - std::unique_ptr& featureBinningConfigPtr = - this->getFeatureBinningConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); - IEqualWidthFeatureBinningConfig& ref = *ptr; - featureBinningConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use equal-frequency feature - * binning. - */ - class IEqualFrequencyFeatureBinningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IEqualFrequencyFeatureBinningMixin() override {}; - - /** - * Configures the rule learner to use a method for the assignment of numerical feature values to bins, - * such that each bin contains approximately the same number of values. 
- * - * @return A reference to an object of type `IEqualFrequencyFeatureBinningConfig` that allows further - * configuration of the method for the assignment of numerical feature values to bins - */ - virtual IEqualFrequencyFeatureBinningConfig& useEqualFrequencyFeatureBinning() { - std::unique_ptr& featureBinningConfigPtr = - this->getFeatureBinningConfigPtr(); - std::unique_ptr ptr = - std::make_unique(this->getParallelStatisticUpdateConfigPtr()); - IEqualFrequencyFeatureBinningConfig& ref = *ptr; - featureBinningConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use label sampling. - */ - class INoLabelSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoLabelSamplingMixin() override {}; - - /** - * Configures the rule learner to not sample from the available labels whenever a new rule should be - * learned. - */ - virtual void useNoLabelSampling() { - std::unique_ptr& labelSamplingConfigPtr = this->getLabelSamplingConfigPtr(); - labelSamplingConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use label sampling without - * replacement. - */ - class ILabelSamplingWithoutReplacementMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ILabelSamplingWithoutReplacementMixin() override {}; - - /** - * Configures the rule learner to sample from the available labels with replacement whenever a new rule - * should be learned. 
- * - * @return A reference to an object of type `ILabelSamplingWithoutReplacementConfig` that allows further - * configuration of the method for sampling labels - */ - virtual ILabelSamplingWithoutReplacementConfig& useLabelSamplingWithoutReplacement() { - std::unique_ptr& labelSamplingConfigPtr = this->getLabelSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - ILabelSamplingWithoutReplacementConfig& ref = *ptr; - labelSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to sample single labels in a - * round-robin fashion. - */ - class IRoundRobinLabelSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IRoundRobinLabelSamplingMixin() override {}; - - /** - * Configures the rule learner to sample a single labels in a round-robin fashion whenever a new rule - * should be learned. - */ - virtual void useRoundRobinLabelSampling() { - std::unique_ptr& labelSamplingConfigPtr = this->getLabelSamplingConfigPtr(); - labelSamplingConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use instance sampling. - */ - class INoInstanceSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoInstanceSamplingMixin() override {}; - - /** - * Configures the rule learner to not sample from the available training examples whenever a new rule - * should be learned. - */ - virtual void useNoInstanceSampling() { - std::unique_ptr& instanceSamplingConfigPtr = - this->getInstanceSamplingConfigPtr(); - instanceSamplingConfigPtr = std::make_unique(); - }; - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use instance sampling with - * replacement. 
- */ - class IInstanceSamplingWithReplacementMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IInstanceSamplingWithReplacementMixin() override {}; - - /** - * Configures the rule learner to sample from the available training examples with replacement whenever - * a new rule should be learned. - * - * @return A reference to an object of type `IInstanceSamplingWithReplacementConfig` that allows further - * configuration of the method for sampling instances - */ - virtual IInstanceSamplingWithReplacementConfig& useInstanceSamplingWithReplacement() { - std::unique_ptr& instanceSamplingConfigPtr = - this->getInstanceSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IInstanceSamplingWithReplacementConfig& ref = *ptr; - instanceSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use instance sampling without - * replacement. - */ - class IInstanceSamplingWithoutReplacementMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IInstanceSamplingWithoutReplacementMixin() override {}; - - /** - * Configures the rule learner to sample from the available training examples without replacement - * whenever a new rule should be learned. - * - * @return A reference to an object of type `IInstanceSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling instances - */ - virtual IInstanceSamplingWithoutReplacementConfig& useInstanceSamplingWithoutReplacement() { - std::unique_ptr& instanceSamplingConfigPtr = - this->getInstanceSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IInstanceSamplingWithoutReplacementConfig& ref = *ptr; - instanceSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use label-wise stratified - * instance sampling. 
- */ - class ILabelWiseStratifiedInstanceSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ILabelWiseStratifiedInstanceSamplingMixin() override {}; - - /** - * Configures the rule learner to sample from the available training examples using stratification, such - * that for each label the proportion of relevant and irrelevant examples is maintained, whenever a new - * rule should be learned. - * - * @return A reference to an object of type `ILabelWiseStratifiedInstanceSamplingConfig` that allows - * further configuration of the method for sampling instances - */ - virtual ILabelWiseStratifiedInstanceSamplingConfig& useLabelWiseStratifiedInstanceSampling() { - std::unique_ptr& instanceSamplingConfigPtr = - this->getInstanceSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - ILabelWiseStratifiedInstanceSamplingConfig& ref = *ptr; - instanceSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use example-wise stratified - * instance sampling. - */ - class IExampleWiseStratifiedInstanceSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IExampleWiseStratifiedInstanceSamplingMixin() override {}; - - /** - * Configures the rule learner to sample from the available training examples using stratification, - * where distinct label vectors are treated as individual classes, whenever a new rule should be - * learned. 
- * - * @return A reference to an object of type `IExampleWiseStratifiedInstanceSamplingConfig` that allows - * further configuration of the method for sampling instances - */ - virtual IExampleWiseStratifiedInstanceSamplingConfig& useExampleWiseStratifiedInstanceSampling() { - std::unique_ptr& instanceSamplingConfigPtr = - this->getInstanceSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IExampleWiseStratifiedInstanceSamplingConfig& ref = *ptr; - instanceSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use feature sampling. - */ - class INoFeatureSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoFeatureSamplingMixin() override {}; - - /** - * Configures the rule learner to not sample from the available features whenever a rule should be - * refined. - */ - virtual void useNoFeatureSampling() { - std::unique_ptr& featureSamplingConfigPtr = - this->getFeatureSamplingConfigPtr(); - featureSamplingConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use feature sampling without - * replacement. - */ - class IFeatureSamplingWithoutReplacementMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IFeatureSamplingWithoutReplacementMixin() override {}; - - /** - * Configures the rule learner to sample from the available features with replacement whenever a rule - * should be refined. 
- * - * @return A reference to an object of type `IFeatureSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling features - */ - virtual IFeatureSamplingWithoutReplacementConfig& useFeatureSamplingWithoutReplacement() { - std::unique_ptr& featureSamplingConfigPtr = - this->getFeatureSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IFeatureSamplingWithoutReplacementConfig& ref = *ptr; - featureSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not partition the available - * training examples into a training set and a holdout set. - */ - class INoPartitionSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoPartitionSamplingMixin() override {}; - - /** - * Configures the rule learner to not partition the available training examples into a training set and - * a holdout set. - */ - virtual void useNoPartitionSampling() { - std::unique_ptr& partitionSamplingConfigPtr = - this->getPartitionSamplingConfigPtr(); - partitionSamplingConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to partition the available - * training example into a training set and a holdout set by randomly splitting the training examples into two - * mutually exclusive sets. - */ - class IRandomBiPartitionSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IRandomBiPartitionSamplingMixin() override {}; - - /** - * Configures the rule learner to partition the available training examples into a training set and a - * holdout set by randomly splitting the training examples into two mutually exclusive sets. 
- * - * @return A reference to an object of type `IRandomBiPartitionSamplingConfig` that allows further - * configuration of the method for partitioning the available training examples into a training - * set and a holdout set - */ - virtual IRandomBiPartitionSamplingConfig& useRandomBiPartitionSampling() { - std::unique_ptr& partitionSamplingConfigPtr = - this->getPartitionSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IRandomBiPartitionSamplingConfig& ref = *ptr; - partitionSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to partition the available - * training examples into a training set and a holdout set using stratification, such that for each label the - * proportion of relevant and irrelevant examples is maintained. - */ - class ILabelWiseStratifiedBiPartitionSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ILabelWiseStratifiedBiPartitionSamplingMixin() override {}; - - /** - * Configures the rule learner to partition the available training examples into a training set and a - * holdout set using stratification, such that for each label the proportion of relevant and irrelevant - * examples is maintained. 
- * - * @return A reference to an object of type `ILabelWiseStratifiedBiPartitionSamplingConfig` that allows - * further configuration of the method for partitioning the available training examples into a - * training and a holdout set - */ - virtual ILabelWiseStratifiedBiPartitionSamplingConfig& useLabelWiseStratifiedBiPartitionSampling() { - std::unique_ptr& partitionSamplingConfigPtr = - this->getPartitionSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - ILabelWiseStratifiedBiPartitionSamplingConfig& ref = *ptr; - partitionSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to partition the available - * training examples into a training set and a holdout set using stratification, where distinct label vectors - * are treated as individual classes. - */ - class IExampleWiseStratifiedBiPartitionSamplingMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IExampleWiseStratifiedBiPartitionSamplingMixin() override {}; - - /** - * Configures the rule learner to partition the available training examples into a training set and a - * holdout set using stratification, where distinct label vectors are treated as individual classes - * - * @return A reference to an object of type `IExampleWiseStratifiedBiPartitionSamplingConfig` that - * allows further configuration of the method for partitioning the available training examples - * into a training and a holdout set - */ - virtual IExampleWiseStratifiedBiPartitionSamplingConfig& useExampleWiseStratifiedBiPartitionSampling() { - std::unique_ptr& partitionSamplingConfigPtr = - this->getPartitionSamplingConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - IExampleWiseStratifiedBiPartitionSamplingConfig& ref = *ptr; - partitionSamplingConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not prune 
individual rules. - */ - class INoRulePruningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoRulePruningMixin() override {}; - - /** - * Configures the rule learner to not prune individual rules. - */ - virtual void useNoRulePruning() { - std::unique_ptr& rulePruningConfigPtr = this->getRulePruningConfigPtr(); - rulePruningConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to prune individual rules by - * following the principles of "incremental reduced error pruning" (IREP). - */ - class IIrepRulePruningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IIrepRulePruningMixin() override {}; - - /** - * Configures the rule learner to prune individual rules by following the principles of "incremental - * reduced error pruning" (IREP). - */ - virtual void useIrepRulePruning() { - std::unique_ptr& rulePruningConfigPtr = this->getRulePruningConfigPtr(); - rulePruningConfigPtr = std::make_unique(this->getRuleCompareFunction()); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use any multi-threading - * for the parallel refinement of rules. - */ - class INoParallelRuleRefinementMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoParallelRuleRefinementMixin() override {} - - /** - * Configures the rule learner to not use any multi-threading for the parallel refinement of rules. - */ - virtual void useNoParallelRuleRefinement() { - std::unique_ptr& parallelRuleRefinementConfigPtr = - this->getParallelRuleRefinementConfigPtr(); - parallelRuleRefinementConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use multi-threading for the - * parallel refinement of rules. 
- */ - class IParallelRuleRefinementMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IParallelRuleRefinementMixin() override {}; - - /** - * Configures the rule learner to use multi-threading for the parallel refinement of rules. - * - * @return A reference to an object of type `IManualMultiThreadingConfig` that allows further - * configuration of the multi-threading behavior - */ - virtual IManualMultiThreadingConfig& useParallelRuleRefinement() { - std::unique_ptr& parallelRuleRefinementConfigPtr = - this->getParallelRuleRefinementConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - IManualMultiThreadingConfig& ref = *ptr; - parallelRuleRefinementConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use any multi-threading - * for the parallel update of statistics. - */ - class INoParallelStatisticUpdateMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoParallelStatisticUpdateMixin() override {}; - - /** - * Configures the rule learner to not use any multi-threading for the parallel update of statistics. - */ - virtual void useNoParallelStatisticUpdate() { - std::unique_ptr& parallelStatisticUpdateConfigPtr = - this->getParallelStatisticUpdateConfigPtr(); - parallelStatisticUpdateConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use multi-threading for the - * parallel update of statistics. - */ - class IParallelStatisticUpdateMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IParallelStatisticUpdateMixin() override {}; - - /** - * Configures the rule learner to use multi-threading for the parallel update of statistics. 
- * - * @return A reference to an object of type `IManualMultiThreadingConfig` that allows further - * configuration of the multi-threading behavior - */ - virtual IManualMultiThreadingConfig& useParallelStatisticUpdate() { - std::unique_ptr& parallelStatisticUpdateConfigPtr = - this->getParallelStatisticUpdateConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - IManualMultiThreadingConfig& ref = *ptr; - parallelStatisticUpdateConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use any multi-threading - * for prediction. - */ - class INoParallelPredictionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoParallelPredictionMixin() override {}; - - /** - * Configures the rule learner to not use any multi-threading to predict for several query examples in - * parallel. - */ - virtual void useNoParallelPrediction() { - std::unique_ptr& parallelPredictionConfigPtr = - this->getParallelPredictionConfigPtr(); - parallelPredictionConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use multi-threading to predict - * for several examples in parallel. - */ - class IParallelPredictionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IParallelPredictionMixin() override {}; - - /** - * Configures the rule learner to use multi-threading to predict for several query examples in parallel. 
- * - * @return A reference to an object of type `IManualMultiThreadingConfig` that allows further - * configuration of the multi-threading behavior - */ - virtual IManualMultiThreadingConfig& useParallelPrediction() { - std::unique_ptr& parallelPredictionConfigPtr = - this->getParallelPredictionConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - IManualMultiThreadingConfig& ref = *ptr; - parallelPredictionConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use a stopping criterion - * that ensures that the number of induced rules does not exceed a certain maximum. - */ - class INoSizeStoppingCriterionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoSizeStoppingCriterionMixin() override {}; - - /** - * Configures the rule learner to not use a stopping criterion that ensures that the number of induced - * rules does not exceed a certain maximum. - */ - virtual void useNoSizeStoppingCriterion() { - std::unique_ptr& sizeStoppingCriterionConfigPtr = - this->getSizeStoppingCriterionConfigPtr(); - sizeStoppingCriterionConfigPtr = nullptr; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a stopping criterion that - * ensures that the number of induced rules does not exceed a certain maximum. - */ - class ISizeStoppingCriterionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ISizeStoppingCriterionMixin() override {}; - - /** - * Configures the rule learner to use a stopping criterion that ensures that the number of induced rules - * does not exceed a certain maximum. 
- * - * @return A reference to an object of type `ISizeStoppingCriterionConfig` that allows further - * configuration of the stopping criterion - */ - virtual ISizeStoppingCriterionConfig& useSizeStoppingCriterion() { - std::unique_ptr& sizeStoppingCriterionConfigPtr = - this->getSizeStoppingCriterionConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - ISizeStoppingCriterionConfig& ref = *ptr; - sizeStoppingCriterionConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use a stopping criterion - * that ensures that a certain time limit is not exceeded. - */ - class INoTimeStoppingCriterionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoTimeStoppingCriterionMixin() override {}; - - /** - * Configures the rule learner to not use a stopping criterion that ensures that a certain time limit is - * not exceeded. - */ - virtual void useNoTimeStoppingCriterion() { - std::unique_ptr& timeStoppingCriterionConfigPtr = - this->getTimeStoppingCriterionConfigPtr(); - timeStoppingCriterionConfigPtr = nullptr; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a stopping criterion that - * ensures that a certain time limit is not exceeded. - */ - class ITimeStoppingCriterionMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ITimeStoppingCriterionMixin() override {}; - - /** - * Configures the rule learner to use a stopping criterion that ensures that a certain time limit is not - * exceeded. 
- * - * @return A reference to an object of type `ITimeStoppingCriterionConfig` that allows further - * configuration of the stopping criterion - */ - virtual ITimeStoppingCriterionConfig& useTimeStoppingCriterion() { - std::unique_ptr& timeStoppingCriterionConfigPtr = - this->getTimeStoppingCriterionConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - ITimeStoppingCriterionConfig& ref = *ptr; - timeStoppingCriterionConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a stopping criterion that - * stops the induction of rules as soon as the quality of a model's predictions for the examples in the training - * or holdout set do not improve according to a certain measure. - */ - class IPrePruningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IPrePruningMixin() override {}; - - /** - * Configures the rule learner to use a stopping criterion that stops the induction of rules as soon as - * the quality of a model's predictions for the examples in the training or holdout set do not improve - * according to a certain measure. - * - * @return A reference to an object of the type `IPrePruningConfig` that allows further configuration of - * the stopping criterion - */ - virtual IPrePruningConfig& useGlobalPrePruning() { - std::unique_ptr& globalPruningConfigPtr = this->getGlobalPruningConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - IPrePruningConfig& ref = *ptr; - globalPruningConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use global pruning. - */ - class INoGlobalPruningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoGlobalPruningMixin() override {}; - - /** - * Configures the rule learner to not use global pruning. 
- */ - virtual void useNoGlobalPruning() { - std::unique_ptr& globalPruningConfigPtr = this->getGlobalPruningConfigPtr(); - globalPruningConfigPtr = nullptr; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a stopping criterion that - * keeps track of the number of rules in a model that perform best with respect to the examples in the training - * or holdout set according to a certain measure. - */ - class IPostPruningMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~IPostPruningMixin() override {}; - - /** - * Configures the rule learner to use a stopping criterion that keeps track of the number of rules in a - * model that perform best with respect to the examples in the training or holdout set according to a - * certain measure. - */ - virtual IPostPruningConfig& useGlobalPostPruning() { - std::unique_ptr& globalPruningConfigPtr = this->getGlobalPruningConfigPtr(); - std::unique_ptr ptr = std::make_unique(); - IPostPruningConfig& ref = *ptr; - globalPruningConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not use a post-optimization - * method that optimizes each rule in a model by relearning it in the context of the other rules. - */ - class INoSequentialPostOptimizationMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoSequentialPostOptimizationMixin() override {}; - - /** - * Configures the rule learner to not use a post-optimization method that optimizes each rule in a model - * by relearning it in the context of the other rules. 
- */ - virtual void useNoSequentialPostOptimization() { - std::unique_ptr& sequentialPostOptimizationConfigPtr = - this->getSequentialPostOptimizationConfigPtr(); - sequentialPostOptimizationConfigPtr = nullptr; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to use a post-optimization method - * that optimizes each rule in a model by relearning it in the context of the other rules. - */ - class ISequentialPostOptimizationMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~ISequentialPostOptimizationMixin() override {}; - - /** - * Configures the rule learner to use a post-optimization method that optimizes each rule in a model by - * relearning it in the context of the other rules. - * - * @return A reference to an object of type `ISequentialPostOptimizationConfig` that allows further - * configuration of the post-optimization method - */ - virtual ISequentialPostOptimizationConfig& useSequentialPostOptimization() { - std::unique_ptr& sequentialPostOptimizationConfigPtr = - this->getSequentialPostOptimizationConfigPtr(); - std::unique_ptr ptr = - std::make_unique(); - ISequentialPostOptimizationConfig& ref = *ptr; - sequentialPostOptimizationConfigPtr = std::move(ptr); - return ref; - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not calibrate marginal - * probabilities. - */ - class INoMarginalProbabilityCalibrationMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoMarginalProbabilityCalibrationMixin() override {}; - - /** - * Configures the rule learner to not calibrate marginal probabilities. 
- */ - virtual void useNoMarginalProbabilityCalibration() { - std::unique_ptr& marginalProbabilityCalibratorConfigPtr = - this->getMarginalProbabilityCalibratorConfigPtr(); - marginalProbabilityCalibratorConfigPtr = std::make_unique(); - } - }; - - /** - * Defines an interface for all classes that allow to configure a rule learner to not calibrate joint - * probabilities. - */ - class INoJointProbabilityCalibrationMixin : virtual public IRuleLearner::IConfig { - public: - - virtual ~INoJointProbabilityCalibrationMixin() override {}; - - /** - * Configures the rule learner to not calibrate joint probabilities. - */ - virtual void useNoJointProbabilityCalibration() { - std::unique_ptr& jointProbabilityCalibratorConfigPtr = - this->getJointProbabilityCalibratorConfigPtr(); - jointProbabilityCalibratorConfigPtr = std::make_unique(); - } - }; - - virtual ~IRuleLearner() {}; - - /** - * Applies the rule learner to given training examples and corresponding ground truth labels. - * - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides information about the - * types of individual features - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that provides - * column-wise access to the feature values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access - * to the ground truth labels of the training examples - * @param randomState The seed to be used by random number generators - * @return An unique pointer to an object of type `ITrainingResult` that provides access to the - * results of fitting the rule learner to the training data - */ - virtual std::unique_ptr fit(const IFeatureInfo& featureInfo, - const IColumnWiseFeatureMatrix& featureMatrix, - const IRowWiseLabelMatrix& labelMatrix, - uint32 randomState) const = 0; - - /** - * Returns whether the rule learner is able to predict binary labels or not. 
- * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to the model - * and additional information that should be used to obtain predictions - * @return True, if the rule learner is able to predict binary labels, false otherwise - */ - virtual bool canPredictBinary(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const = 0; - - /** - * Returns whether the rule learner is able to predict binary labels or not. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return True, if the rule learner is able to predict binary labels, false otherwise - */ - virtual bool canPredictBinary(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const = 0; - - /** - * Creates and returns a predictor that may be used to predict binary labels for given query examples. If the - * prediction of binary labels is not supported by the rule learner, a `std::runtime_error` is thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of binary labels is not - * supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to - * the model and additional information that should be used to obtain - * predictions - * @return An unique pointer to an object of type `IBinaryPredictor` that may be used - * to predict binary labels for the given query examples - */ - virtual std::unique_ptr createBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const = 0; - - /** - * Creates and returns a predictor that may be used to predict binary labels for given query examples. If the - * prediction of binary labels is not supported by the rule learner, a `std::runtime_error` is thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of binary labels is - * not supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that - * provides row-wise access to the feature values of the query - * examples - * @param ruleModel A reference to an object of type `IRuleModel` that should be used - * to obtain predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * obtaining predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that may - * be used to predict binary labels for the given query examples - */ - virtual std::unique_ptr createBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a predictor that may be used to predict sparse binary labels for given query examples. If - * the prediction of sparse binary labels is not supported by the rule learner, a `std::runtime_error` is - * thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of sparse binary labels is - * not supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to - * the model and additional information that should be used to obtain - * predictions - * @return An unique pointer to an object of type `ISparseBinaryPredictor` that may be - * used to predict sparse binary labels for the given query examples - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const = 0; - - /** - * Creates and returns a predictor that may be used to predict sparse binary labels for given query examples. If - * the prediction of sparse binary labels is not supported by the rule learner, a `std::runtime_error` is - * thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of sparse binary - * labels is not supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that - * provides row-wise access to the feature values of the query - * examples - * @param ruleModel A reference to an object of type `IRuleModel` that should be used - * to obtain predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * obtaining predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that may be used to predict sparse binary labels for the given - * query examples - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Returns whether the rule learner is able to predict regression scores or not. 
- * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to the model - * and additional information that should be used to obtain predictions - * @return True, if the rule learner is able to predict regression scores, false otherwise - */ - virtual bool canPredictScores(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const = 0; - - /** - * Returns whether the rule learner is able to predict regression scores or not. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return True, if the rule learner is able to predict regression scores, false otherwise - */ - virtual bool canPredictScores(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const = 0; - - /** - * Creates and returns a predictor that may be used to predict regression scores for given query examples. If - * the prediction of regression scores is not supported by the rule learner, a `std::runtime_error` is thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of regression scores is not - * supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to - * the model and additional information that should be used to obtain - * predictions - * @return An unique pointer to an object of type `IScorePredictor` that may be used to - * predict regression scores for the given query examples - */ - virtual std::unique_ptr createScorePredictor(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const = 0; - - /** - * Creates and returns a predictor that may be used to predict regression scores for given query examples. If - * the prediction of regression scores is not supported by the rule learner, a `std::runtime_error` is thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of regression scores is not - * supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param ruleModel A reference to an object of type `IRuleModel` that should be used to obtain - * predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides information - * about the label space that may be used as a basis for obtaining predictions - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that may be used to - * predict regression scores for the given query examples - */ - virtual std::unique_ptr createScorePredictor(const IRowWiseFeatureMatrix& featureMatrix, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const = 0; - - /** - * Returns whether the rule learner is able to predict probabilities or not. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to the model - * and additional information that should be used to obtain predictions - * @return True, if the rule learner is able to predict probabilities, false otherwise - */ - virtual bool canPredictProbabilities(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const = 0; - - /** - * Returns whether the rule learner is able to predict probabilities or not. 
- * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise - * access to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return True, if the rule learner is able to predict probabilities, false otherwise - */ - virtual bool canPredictProbabilities(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const = 0; - - /** - * Creates and returns a predictor that may be used to predict probability estimates for given query examples. - * If the prediction of probability estimates is not supported by the rule learner, a `std::runtime_error` is - * thrown. - * - * @throws std::runtime_exception The exception that is thrown if the prediction of probability estimates is - * not supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param trainingResult A reference to an object of type `ITrainingResult` that provides access to - * the model and additional information that should be used to obtain - * predictions - * @return An unique pointer to an object of type `IProbabilityPredictor` that may be - * used to predict probability estimates for the given query examples - */ - virtual std::unique_ptr createProbabilityPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const = 0; - - /** - * Creates and returns a predictor that may be used to predict probability estimates for given query examples. - * If the prediction of probability estimates is not supported by the rule learner, a `std::runtime_error` is - * thrown. 
- * - * @throws std::runtime_exception The exception that is thrown if the prediction of probability - * estimates is not supported by the rule learner - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that - * provides row-wise access to the feature values of the query - * examples - * @param ruleModel A reference to an object of type `IRuleModel` that should be used - * to obtain predictions - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * obtaining predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that may be used to predict probability estimates for the given - * query examples - */ - virtual std::unique_ptr createProbabilityPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; - -/** - * An abstract base class for all rule learners. - */ -class AbstractRuleLearner : virtual public IRuleLearner { - public: - - /** - * Allows to configure a rule learner. 
- */ - class Config : virtual public IRuleLearner::IConfig { - private: - - const RuleCompareFunction ruleCompareFunction_; - - protected: - - /** - * An unique pointer that stores the configuration of the default rule that is included in a rule-based - * model. - */ - std::unique_ptr defaultRuleConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for the induction of several rules that - * are added to a rule-based model. - */ - std::unique_ptr ruleModelAssemblageConfigPtr_; - - /** - * An unique pointer that stores the configuration of the algorithm for the induction of individual - * rules. - */ - std::unique_ptr ruleInductionConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for the assignment of numerical feature - * values to bins - */ - std::unique_ptr featureBinningConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for sampling labels. - */ - std::unique_ptr labelSamplingConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for sampling instances. - */ - std::unique_ptr instanceSamplingConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for sampling features. - */ - std::unique_ptr featureSamplingConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for partitioning the available training - * examples into a training set and a holdout set. - */ - std::unique_ptr partitionSamplingConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for pruning individual rules. - */ - std::unique_ptr rulePruningConfigPtr_; - - /** - * An unique pointer that stores the configuration of the method for post-processing the predictions of - * rules once they have been learned. 
- */ - std::unique_ptr postProcessorConfigPtr_; - - /** - * An unique pointer that stores the configuration of the multi-threading behavior that is used for the - * parallel refinement of rules. - */ - std::unique_ptr parallelRuleRefinementConfigPtr_; - - /** - * An unique pointer that stores the configuration of the multi-threading behavior that is used for the - * parallel update of statistics. - */ - std::unique_ptr parallelStatisticUpdateConfigPtr_; - - /** - * An unique pointer that stores the configuration of the multi-threading behavior that is used to - * predict for several query examples in parallel. - */ - std::unique_ptr parallelPredictionConfigPtr_; - - /** - * An unique pointer that stores the configuration of the stopping criterion that ensures that the - * number of rules does not exceed a certain maximum. - */ - std::unique_ptr sizeStoppingCriterionConfigPtr_; - - /** - * An unique pointer that stores the configuration of the stopping criterion that ensures that a certain - * time limit is not exceeded. - */ - std::unique_ptr timeStoppingCriterionConfigPtr_; - - /** - * An unique pointer that stores the configuration of the stopping criterion that allows to decide how - * many rules should be included in a model, such that its performance is optimized globally. - */ - std::unique_ptr globalPruningConfigPtr_; - - /** - * An unique pointer that stores the configuration of the post-optimization method that optimizes each - * rule in a model by relearning it in the context of the other rules. - */ - std::unique_ptr sequentialPostOptimizationConfigPtr_; - - /** - * An unique pointer that stores the configuration of the post-optimization method that removes unused - * rules from a model. - */ - std::unique_ptr unusedRuleRemovalConfigPtr_; - - /** - * An unique pointer that stores the configuration of the calibrator that allows to fit a model for the - * calibration of marginal probabilities. 
- */ - std::unique_ptr marginalProbabilityCalibratorConfigPtr_; - - /** - * An unique pointer that stores the configuration of the calibrator that allows to fit a model for the - * calibration of joint probabilities. - */ - std::unique_ptr jointProbabilityCalibratorConfigPtr_; - - /** - * An unique pointer that stores the configuration of the predictor that allows to predict binary - * labels. - */ - std::unique_ptr binaryPredictorConfigPtr_; - - /** - * An unique pointer that stores the configuration of the predictor that allows to predict regression - * scores. - */ - std::unique_ptr scorePredictorConfigPtr_; - - /** - * An unique pointer that stores the configuration of the predictor that allows to predict probability - * estimates. - */ - std::unique_ptr probabilityPredictorConfigPtr_; - - private: - - RuleCompareFunction getRuleCompareFunction() const override final; - - std::unique_ptr& getDefaultRuleConfigPtr() override final; - - std::unique_ptr& getRuleModelAssemblageConfigPtr() override final; - - std::unique_ptr& getRuleInductionConfigPtr() override final; - - std::unique_ptr& getFeatureBinningConfigPtr() override final; - - std::unique_ptr& getLabelSamplingConfigPtr() override final; - - std::unique_ptr& getInstanceSamplingConfigPtr() override final; - - std::unique_ptr& getFeatureSamplingConfigPtr() override final; - - std::unique_ptr& getPartitionSamplingConfigPtr() override final; - - std::unique_ptr& getRulePruningConfigPtr() override final; - - std::unique_ptr& getPostProcessorConfigPtr() override final; - - std::unique_ptr& getParallelRuleRefinementConfigPtr() override final; - - std::unique_ptr& getParallelStatisticUpdateConfigPtr() override final; - - std::unique_ptr& getParallelPredictionConfigPtr() override final; - - std::unique_ptr& getSizeStoppingCriterionConfigPtr() override final; - - std::unique_ptr& getTimeStoppingCriterionConfigPtr() override final; - - std::unique_ptr& getGlobalPruningConfigPtr() override final; - - std::unique_ptr& 
getSequentialPostOptimizationConfigPtr() - override final; - - std::unique_ptr& getUnusedRuleRemovalConfigPtr() override final; - - std::unique_ptr& getMarginalProbabilityCalibratorConfigPtr() - override final; - - std::unique_ptr& getJointProbabilityCalibratorConfigPtr() - override final; - - std::unique_ptr& getBinaryPredictorConfigPtr() override final; - - std::unique_ptr& getScorePredictorConfigPtr() override final; - - std::unique_ptr& getProbabilityPredictorConfigPtr() override final; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - */ - Config(RuleCompareFunction ruleCompareFunction); - }; - - private: - - IRuleLearner::IConfig& config_; - - std::unique_ptr createRuleModelAssemblageFactory( - const IRowWiseLabelMatrix& labelMatrix) const; - - std::unique_ptr createThresholdsFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const; - - std::unique_ptr createRuleInductionFactory(const IFeatureMatrix& featureMatrix, - const ILabelMatrix& labelMatrix) const; - - std::unique_ptr createLabelSamplingFactory(const ILabelMatrix& labelMatrix) const; - - std::unique_ptr createInstanceSamplingFactory() const; - - std::unique_ptr createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const; - - std::unique_ptr createPartitionSamplingFactory() const; - - std::unique_ptr createRulePruningFactory() const; - - std::unique_ptr createPostProcessorFactory() const; - - std::unique_ptr createSizeStoppingCriterionFactory() const; - - std::unique_ptr createTimeStoppingCriterionFactory() const; - - std::unique_ptr createGlobalPruningFactory() const; - - std::unique_ptr createSequentialPostOptimizationFactory() const; - - std::unique_ptr createUnusedRuleRemovalFactory() const; - - std::unique_ptr createMarginalProbabilityCalibratorFactory() const; - - std::unique_ptr 
createJointProbabilityCalibratorFactory() const; - - protected: - - /** - * May be overridden by subclasses in order create objects of the type `IStoppingCriterionFactory` to be used by - * the rule learner. - * - * @param factory A reference to an object of type `StoppingCriterionListFactory` the objects may be added to - */ - virtual void createStoppingCriterionFactories(StoppingCriterionListFactory& factory) const; - - /** - * May be overridden by subclasses in order to create objects of the type `IPostOptimizationPhaseFactory` to be - * used by the rule learner. - * - * @param factory A reference to an object of type `PostOptimizationPhaseListFactory` the objects may be added - * to - */ - virtual void createPostOptimizationPhaseFactories(PostOptimizationPhaseListFactory& factory) const; - - /** - * Must be implemented by subclasses in order to create the `IStatisticsProviderFactory` to be used by the rule - * learner. - * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature - * values of the training examples - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @return An unique pointer to an object of type `IStatisticsProviderFactory` that has been - * created - */ - virtual std::unique_ptr createStatisticsProviderFactory( - const IFeatureMatrix& featureMatrix, const IRowWiseLabelMatrix& labelMatrix) const = 0; - - /** - * Must be implemented by subclasses in order to create the `IModelBuilderFactory` to be used by the rule - * learner. - * - * @return An unique pointer to an object of type `IModelBuilderFactory` that has been created - */ - virtual std::unique_ptr createModelBuilderFactory() const = 0; - - /** - * May be overridden by subclasses in order to create the `ILabelSpaceInfo` to be used by the rule learner as a - * basis for for making predictions. 
- * - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @return An unique pointer to an object of type `ILabelSpaceInfo` that has been created - */ - virtual std::unique_ptr createLabelSpaceInfo(const IRowWiseLabelMatrix& labelMatrix) const; - - /** - * May be overridden by subclasses in order to create the `IBinaryPredictorFactory` to be used by the rule - * learner for predicting binary labels. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictorFactory` that has been created - * or a null pointer, if the rule learner does not support to predict binary labels - */ - virtual std::unique_ptr createBinaryPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const; - - /** - * May be overridden by subclasses in order to create the `ISparseBinaryPredictorFactory` to be used by the rule - * learner for predicting sparse binary labels. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictorFactory` that has been - * created or a null pointer, if the rule learner does not support to predict sparse binary - * labels - */ - virtual std::unique_ptr createSparseBinaryPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const; - - /** - * May be overridden by subclasses in order to create the `IScorePredictorFactory` to be used by the rule - * learner for predicting regression scores. 
- * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictorFactory` that has been created or - * a null pointer, if the rule learner does not support to predict regression scores - */ - virtual std::unique_ptr createScorePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const; - - /** - * May be overridden by subclasses in order to create the `IProbabilityPredictorFactory` to be used by the rule - * learner for predicting probability estimates. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictorFactory` that has been - * created or a null pointer, if the rule learner does not support to predict probability - * estimates - */ - virtual std::unique_ptr createProbabilityPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const; - - public: - - /** - * @param config A reference to an object of type `IRuleLearner::IConfig` that specifies the configuration that - * should be used by the rule learner - */ - AbstractRuleLearner(IRuleLearner::IConfig& config); - - std::unique_ptr fit(const IFeatureInfo& featureInfo, - const IColumnWiseFeatureMatrix& featureMatrix, - const IRowWiseLabelMatrix& labelMatrix, uint32 randomState) const override; - - bool canPredictBinary(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const override; - - bool canPredictBinary(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - std::unique_ptr createBinaryPredictor(const IRowWiseFeatureMatrix& 
featureMatrix, - const ITrainingResult& trainingResult) const override; - - std::unique_ptr createBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const override; - - std::unique_ptr createSparseBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - bool canPredictScores(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const override; - - bool canPredictScores(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const override; - - std::unique_ptr createScorePredictor(const IRowWiseFeatureMatrix& featureMatrix, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const override; - - bool canPredictProbabilities(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const override; - - bool canPredictProbabilities(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const override; - - std::unique_ptr createProbabilityPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const 
IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; -}; diff --git a/cpp/subprojects/common/include/common/macros.hpp b/cpp/subprojects/common/include/common/macros.hpp deleted file mode 100644 index 6e88e7ee..00000000 --- a/cpp/subprojects/common/include/common/macros.hpp +++ /dev/null @@ -1,14 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#ifdef _WIN32 - #ifdef MLRLCOMMON_EXPORTS - #define MLRLCOMMON_API __declspec(dllexport) - #else - #define MLRLCOMMON_API __declspec(dllimport) - #endif -#else - #define MLRLCOMMON_API -#endif diff --git a/cpp/subprojects/common/include/common/math/math.hpp b/cpp/subprojects/common/include/common/math/math.hpp deleted file mode 100644 index 9b3e4d6a..00000000 --- a/cpp/subprojects/common/include/common/math/math.hpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * Returns the result of the floating point division `numerator / denominator` or 0, if a division by zero occurs. - * - * @tparam T The type of the operands - * @param numerator The numerator - * @param denominator The denominator - * @return The result of the division or 0, if a division by zero occurred - */ -template -static inline constexpr T divideOrZero(T numerator, T denominator) { - T result = numerator / denominator; - return std::isfinite(result) ? result : 0; -} - -/** - * Calculates the arithmetic mean of two values `small` and `large`, where `small < large`. - * - * The mean is calculated as `small + ((large - small) * 0.5`, instead of `(small + large) / 2`, to prevent overflows. 
- * - * @param small The smaller of both values - * @param large The larger of both values - * @return The mean that has been calculated - */ -template -static inline constexpr T arithmeticMean(T small, T large) { - return small + ((large - small) * 0.5); -} - -/** - * Allows to compute the arithmetic mean of several floating point values `x_1, ..., x_n` in an iterative manner, which - * prevents overflows. - * - * This function must be invoked for each value as follows: - * `mean_1 = iterativeArithmeticMean(1, x_1, 0); ...; mean_n = iterativeArithmeticMean(n, x_n, mean_n-1)` - * - * @tparam T The type of the values - * @param n The index of the value, starting at 1 - * @param x The n-th value - * @param mean The arithmetic mean of all previously provided values - * @return The arithmetic mean of all values provided so far - */ -template -static inline constexpr T iterativeArithmeticMean(uint32 n, T x, T mean) { - return mean + ((x - mean) / (T) n); -} - -/** - * Calculates and returns the fraction of a given integer value `fraction * n`, such that a certain upper and lower - * bound is respected. - * - * @param n The value - * @param fraction The fraction. Must be in (0, 1) - * @param minimum The minimum - * @param maximum The maximum or a value < `minimum`, if no upper bound should be enforced - */ -static inline uint32 calculateBoundedFraction(uint32 n, float32 fraction, uint32 minimum, uint32 maximum) { - // Calculate the fraction... - uint32 result = (uint32) std::ceil(fraction * n); - - // Prevent the minimum to exceed the original value... - uint32 min = minimum > n ? n : minimum; - - // Ensure that the result is not smaller than the given minimum... - if (result < min) { - return min; - } - - // If `max >= min`, ensure that the result does not exceed the given maximum... 
- if (maximum >= minimum && result > maximum) { - return maximum; - } - - return result; -} diff --git a/cpp/subprojects/common/include/common/measures/measure_distance.hpp b/cpp/subprojects/common/include/common/measures/measure_distance.hpp deleted file mode 100644 index aabd22b0..00000000 --- a/cpp/subprojects/common/include/common/measures/measure_distance.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_vector.hpp" -#include "common/prediction/label_vector_set.hpp" -#include "common/prediction/probability_calibration_joint.hpp" - -#include - -/** - * Defines an interface for all measures that may be used to compare predictions for individual examples to the - * corresponding ground truth labels in order to obtain a distance. - */ -class IDistanceMeasure { - public: - - virtual ~IDistanceMeasure() {}; - - /** - * Calculates and returns the distance between the predicted scores for a single example and a label vector. - * - * @param labelVectorIndex The index of the label vector, the scores should be compared to - * @param labelVector A reference to an object of type `LabelVector`, the scores should be compared to - * @param scoresBegin A `VectorConstView::const_iterator` to the beginning of the predicted scores - * @param scoresEnd A `VectorConstView::const_iterator` to the end of the predicted scores - * @return The distance that has been calculated - */ - virtual float64 measureDistance(uint32 labelVectorIndex, const LabelVector& labelVector, - VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd) const = 0; - - /** - * Searches among the label vectors contained in a `LabelVectorSet` and returns the one that is closest to the - * scores that are predicted for an example. 
- * - * @param labelVectorSet A reference to an object of type `LabelVectorSet` that contains the label - * vectors - * @param scoresBegin A `VectorConstView::const_iterator` to the beginning of the predicted scores - * @param scoresEnd A `VectorConstView::const_iterator` to the end of the predicted scores - * @return A reference to an object of type `LabelVector` that has been found - */ - virtual const LabelVector& getClosestLabelVector(const LabelVectorSet& labelVectorSet, - VectorConstView::const_iterator scoresBegin, - VectorConstView::const_iterator scoresEnd) const { - LabelVectorSet::const_iterator labelVectorIterator = labelVectorSet.cbegin(); - LabelVectorSet::frequency_const_iterator frequencyIterator = labelVectorSet.frequencies_cbegin(); - uint32 numLabelVectors = labelVectorSet.getNumLabelVectors(); - const LabelVector* closestLabelVector = labelVectorIterator[0].get(); - uint32 maxFrequency = frequencyIterator[0]; - float64 minDistance = this->measureDistance(0, *closestLabelVector, scoresBegin, scoresEnd); - - for (uint32 i = 1; i < numLabelVectors; i++) { - const LabelVector& labelVector = *labelVectorIterator[i]; - uint32 frequency = frequencyIterator[i]; - float64 distance = this->measureDistance(i, labelVector, scoresBegin, scoresEnd); - - if (distance < minDistance || (distance == minDistance && frequency > maxFrequency)) { - closestLabelVector = &labelVector; - maxFrequency = frequency; - minDistance = distance; - } - } - - return *closestLabelVector; - } -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IDistanceMeasure`. - */ -class IDistanceMeasureFactory { - public: - - virtual ~IDistanceMeasureFactory() {}; - - /** - * Creates and returns a new object of type `IDistanceMeasure`. 
- * - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that should be used for - * the calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that should be used for the - * calibration of joint probabilities - * @return An unique pointer to an object of type `IDistanceMeasure` that - * has been created - */ - virtual std::unique_ptr createDistanceMeasure( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/measures/measure_evaluation.hpp b/cpp/subprojects/common/include/common/measures/measure_evaluation.hpp deleted file mode 100644 index d148db01..00000000 --- a/cpp/subprojects/common/include/common/measures/measure_evaluation.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csr_binary.hpp" - -#include - -/** - * Defines an interface for all measures that may be used to assess the quality of predictions for certain examples by - * comparing them to the corresponding ground truth labels. - */ -class IEvaluationMeasure { - public: - - virtual ~IEvaluationMeasure() {}; - - /** - * Calculates and returns a numerical score that assesses the quality of predictions for the example at a - * specific index by comparing them to the corresponding ground truth labels, based on a label matrix that - * provides random access to the labels of the training examples. 
- * - * @param exampleIndex The index of the example for which the predictions should be evaluated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random access to - * the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the currently - * predicted scores - * @return The numerical score that has been calculated - */ - virtual float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of predictions for the example at a - * specific index by comparing them to the corresponding ground truth labels, based on a label matrix that - * provides row-wise access to the labels of the training examples. - * - * @param exampleIndex The index of the example for which the predictions should be evaluated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise access to - * the labels of the training examples - * @param scoreMatrix A reference to an object of type `CContiguousConstView` that stores the currently - * predicted scores - * @return The numerical score that has been calculated - */ - virtual float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const CContiguousConstView& scoreMatrix) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IEvaluationMeasure`. - */ -class IEvaluationMeasureFactory { - public: - - virtual ~IEvaluationMeasureFactory() {}; - - /** - * Creates and returns a new object of type `IEvaluationMeasure`. 
- * - * @return An unique pointer to an object of type `IEvaluationMeasure` that has been created - */ - virtual std::unique_ptr createEvaluationMeasure() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/measures/measure_evaluation_sparse.hpp b/cpp/subprojects/common/include/common/measures/measure_evaluation_sparse.hpp deleted file mode 100644 index a44d0ff1..00000000 --- a/cpp/subprojects/common/include/common/measures/measure_evaluation_sparse.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_sparse_set.hpp" -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csr_binary.hpp" - -#include - -/** - * Defines an interface for all measures that may be used to assess the quality of predictions for certain examples, - * which are stored using sparse data structures, by comparing them to the corresponding ground truth labels. - */ -class ISparseEvaluationMeasure { - public: - - virtual ~ISparseEvaluationMeasure() {}; - - /** - * Calculates and returns a numerical score that assesses the quality of predictions for the example at a - * specific index by comparing them to the corresponding ground truth labels, based on a label matrix that - * provides random access to the labels of the training examples. 
- * - * @param exampleIndex The index of the example for which the predictions should be evaluated - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random access to - * the labels of the training examples - * @param scoreMatrix A reference to an object of type `SparseSetMatrix` that stores the currently predicted - * scores - * @return The numerical score that has been calculated - */ - virtual float64 evaluate(uint32 exampleIndex, const CContiguousConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of predictions for the example at a - * specific index by comparing them to the corresponding ground truth labels, based on a label matrix that - * provides row-wise access to the labels of the training examples. - * - * @param exampleIndex The index of the example for which the predictions should be evaluated - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise access to - * the labels of the training examples - * @param scoreMatrix A reference to an object of type `SparseSetMatrix` that stores the currently predicted - * scores - * @return The numerical score that has been calculated - */ - virtual float64 evaluate(uint32 exampleIndex, const BinaryCsrConstView& labelMatrix, - const SparseSetMatrix& scoreMatrix) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `ISparseEvaluationMeasure`. - */ -class ISparseEvaluationMeasureFactory { - public: - - virtual ~ISparseEvaluationMeasureFactory() {}; - - /** - * Creates and returns a new object of type `ISparseEvaluationMeasure`. 
- * - * @return An unique pointer to an object of type `ISparseEvaluationMeasure` that has been created - */ - virtual std::unique_ptr createSparseEvaluationMeasure() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/model/body.hpp b/cpp/subprojects/common/include/common/model/body.hpp deleted file mode 100644 index c48bdd86..00000000 --- a/cpp/subprojects/common/include/common/model/body.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_csr.hpp" -#include "common/data/view_vector.hpp" -#include "common/macros.hpp" - -#include - -// Forward declarations -class EmptyBody; -class ConjunctiveBody; - -/** - * Defines an interface for all classes that represent the body of a rule. - */ -class MLRLCOMMON_API IBody { - public: - - virtual ~IBody() {}; - - /** - * A visitor function for handling objects of the type `EmptyBody`. - */ - typedef std::function EmptyBodyVisitor; - - /** - * A visitor function for handling objects of the type `ConjunctiveBody`. - */ - typedef std::function ConjunctiveBodyVisitor; - - /** - * Returns whether an individual example, which is stored in a C-contiguous matrix, is covered by the body or - * not. - * - * @param begin A `VectorConstView::const_iterator` to the beginning of the example's feature values - * @param end A `VectorConstView::const_iterator` to the end of the example's feature values - * @return True, if the example is covered, false otherwise - */ - virtual bool covers(VectorConstView::const_iterator begin, - VectorConstView::const_iterator end) const = 0; - - /** - * Returns whether an individual example, which is stored in a CSR sparse matrix, is covered by the body or not. 
- * - * @param indicesBegin An iterator to the beginning of the example's feature values - * @param indicesEnd An iterator to the end of the example's feature values - * @param valuesBegin An iterator to the beginning of the example's feature_indices - * @param valuesEnd An iterator to the end of the example's feature indices - * @param tmpArray1 An array of type `float32`, shape `(num_features)` that is used to temporarily store - * non-zero feature values. May contain arbitrary values - * @param tmpArray2 An array of type `uint32`, shape `(num_features)` that is used to temporarily keep track - * of the feature indices with non-zero feature values. Must not contain any elements with - * value `n` - * @param n An arbitrary number. If this function is called multiple times for different examples, - * but using the same `tmpArray2`, the number must be unique for each of the function - * invocations - * @return True, if the example is covered, false otherwise - */ - virtual bool covers(CsrConstView::index_const_iterator indicesBegin, - CsrConstView::index_const_iterator indicesEnd, - CsrConstView::value_const_iterator valuesBegin, - CsrConstView::value_const_iterator valuesEnd, float32* tmpArray1, - uint32* tmpArray2, uint32 n) const = 0; - - /** - * Invokes one of the given visitor functions, depending on which one is able to handle this particular type of - * body. 
- * - * @param emptyBodyVisitor The visitor function for handling objects of the type `EmptyBody` - * @param conjunctiveBodyVisitor The visitor function for handling objects of the type `ConjunctiveBody` - */ - virtual void visit(EmptyBodyVisitor emptyBodyVisitor, ConjunctiveBodyVisitor conjunctiveBodyVisitor) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/model/body_conjunctive.hpp b/cpp/subprojects/common/include/common/model/body_conjunctive.hpp deleted file mode 100644 index 4dd11d05..00000000 --- a/cpp/subprojects/common/include/common/model/body_conjunctive.hpp +++ /dev/null @@ -1,374 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/body.hpp" - -/** - * A body that consists of a conjunction of conditions using the operators <= or > for numerical conditions, and = or != - * for nominal conditions, respectively. - */ -class MLRLCOMMON_API ConjunctiveBody final : public IBody { - private: - - const uint32 numLeq_; - - uint32* leqFeatureIndices_; - - float32* leqThresholds_; - - const uint32 numGr_; - - uint32* grFeatureIndices_; - - float32* grThresholds_; - - const uint32 numEq_; - - uint32* eqFeatureIndices_; - - float32* eqThresholds_; - - const uint32 numNeq_; - - uint32* neqFeatureIndices_; - - float32* neqThresholds_; - - public: - - /** - * @param numLeq The number of conditions that use the <= operator - * @param numGr The number of conditions that use the > operator - * @param numEq The number of conditions that use the == operator - * @param numNeq The number of conditions that use the != operator - */ - ConjunctiveBody(uint32 numLeq, uint32 numGr, uint32 numEq, uint32 numNeq); - - ~ConjunctiveBody() override; - - /** - * An iterator that provides access to the thresholds that are used by the conditions in the body and allows to - * modify them. 
- */ - typedef float32* threshold_iterator; - - /** - * An iterator that provides read-only access to the thresholds that are used by the conditions in the body. - */ - typedef const float32* threshold_const_iterator; - - /** - * An iterator that provides access to the feature indices that correspond to the conditions in the body and - * allows to modify them. - */ - typedef uint32* index_iterator; - - /** - * An iterator that provides read-only access to the feature indices that correspond to the conditions in the - * body. - */ - typedef const uint32* index_const_iterator; - - /** - * Returns the number of conditions that use the <= operator. - * - * @return The number of conditions - */ - uint32 getNumLeq() const; - - /** - * Returns a `threshold_iterator` to the beginning of the thresholds that correspond to conditions that use the - * <= operator. - * - * @return A `threshold_iterator` to the beginning - */ - threshold_iterator leq_thresholds_begin(); - - /** - * Returns a `threshold_iterator` to the end of the thresholds that correspond to conditions that use the <= - * operator. - * - * @return A `threshold_iterator` to the end - */ - threshold_iterator leq_thresholds_end(); - - /** - * Returns a `threshold_const_iterator` to the beginning of the thresholds that correspond to conditions that - * use the <= operator. - * - * @return A `threshold_const_iterator` to the beginning - */ - threshold_const_iterator leq_thresholds_cbegin() const; - - /** - * Returns a `threshold_const_iterator` to the end of the thresholds that correspond to conditions that use the - * <= operator. - * - * @return A `threshold_const_iterator` to the end - */ - threshold_const_iterator leq_thresholds_cend() const; - - /** - * Returns an `index_iterator` to the beginning of the feature indices that correspond to conditions that use - * the <= operator. 
- * - * @return An `index_iterator` to the beginning - */ - index_iterator leq_indices_begin(); - - /** - * Returns an `index_iterator` to the end of the feature indices that correspond to conditions that use the <= - * operator. - * - * @return An `index_iterator` to the end - */ - index_iterator leq_indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the feature indices that correspond to conditions that - * use the <= operator. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator leq_indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the feature indices that correspond to conditions that use - * the <= operator. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator leq_indices_cend() const; - - /** - * Returns the number of conditions that use the > operator. - * - * @return The number of conditions - */ - uint32 getNumGr() const; - - /** - * Returns a `threshold_iterator` to the beginning of the thresholds that correspond to conditions that use the - * > operator. - * - * @return A `threshold_iterator` to the beginning - */ - threshold_iterator gr_thresholds_begin(); - - /** - * Returns a `threshold_iterator` to the end of the thresholds that correspond to conditions that use the > - * operator. - * - * @return A `threshold_iterator` to the end - */ - threshold_iterator gr_thresholds_end(); - - /** - * Returns a `threshold_const_iterator` to the beginning of the thresholds that correspond to conditions that - * use the > operator. - * - * @return A `threshold_const_iterator` to the beginning - */ - threshold_const_iterator gr_thresholds_cbegin() const; - - /** - * Returns a `threshold_const_iterator` to the end of the thresholds that correspond to conditions that use the - * > operator. 
- * - * @return A `threshold_const_iterator` to the end - */ - threshold_const_iterator gr_thresholds_cend() const; - - /** - * Returns an `index_iterator` to the beginning of the feature indices that correspond to conditions that use - * the > operator. - * - * @return An `index_iterator` to the beginning - */ - index_iterator gr_indices_begin(); - - /** - * Returns an `index_iterator` to the end of the feature indices that correspond to conditions that use the > - * operator. - * - * @return An `index_iterator` to the end - */ - index_iterator gr_indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the feature indices that correspond to conditions that - * use the > operator. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator gr_indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the feature indices that correspond to conditions that use - * the > operator. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator gr_indices_cend() const; - - /** - * Returns the number of conditions that use the == operator. - * - * @return The number of conditions - */ - uint32 getNumEq() const; - - /** - * Returns a `threshold_iterator` to the beginning of the thresholds that correspond to conditions that use the - * == operator. - * - * @return A `threshold_iterator` to the beginning - */ - threshold_iterator eq_thresholds_begin(); - - /** - * Returns a `threshold_iterator` to the end of the thresholds that correspond to conditions that use the == - * operator. - * - * @return A `threshold_iterator` to the end - */ - threshold_iterator eq_thresholds_end(); - - /** - * Returns a `threshold_const_iterator` to the beginning of the thresholds that correspond to conditions that - * use the == operator. 
- * - * @return A `threshold_const_iterator` to the beginning - */ - threshold_const_iterator eq_thresholds_cbegin() const; - - /** - * Returns a `threshold_const_iterator` to the end of the thresholds that correspond to conditions that use the - * == operator. - * - * @return A `threshold_const_iterator` to the end - */ - threshold_const_iterator eq_thresholds_cend() const; - - /** - * Returns an `index_iterator` to the beginning of the feature indices that correspond to conditions that use - * the == operator. - * - * @return An `index_iterator` to the beginning - */ - index_iterator eq_indices_begin(); - - /** - * Returns an `index_iterator` to the end of the feature indices that correspond to conditions that use the == - * operator. - * - * @return An `index_iterator` to the end - */ - index_iterator eq_indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the feature indices that correspond to conditions that - * use the == operator. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator eq_indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the feature indices that correspond to conditions that use - * the == operator. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator eq_indices_cend() const; - - /** - * Returns the number of conditions that use the != operator. - * - * @return The number of conditions - */ - uint32 getNumNeq() const; - - /** - * Returns a `threshold_iterator` to the beginning of the thresholds that correspond to conditions that use the - * != operator. - * - * @return A `threshold_iterator` to the beginning - */ - threshold_iterator neq_thresholds_begin(); - - /** - * Returns a `threshold_iterator` to the end of the thresholds that correspond to conditions that use the != - * operator. 
- * - * @return A `threshold_iterator` to the end - */ - threshold_iterator neq_thresholds_end(); - - /** - * Returns a `threshold_const_iterator` to the beginning of the thresholds that correspond to conditions that - * use the != operator. - * - * @return A `threshold_const_iterator` to the beginning - */ - threshold_const_iterator neq_thresholds_cbegin() const; - - /** - * Returns a `threshold_const_iterator` to the end of the thresholds that correspond to conditions that use the - * != operator. - * - * @return A `threshold_const_iterator` to the end - */ - threshold_const_iterator neq_thresholds_cend() const; - - /** - * Returns an `index_iterator` to the beginning of the feature indices that correspond to conditions that use - * the != operator. - * - * @return An `index_iterator` to the beginning - */ - index_iterator neq_indices_begin(); - - /** - * Returns an `index_iterator` to the end of the feature indices that correspond to conditions that use the != - * operator. - * - * @return An `index_iterator` to the end - */ - index_iterator neq_indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the feature indices that correspond to conditions that - * use the != operator. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator neq_indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the feature indices that correspond to conditions that use - * the != operator. 
- * - * @return An `index_const_iterator` to the end - */ - index_const_iterator neq_indices_cend() const; - - /** - * @see `IBody::covers` - */ - bool covers(VectorConstView::const_iterator begin, - VectorConstView::const_iterator end) const override; - - /** - * @see `IBody::covers` - */ - bool covers(CsrConstView::index_const_iterator indicesBegin, - CsrConstView::index_const_iterator indicesEnd, - CsrConstView::value_const_iterator valuesBegin, - CsrConstView::value_const_iterator valuesEnd, float32* tmpArray1, uint32* tmpArray2, - uint32 n) const override; - - void visit(EmptyBodyVisitor emptyBodyVisitor, ConjunctiveBodyVisitor conjunctiveBodyVisitor) const override; -}; diff --git a/cpp/subprojects/common/include/common/model/body_empty.hpp b/cpp/subprojects/common/include/common/model/body_empty.hpp deleted file mode 100644 index 0692ce01..00000000 --- a/cpp/subprojects/common/include/common/model/body_empty.hpp +++ /dev/null @@ -1,24 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/body.hpp" - -/** - * An empty body that does not contain any conditions and therefore covers any examples. 
- */ -class MLRLCOMMON_API EmptyBody final : public IBody { - public: - - bool covers(VectorConstView::const_iterator begin, - VectorConstView::const_iterator end) const override; - - bool covers(CsrConstView::index_const_iterator indicesBegin, - CsrConstView::index_const_iterator indicesEnd, - CsrConstView::value_const_iterator valuesBegin, - CsrConstView::value_const_iterator valuesEnd, float32* tmpArray1, uint32* tmpArray2, - uint32 n) const override; - - void visit(EmptyBodyVisitor emptyBodyVisitor, ConjunctiveBodyVisitor conjunctiveBodyVisitor) const override; -}; diff --git a/cpp/subprojects/common/include/common/model/condition.hpp b/cpp/subprojects/common/include/common/model/condition.hpp deleted file mode 100644 index 9fe89c1f..00000000 --- a/cpp/subprojects/common/include/common/model/condition.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * An enum that specifies all possible types of operators used by a condition of a rule. - */ -enum Comparator : uint8 { - LEQ = 0, - GR = 1, - EQ = 2, - NEQ = 3 -}; - -/** - * Stores the properties of a condition of a rule. It consists of the index of the feature, the condition corresponds - * to, the type of the operator that is used by the condition, as well as a threshold. In addition, it stores the range - * [start, end) that corresponds to the elements, e.g. examples or bins, that are covered (or uncovered, if - * `covered == false`) by the condition, as well as the sum of the weights of all covered elements. 
- */ -struct Condition { - public: - - Condition() {} - - /** - * @param condition A reference to an existing condition to be copied - */ - Condition(const Condition& condition) - : featureIndex(condition.featureIndex), comparator(condition.comparator), threshold(condition.threshold), - start(condition.start), end(condition.end), covered(condition.covered), numCovered(condition.numCovered) { - } - - /** - * Assigns the properties of an existing condition to this condition. - * - * @param condition A reference to the existing condition - * @return A reference to the modified condition - */ - Condition& operator=(const Condition& condition) { - featureIndex = condition.featureIndex; - comparator = condition.comparator; - threshold = condition.threshold; - start = condition.start; - end = condition.end; - covered = condition.covered; - numCovered = condition.numCovered; - return *this; - } - - /** - * The index of the feature, the condition corresponds to. - */ - uint32 featureIndex; - - /** - * The type of the operator that is used by the condition. - */ - Comparator comparator; - - /** - * The threshold that is used by the condition. - */ - float32 threshold; - - /** - * The index of the first element (inclusive) that is covered (or uncovered) by the condition. - */ - int64 start; - - /** - * The index of the last element (exclusive) that is covered (or uncovered) by the condition. - */ - int64 end; - - /** - * True, if the elements in [start, end) are covered by the condition, false otherwise. - */ - bool covered; - - /** - * The number of elements that are covered by the condition. 
- */ - uint32 numCovered; -}; diff --git a/cpp/subprojects/common/include/common/model/condition_list.hpp b/cpp/subprojects/common/include/common/model/condition_list.hpp deleted file mode 100644 index 7ef6cd2a..00000000 --- a/cpp/subprojects/common/include/common/model/condition_list.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/body_conjunctive.hpp" -#include "common/model/condition.hpp" - -#include -#include -#include - -/** - * A list that stores conditions in the order they have been learned. - */ -class ConditionList final { - private: - - std::vector vector_; - - std::array numConditionsPerComparator_; - - public: - - ConditionList(); - - /** - * @param conditionList A reference to an object of type `ConditionList` to be copied - */ - ConditionList(const ConditionList& conditionList); - - /** - * An iterator that provides read-only access to the conditions in the list. - */ - typedef std::vector::const_iterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the list. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the list. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns how many conditions are contained by the list in total. - * - * @return The number of conditions that are contained by the list - */ - uint32 getNumConditions() const; - - /** - * Adds a new condition to the end of the list. - * - * @param condition A reference to an object of type `Condition` that should be added - */ - void addCondition(const Condition& condition); - - /** - * Removes the last condition from the list. - */ - void removeLastCondition(); - - /** - * Creates and returns a new object of type `ConjunctiveBody` from the conditions that contained by this list. 
- * - * @return An unique pointer to an object of type `ConjunctiveBody` that has been created - */ - std::unique_ptr createConjunctiveBody() const; -}; diff --git a/cpp/subprojects/common/include/common/model/head.hpp b/cpp/subprojects/common/include/common/model/head.hpp deleted file mode 100644 index ff2ad196..00000000 --- a/cpp/subprojects/common/include/common/model/head.hpp +++ /dev/null @@ -1,40 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" - -#include - -// Forward declarations -class CompleteHead; -class PartialHead; - -/** - * Defines an interface for all classes that represent the head of a rule. - */ -class MLRLCOMMON_API IHead { - public: - - virtual ~IHead() {}; - - /** - * A visitor function for handling objects of the type `CompleteHead`. - */ - typedef std::function CompleteHeadVisitor; - - /** - * A visitor function for handling objects of the type `PartialHead`. - */ - typedef std::function PartialHeadVisitor; - - /** - * Invokes one of the given visitor functions, depending on which one is able to handle this particular type of - * head. - * - * @param completeHeadVisitor The visitor function for handling objects of the type `CompleteHead` - * @param partialHeadVisitor The visitor function for handling objects of the type `PartialHead` - */ - virtual void visit(CompleteHeadVisitor completeHeadVisitor, PartialHeadVisitor partialHeadVisitor) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/model/head_complete.hpp b/cpp/subprojects/common/include/common/model/head_complete.hpp deleted file mode 100644 index 8ace6588..00000000 --- a/cpp/subprojects/common/include/common/model/head_complete.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/model/head.hpp" - -/** - * A head that contains a numerical score for each available label. 
- */ -class MLRLCOMMON_API CompleteHead final : public IHead { - private: - - const uint32 numElements_; - - float64* scores_; - - public: - - /** - * @param numElements The number of scores that are contained by the head. - */ - CompleteHead(uint32 numElements); - - ~CompleteHead() override; - - /** - * An iterator that provides access to the scores the are contained by the head and allows to modify them. - */ - typedef float64* score_iterator; - - /** - * An iterator that provides read-only access to the scores that are contained by the head. - */ - typedef const float64* score_const_iterator; - - /** - * Returns the number of scores that are contained by the head. - * - * @return The number of scores - */ - uint32 getNumElements() const; - - /** - * Returns a `score_iterator` to the beginning of the scores that are contained by the head. - * - * @return A `score_iterator` to the beginning - */ - score_iterator scores_begin(); - - /** - * Returns a `score_iterator` to the end of the scores that are contained by the head. - * - * @return A `score_iterator` to the end - */ - score_iterator scores_end(); - - /** - * Returns a `score_const_iterator` to the beginning of the scores that are contained by the head. - * - * @return A `score_const_iterator` to the beginning - */ - score_const_iterator scores_cbegin() const; - - /** - * Returns a `score_const_iterator` to the end of the scores that are contained by the head. 
- * - * @return A `score_const_iterator` to the end - */ - score_const_iterator scores_cend() const; - - void visit(CompleteHeadVisitor completeHeadVisitor, PartialHeadVisitor partialHeadVisitor) const override; -}; diff --git a/cpp/subprojects/common/include/common/model/head_partial.hpp b/cpp/subprojects/common/include/common/model/head_partial.hpp deleted file mode 100644 index f7a939a8..00000000 --- a/cpp/subprojects/common/include/common/model/head_partial.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/model/head.hpp" - -/** - * A head that contains a numerical score for a subset of the available labels. - */ -class MLRLCOMMON_API PartialHead final : public IHead { - private: - - const uint32 numElements_; - - float64* scores_; - - uint32* labelIndices_; - - public: - - /** - * @param numElements The number of scores that are contained by the head - */ - PartialHead(uint32 numElements); - - ~PartialHead() override; - - /** - * An iterator that provides access to the scores that are contained by the head and allows to modify them. - */ - typedef float64* score_iterator; - - /** - * An iterator that provides read-only access to the scores that are contained by the head. - */ - typedef const float64* score_const_iterator; - - /** - * An iterator that provides access to the indices, the scores that are contained by the head, correspond to and - * allows to modify them. - */ - typedef uint32* index_iterator; - - /** - * An iterator that provides read-only access to the indices, the scores that are contained by the head, - * correspond to. - */ - typedef const uint32* index_const_iterator; - - /** - * Returns the number of scores that are contained by the head. - * - * @return The number of scores - */ - uint32 getNumElements() const; - - /** - * Returns a `score_iterator` to the beginning of the scores that are contained by the head. 
- * - * @return A `score_iterator` to the beginning - */ - score_iterator scores_begin(); - - /** - * Returns a `score_iterator` to the end of the scores that are contained by the head. - * - * @return A `score_iterator` to the end - */ - score_iterator scores_end(); - - /** - * Returns a `score_const_iterator` to the beginning of the scores that are contained by the head. - * - * @return A `score_const_iterator` to the beginning - */ - score_const_iterator scores_cbegin() const; - - /** - * Returns a `score_const_iterator` to the end of the scores that are contained by the head. - * - * @return A `score_const_iterator` to the end - */ - score_const_iterator scores_cend() const; - - /** - * Returns an `index_iterator` to the beginning of the indices, the scores that are contained by the head - * correspond to. - * - * @return An `index_iterator` to the beginning - */ - index_iterator indices_begin(); - - /** - * Returns an `index_iterator` to the end of the indices, the scores that are contained by the head correspond - * to. - * - * @return An `index_iterator` to the end - */ - index_iterator indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the indices, the scores that are contained by the head - * correspond to. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices, the scores that are contained by the head, - * correspond to. 
- * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - void visit(CompleteHeadVisitor completeHeadVisitor, PartialHeadVisitor partialHeadVisitor) const override; -}; diff --git a/cpp/subprojects/common/include/common/model/model_builder.hpp b/cpp/subprojects/common/include/common/model/model_builder.hpp deleted file mode 100644 index ee8880a6..00000000 --- a/cpp/subprojects/common/include/common/model/model_builder.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/condition_list.hpp" -#include "common/model/rule_model.hpp" -#include "common/rule_refinement/prediction_evaluated.hpp" - -/** - * Defines an interface for all classes that allow to incrementally build rule-based models. - */ -class IModelBuilder { - public: - - virtual ~IModelBuilder() {}; - - /** - * Sets the default rule of the model. - * - * @param predictionPtr A reference to an unique pointer of type `AbstractEvaluatedPrediction` that stores the - * scores that are predicted by the default rule - */ - virtual void setDefaultRule(std::unique_ptr& predictionPtr) = 0; - - /** - * Adds a new rule to the model. - * - * @param conditionListPtr A reference to an unique pointer of type `ConditionList` that stores the rule's - * conditions - * @param predictionPtr A reference to an unique pointer of type `AbstractEvaluatedPrediction` that stores - * the scores that are predicted by the rule - */ - virtual void addRule(std::unique_ptr& conditionListPtr, - std::unique_ptr& predictionPtr) = 0; - - /** - * Sets the number of used rules. - * - * @param numUsedRules The number of used rules - */ - virtual void setNumUsedRules(uint32 numUsedRules) = 0; - - /** - * Builds and returns the model. 
- * - * @return An unique pointer to an object of type `IRuleModel` that has been built - */ - virtual std::unique_ptr buildModel() = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IModelBuilder`. - */ -class IModelBuilderFactory { - public: - - virtual ~IModelBuilderFactory() {}; - - /** - * Creates and returns a new instance of type `IModelBuilder`. - * - * @return An unique pointer to an object of type `IModelBuilder` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/model/rule_list.hpp b/cpp/subprojects/common/include/common/model/rule_list.hpp deleted file mode 100644 index 397db8ca..00000000 --- a/cpp/subprojects/common/include/common/model/rule_list.hpp +++ /dev/null @@ -1,381 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/body.hpp" -#include "common/model/head.hpp" -#include "common/model/rule_model.hpp" - -#include - -/** - * Defines an interface for all rule-based models that store several rules in an ordered list. Optionally, the model may - * also contain a default rule that either takes precedence over the remaining rules or not. - */ -class MLRLCOMMON_API IRuleList : public IRuleModel { - public: - - virtual ~IRuleList() override {}; - - /** - * Creates a new default rule from a given head and adds it to the model. - * - * @param headPtr An unique pointer to an object of type `IHead` that should be used as the head of the rule - */ - virtual void addDefaultRule(std::unique_ptr headPtr) = 0; - - /** - * Creates a new rule from a given body and head and adds it to the end of the model. 
- * - * @param bodyPtr An unique pointer to an object of type `IBody` that should be used as the body of the rule - * @param headPtr An unique pointer to an object of type `IHead` that should be used as the head of the rule - */ - virtual void addRule(std::unique_ptr bodyPtr, std::unique_ptr headPtr) = 0; - - /** - * Returns whether the model contains a default rule or not. - * - * @return True, if the model contains a default rule, false otherwise - */ - virtual bool containsDefaultRule() const = 0; - - /** - * Returns whether the default rule takes precedence over the remaining rules or not. - * - * @return True, if the default rule takes precedence over the remaining rules, false otherwise - */ - virtual bool isDefaultRuleTakingPrecedence() const = 0; - - /** - * Invokes some of the given visitor functions, depending on which ones are able to handle the bodies and heads - * of all rules that are contained in this model, including the default rule, if available. - * - * @param emptyBodyVisitor The visitor function for handling objects of the type `EmptyBody` - * @param conjunctiveBodyVisitor The visitor function for handling objects of the type `ConjunctiveBody` - * @param completeHeadVisitor The visitor function for handling objects of the type `CompleteHead` - * @param partialHeadVisitor The visitor function for handling objects of the type `PartialHead` - */ - virtual void visit(IBody::EmptyBodyVisitor emptyBodyVisitor, - IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const = 0; - - /** - * Invokes some of the given visitor functions, depending on which ones are able to handle the bodies and heads - * of all used rules that are contained in this model, including the default rule, if available. 
- * - * @param emptyBodyVisitor The visitor function for handling objects of the type `EmptyBody` - * @param conjunctiveBodyVisitor The visitor function for handling objects of the type `ConjunctiveBody` - * @param completeHeadVisitor The visitor function for handling objects of the type `CompleteHead` - * @param partialHeadVisitor The visitor function for handling objects of the type `PartialHead` - */ - virtual void visitUsed(IBody::EmptyBodyVisitor emptyBodyVisitor, - IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const = 0; -}; - -/** - * An implementation of the type `IRuleList` that stores several rules in the order of their induction. Optionally, the - * model may also contain a default rule that either takes precedence over the remaining rules or not. - */ -class RuleList final : public IRuleList { - public: - - /** - * An implementation of the type `IRule` that stores unique pointers to the body and head of a rule. - */ - class Rule final { - private: - - std::unique_ptr bodyPtr_; - - std::unique_ptr headPtr_; - - public: - - /** - * @param bodyPtr An unique pointer to an object of type `IBody` that represents the body of the rule - * @param headPtr An unique pointer to an object of type `IHead` that represents the head of the rule - */ - Rule(std::unique_ptr bodyPtr, std::unique_ptr headPtr); - - /** - * Returns the body of the rule. - * - * @return A reference to an object of type `IBody` that represents the body of the rule - */ - const IBody& getBody() const; - - /** - * Returns the head of the rule. - * - * @return A reference to an object of type `IHead` that represents the head of the rule - */ - const IHead& getHead() const; - - /** - * Invokes some of the given visitor functions, depending on which ones are able to handle the rule's - * particular type of body and head. 
- * - * @param emptyBodyVisitor The visitor function for handling objects of type `EmptyBody` - * @param conjunctiveBodyVisitor The visitor function for handling objects of type `ConjunctiveBody` - * @param completeHeadVisitor The visitor function for handling objects of type `CompleteHead` - * @param partialHeadVisitor The visitor function for handling objects of type `PartialHead` - */ - void visit(IBody::EmptyBodyVisitor emptyBodyVisitor, - IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const; - }; - - private: - - /** - * A forward iterator that provides access to the rules in a model, including the default rule, if available. - */ - class ConstIterator final { - private: - - const Rule* defaultRule_; - - std::vector::const_iterator iterator_; - - uint32 offset_; - - uint32 defaultRuleIndex_; - - uint32 index_; - - public: - - /** - * @param defaultRuleTakesPrecedence True, if the default rule takes precedence over the remaining - * rules, false otherwise - * @param defaultRule A pointer to an object of type `Rule` that stores the default - * rule or a null pointer, if no default rule is available - * @param iterator An iterator to the beginning of the remaining rules - * @param start The index of the rule to start at - * @param end The index of the rule to end at (exclusive) - */ - ConstIterator(bool defaultRuleTakesPrecedence, const Rule* defaultRule, - const std::vector::const_iterator iterator, uint32 start, uint32 end); - - /** - * The type that is used to represent the difference between two iterators. - */ - typedef int difference_type; - - /** - * The type of the elements, the iterator provides access to. - */ - typedef const Rule value_type; - - /** - * The type of a pointer to an element, the iterator provides access to. - */ - typedef const Rule* pointer; - - /** - * The type of a reference to an element, the iterator provides access to. 
- */ - typedef const Rule& reference; - - /** - * The tag that specifies the capabilities of the iterator. - */ - typedef std::forward_iterator_tag iterator_category; - - /** - * Returns the element, the iterator currently refers to. - * - * @return The element, the iterator currently refers to - */ - reference operator*() const; - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ConstIterator& operator++(); - - /** - * Returns an iterator to the next element. - * - * @return A reference to an iterator that refers to the next element - */ - ConstIterator& operator++(int n); - - /** - * Returns an iterator to one of the subsequent elements. - * - * @param difference The number of elements to increment the iterator by - * @return A copy of this iterator that refers to the specified element - */ - ConstIterator operator+(const uint32 difference) const; - - /** - * Returns an iterator to one of the subsequent elements. - * - * @param difference The number of elements to increment the iterator by - * @return A reference to an iterator that refers to the specified element - */ - ConstIterator& operator+=(const uint32 difference); - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators do not refer to the same element, false otherwise - */ - bool operator!=(const ConstIterator& rhs) const; - - /** - * Returns whether this iterator and another one refer to the same element. - * - * @param rhs A reference to another iterator - * @return True, if the iterators refer to the same element, false otherwise - */ - bool operator==(const ConstIterator& rhs) const; - - /** - * Returns the difference between this iterator and another one. 
- * - * @param rhs A reference to another iterator - * @return The difference between the iterators - */ - difference_type operator-(const ConstIterator& rhs) const; - }; - - std::unique_ptr defaultRulePtr_; - - std::vector ruleList_; - - uint32 numUsedRules_; - - bool defaultRuleTakesPrecedence_; - - public: - - /** - * @param defaultRuleTakesPrecedence True, if the default rule should take precedence over the remaining rules, - * false otherwise - */ - RuleList(bool defaultRuleTakesPrecedence); - - /** - * An iterator that provides read-only access to rules. - */ - typedef ConstIterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of all rules, including the default rule, if available. - * - * @param maxRules The maximum number of rules to consider or 0, if all rules should be considered - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin(uint32 maxRules = 0) const; - - /** - * Returns a `const_iterator` to the end of all rules, including the default rule, if available. - * - * @param maxRules The maximum number of rules to consider or 0, if all rules should be considered - * @return A `const_iterator` to the end - */ - const_iterator cend(uint32 maxRules = 0) const; - - /** - * Returns a `const_iterator` to the beginning of all used rules, including the default rule, if available. - * - * @param maxRules The maximum number of rules to consider or 0, if all rules should be considered - * @return A `const_iterator` to the beginning - */ - const_iterator used_cbegin(uint32 maxRules = 0) const; - - /** - * Returns a `const_iterator` to the end of all used rules, including the default rule, if available. 
- * - * @param maxRules The maximum number of rules to consider or 0, if all used rules should be considered - * @return A `const_iterator` to the end - */ - const_iterator used_cend(uint32 maxRules = 0) const; - - uint32 getNumRules() const override; - - uint32 getNumUsedRules() const override; - - void setNumUsedRules(uint32 numUsedRules) override; - - void addDefaultRule(std::unique_ptr headPtr) override; - - void addRule(std::unique_ptr bodyPtr, std::unique_ptr headPtr) override; - - bool containsDefaultRule() const override; - - bool isDefaultRuleTakingPrecedence() const override; - - void visit(IBody::EmptyBodyVisitor emptyBodyVisitor, IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const override; - - void visitUsed(IBody::EmptyBodyVisitor emptyBodyVisitor, IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const override; - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const 
IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; -}; - -/** - * Creates and returns a new instance of the type `IRuleList`. 
- * - * @param defaultRuleTakesPrecedence True, if the default rule should take precedence over the remaining rules, false - * otherwise - * @return An unique pointer to an object of type `IRuleList` that has been created - */ -MLRLCOMMON_API std::unique_ptr createRuleList(bool defaultRuleTakesPrecedence); diff --git a/cpp/subprojects/common/include/common/model/rule_model.hpp b/cpp/subprojects/common/include/common/model/rule_model.hpp deleted file mode 100644 index 79465b26..00000000 --- a/cpp/subprojects/common/include/common/model/rule_model.hpp +++ /dev/null @@ -1,252 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/macros.hpp" - -#include - -// Forward declarations -class ILabelSpaceInfo; -class CContiguousFeatureMatrix; -class CsrFeatureMatrix; -class IMarginalProbabilityCalibrationModel; -class IJointProbabilityCalibrationModel; -class IBinaryPredictor; -class IBinaryPredictorFactory; -class ISparseBinaryPredictor; -class ISparseBinaryPredictorFactory; -class IScorePredictor; -class IScorePredictorFactory; -class IProbabilityPredictor; -class IProbabilityPredictorFactory; - -/** - * Defines an interface for all rule-based models. - */ -class MLRLCOMMON_API IRuleModel { - public: - - virtual ~IRuleModel() {}; - - /** - * Returns the total number of rules in the model, including the default rule, if available. - * - * @return The number of rules - */ - virtual uint32 getNumRules() const = 0; - - /** - * Returns the number of used rules, including the default rule, if available. - * - * @return The number of used rules - */ - virtual uint32 getNumUsedRules() const = 0; - - /** - * Sets the number of used rules, including the default rule, if available. 
- * - * @param numUsedRules The number of used rules to be set or 0, if all rules should be used - */ - virtual void setNumUsedRules(uint32 numUsedRules) = 0; - - /** - * Creates and returns a new instance of the class `IBinaryPredictor`, based on the type of this rule-based - * model. - * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that - * provides row-wise access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that has - * been created - */ - virtual std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IBinaryPredictor`, based on the type of this rule-based - * model. 
- * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides - * row-wise access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that has - * been created - */ - virtual std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `ISparseBinaryPredictor`, based on the type of this - * rule-based model. 
- * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that - * provides row-wise access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `ISparseBinaryPredictor`, based on the type of this - * rule-based model. 
- * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides - * row-wise access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IScorePredictor`, based on the type of this rule-based - * model. 
- * - * @param factory A reference to an object of type `IScorePredictorFactory` that should be used to - * create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that provides row-wise - * access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides information about the - * label space that may be used as a basis for making predictions - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IScorePredictor`, based on the type of this rule-based - * model. - * - * @param factory A reference to an object of type `IScorePredictorFactory` that should be used to - * create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides row-wise access to - * the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides information about the - * label space that may be used as a basis for making predictions - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IProbabilityPredictor`, based on the type of this rule-based - * model. 
- * - * @param factory A reference to an object of type `IProbabilityPredictorFactory` - * that should be used to create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that - * provides row-wise access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IProbabilityPredictor`, based on the type of this rule-based - * model. 
- * - * @param factory A reference to an object of type `IProbabilityPredictorFactory` - * that should be used to create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides - * row-wise access to the features of the query examples - * @param labelSpaceInfo A reference to an object of type `ILabelSpaceInfo` that provides - * information about the label space that may be used as a basis for - * making predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/multi_threading/multi_threading.hpp b/cpp/subprojects/common/include/common/multi_threading/multi_threading.hpp deleted file mode 100644 index add6cfbf..00000000 --- a/cpp/subprojects/common/include/common/multi_threading/multi_threading.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix.hpp" - -/** - * Defines an interface for all classes that allow to configure the multi-threading behavior of a parallelizable - * algorithm. 
- */ -class IMultiThreadingConfig { - public: - - virtual ~IMultiThreadingConfig() {}; - - /** - * Determines and returns the number of threads to be used by a parallelizable algorithm. - * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature - * values of the training examples - * @param numLabels The total number of available labels - * @return The number of threads to be used - */ - virtual uint32 getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/multi_threading/multi_threading_manual.hpp b/cpp/subprojects/common/include/common/multi_threading/multi_threading_manual.hpp deleted file mode 100644 index 2b4b8b0f..00000000 --- a/cpp/subprojects/common/include/common/multi_threading/multi_threading_manual.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" - -/** - * Defines an interface for all classes that allow to configure the multi-threading behavior of a parallelizable - * algorithm by manually specifying the number of threads to be used. - */ -class MLRLCOMMON_API IManualMultiThreadingConfig { - public: - - virtual ~IManualMultiThreadingConfig() {}; - - /** - * Returns the number of threads that are used. - * - * @return The number of threads that are used or 0, if all available CPU cores are utilized - */ - virtual uint32 getNumThreads() const = 0; - - /** - * Sets the number of threads that should be used. - * - * @param numThreads The number of threads that should be used. 
Must be at least 1 or 0, if all available CPU - * cores should be utilized - * @return A reference to an object of type `IManualMultiThreadingConfig` that allows further - * configuration of the multi-threading behavior - */ - virtual IManualMultiThreadingConfig& setNumThreads(uint32 numThreads) = 0; -}; - -/** - * Allows to configure the multi-threading behavior of a parallelizable algorithm by manually specifying the number of - * threads to be used. - */ -class ManualMultiThreadingConfig final : public IMultiThreadingConfig, - public IManualMultiThreadingConfig { - private: - - uint32 numThreads_; - - public: - - ManualMultiThreadingConfig(); - - uint32 getNumThreads() const override; - - IManualMultiThreadingConfig& setNumThreads(uint32 numThreads) override; - - uint32 getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const override; -}; diff --git a/cpp/subprojects/common/include/common/multi_threading/multi_threading_no.hpp b/cpp/subprojects/common/include/common/multi_threading/multi_threading_no.hpp deleted file mode 100644 index c77d1ad4..00000000 --- a/cpp/subprojects/common/include/common/multi_threading/multi_threading_no.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/multi_threading/multi_threading.hpp" - -/** - * Allows to configure the multi-threading behavior of a parallelize algorithm that should not use any multi-threading. 
- */ -class NoMultiThreadingConfig final : public IMultiThreadingConfig { - public: - - uint32 getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const override; -}; diff --git a/cpp/subprojects/common/include/common/post_optimization/model_builder_intermediate.hpp b/cpp/subprojects/common/include/common/post_optimization/model_builder_intermediate.hpp deleted file mode 100644 index aa555620..00000000 --- a/cpp/subprojects/common/include/common/post_optimization/model_builder_intermediate.hpp +++ /dev/null @@ -1,88 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/model_builder.hpp" - -#include - -/** - * An implementation of the class `IModelBuilder` that stores intermediate representations of rules, which can still be - * modified when globally optimizing a rule-based model once it has been learned, that are ultimately converted into a - * final model using another `IModelBuilder`. - */ -class IntermediateModelBuilder final : public IModelBuilder { - public: - - /** - * The type of a rule, which can still be modified. - */ - typedef std::pair, std::unique_ptr> - IntermediateRule; - - private: - - const std::unique_ptr modelBuilderPtr_; - - std::unique_ptr defaultPredictionPtr_; - - std::vector intermediateRuleList_; - - uint32 numUsedRules_; - - public: - - /** - * @param modelBuilderPtr An unique pointer to an object of type `IModelBuilder` that should be used to build - * the final model - */ - IntermediateModelBuilder(std::unique_ptr modelBuilderPtr); - - /** - * An iterator that provides access to the intermediate representations of rules and allows to modify them. - */ - typedef std::vector::iterator iterator; - - /** - * Returns an `iterator` to the beginning of the intermediate representations of rules. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the intermediate representations of rules. 
- * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Removes the intermediate representation of the last rule. - */ - void removeLastRule(); - - /** - * Returns the total number of rules. - * - * @return The total number of rules - */ - uint32 getNumRules() const; - - /** - * Returns the number of used rules. - * - * @return The number of used rules - */ - uint32 getNumUsedRules() const; - - void setNumUsedRules(uint32 numUsedRules) override; - - void setDefaultRule(std::unique_ptr& predictionPtr) override; - - void addRule(std::unique_ptr& conditionListPtr, - std::unique_ptr& predictionPtr) override; - - std::unique_ptr buildModel() override; -}; diff --git a/cpp/subprojects/common/include/common/post_optimization/post_optimization.hpp b/cpp/subprojects/common/include/common/post_optimization/post_optimization.hpp deleted file mode 100644 index cdc8d56c..00000000 --- a/cpp/subprojects/common/include/common/post_optimization/post_optimization.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/post_optimization/model_builder_intermediate.hpp" -#include "common/post_processing/post_processor.hpp" -#include "common/rule_induction/rule_induction.hpp" -#include "common/rule_pruning/rule_pruning.hpp" -#include "common/sampling/feature_sampling.hpp" -#include "common/sampling/label_sampling.hpp" -#include "common/thresholds/thresholds.hpp" - -/** - * Defines an interface for all classes that allow to optimize a rule-based model globally once it has been learned. - */ -class IPostOptimizationPhase { - public: - - virtual ~IPostOptimizationPhase() {}; - - /** - * Optimizes a rule-based model globally once it has been learned. 
- * - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of the rule - * @param ruleInduction A reference to an object of type `IRuleInduction` that should be used for inducing - * new rules - * @param partition A reference to an object of type `IPartition` that provides access to the indices of - * the training examples that belong to the training set and the holdout set, - * respectively - * @param labelSampling A reference to an object of type `ILabelSampling` that should be used for sampling - * labels - * @param instanceSampling A reference to an object of type `IInstanceSampling` that should be used for - * sampling examples - * @param featureSampling A reference to an object of type `IFeatureSampling` that should be used for sampling - * the features that may be used by the conditions of new rules - * @param rulePruning A reference to an object of type `IRulePruning` that should be used to prune new - * rules - * @param postProcessor A reference to an object of type `IPostProcessor` that should be used to - * post-process the predictions of new rules - * @param rng A reference to an object of type `RNG` that implements the random number generator - * to be used - */ - virtual void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IPostOptimizationPhase`. - */ -class IPostOptimizationPhaseFactory { - public: - - virtual ~IPostOptimizationPhaseFactory() {}; - - /** - * Creates and returns a new object of type `IPostOptimizationPhase`. 
- * - * @param modelBuilder A reference to an object of type `IntermediateModelBuilder` that provides access to the - * rules in the model - * @return An unique pointer to an object of type `IPostOptimizationPhase` that has been created - */ - virtual std::unique_ptr create(IntermediateModelBuilder& modelBuilder) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method that optimizes a rule-based model globally once - * it has been learned. - */ -class IPostOptimizationPhaseConfig { - public: - - virtual ~IPostOptimizationPhaseConfig() {}; - - /** - * Creates and returns a new object of type `IPostOptimizationPhaseFactory` according to the specified - * configuration. - * - * @return An unique pointer to an object of type `IPostOptimizationPhaseFactory` that has been created - */ - virtual std::unique_ptr createPostOptimizationPhaseFactory() const = 0; -}; - -/** - * Defines an interface for all classes that allow to optimize a rule-based model globally once it has been learned by - * carrying out several optimization phases. - */ -class IPostOptimization : public IPostOptimizationPhase { - public: - - virtual ~IPostOptimization() override {}; - - /** - * Returns an `IModelBuilder` that is suited for post-optimization via this object. Rules that are induced - * during training must be added to the returned builder. - * - * @return A reference to an object of type `IModelBuilder` that is suited for post-optimization - */ - virtual IModelBuilder& getModelBuilder() const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IPostOptimization`. - */ -class IPostOptimizationFactory { - public: - - virtual ~IPostOptimizationFactory() {}; - - /** - * Creates and returns a new object of type `IPostOptimization`. 
- * - * @param modelBuilderFactory A reference to an object of type `IModelBuilderFactory` that allows to create - * the builder to be used for assembling a model - * @return An unique pointer to an object of type `IPostOptimization` that has been created - */ - virtual std::unique_ptr create(const IModelBuilderFactory& modelBuilderFactory) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/post_optimization/post_optimization_phase_list.hpp b/cpp/subprojects/common/include/common/post_optimization/post_optimization_phase_list.hpp deleted file mode 100644 index c8dbf77c..00000000 --- a/cpp/subprojects/common/include/common/post_optimization/post_optimization_phase_list.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/post_optimization/post_optimization.hpp" - -#include - -/** - * A factory that allows to create instances of the type `IPostOptimization` that carries out multiple optimization - * phases. - */ -class PostOptimizationPhaseListFactory final : public IPostOptimizationFactory { - private: - - std::vector> postOptimizationPhaseFactories_; - - public: - - /** - * Adds a new factory that allows to creates instances of an optimization phase to be carried out. 
- * - * @param postOptimizationPhaseFactoryPtr An unique pointer to an object of type `IPostOptimizationPhaseFactory` - * that should be added - */ - void addPostOptimizationPhaseFactory( - std::unique_ptr postOptimizationPhaseFactoryPtr); - - std::unique_ptr create(const IModelBuilderFactory& modelBuilderFactory) const override; -}; diff --git a/cpp/subprojects/common/include/common/post_optimization/post_optimization_sequential.hpp b/cpp/subprojects/common/include/common/post_optimization/post_optimization_sequential.hpp deleted file mode 100644 index bbcc790a..00000000 --- a/cpp/subprojects/common/include/common/post_optimization/post_optimization_sequential.hpp +++ /dev/null @@ -1,101 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/post_optimization/post_optimization.hpp" - -/** - * Defines an interface for all classes that allow to configure a method that optimizes each rule in a model by - * relearning it in the context of the other rules. Multiple iterations, where the rules in a model are relearned in the - * order of their induction, may be carried out. - */ -class MLRLCOMMON_API ISequentialPostOptimizationConfig { - public: - - virtual ~ISequentialPostOptimizationConfig() {}; - - /** - * Returns the number of iterations that are performed for optimizing a model. - * - * @return The number of iterations that are performed for optimizing a model - */ - virtual uint32 getNumIterations() const = 0; - - /** - * Sets the number of iterations that should be performed for optimizing a model. - * - * @param numIterations The number of iterations to be performed. 
Must be at least 1 - * @return A reference to an object of type `ISequentialPostOptimizationConfig` that allows further - * configuration of the optimization method - */ - virtual ISequentialPostOptimizationConfig& setNumIterations(uint32 numIterations) = 0; - - /** - * Returns whether the heads of rules are refined when being relearned or not. - * - * @return True, if the heads of rules are refined when being relearned, false otherwise - */ - virtual bool areHeadsRefined() const = 0; - - /** - * Sets whether the heads of rules should be refined when being relearned or not. - * - * @param refineHeads True, if the heads of rules should be refined when being relearned, false otherwise - * @return A reference to an object of type `ISequentialPostOptimizationConfig` that allows further - * configuration of the optimization method - */ - virtual ISequentialPostOptimizationConfig& setRefineHeads(bool refineHeads) = 0; - - /** - * Returns whether a new sample of the available features is created whenever a new rule is refined or not. - * - * @return True, if a new sample of the available features is created whenever a new rule is refined, false, if - * the conditions of the new rule use the same features as the original rule - */ - virtual bool areFeaturesResampled() const = 0; - - /** - * Sets whether a new sample of the available features should be created whenever a new rule is refined or not. 
- * - * @param resampleFeatures True, if a new sample of the available features should be created whenever a new - * rule is refined, false, if the conditions of the new rule should use the same - * features as the original rule - * @return A reference to an object of type `ISequentialPostOptimizationConfig` that allows - * further configuration of the optimization method - */ - virtual ISequentialPostOptimizationConfig& setResampleFeatures(bool resampleFeatures) = 0; -}; - -/** - * Allows to configure a method that optimizes each rule in a model by relearning it in the context of the other rules. - */ -class SequentialPostOptimizationConfig final : public ISequentialPostOptimizationConfig, - public IPostOptimizationPhaseConfig { - private: - - uint32 numIterations_; - - bool refineHeads_; - - bool resampleFeatures_; - - public: - - SequentialPostOptimizationConfig(); - - uint32 getNumIterations() const override; - - ISequentialPostOptimizationConfig& setNumIterations(uint32 numIterations) override; - - bool areHeadsRefined() const override; - - ISequentialPostOptimizationConfig& setRefineHeads(bool refineHeads) override; - - bool areFeaturesResampled() const override; - - ISequentialPostOptimizationConfig& setResampleFeatures(bool resampleFeatures) override; - - std::unique_ptr createPostOptimizationPhaseFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/post_optimization/post_optimization_unused_rule_removal.hpp b/cpp/subprojects/common/include/common/post_optimization/post_optimization_unused_rule_removal.hpp deleted file mode 100644 index 9d4484d9..00000000 --- a/cpp/subprojects/common/include/common/post_optimization/post_optimization_unused_rule_removal.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/post_optimization/post_optimization.hpp" - -/** - * Allows to configure a method that removes unused rules from a model. 
- */ -class UnusedRuleRemovalConfig final : public IPostOptimizationPhaseConfig { - public: - - std::unique_ptr createPostOptimizationPhaseFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/post_processing/post_processor.hpp b/cpp/subprojects/common/include/common/post_processing/post_processor.hpp deleted file mode 100644 index 1ec75a42..00000000 --- a/cpp/subprojects/common/include/common/post_processing/post_processor.hpp +++ /dev/null @@ -1,55 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_refinement/prediction.hpp" - -/** - * Defines an interface for all classes that allow to post-process the predictions of rules once they have been learned. - */ -class IPostProcessor { - public: - - virtual ~IPostProcessor() {}; - - /** - * Post-processes the prediction of a rule. - * - * @param prediction A reference to an object of type `AbstractPrediction` that stores the predictions of a rule - */ - virtual void postProcess(AbstractPrediction& prediction) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IPostProcessor`. - */ -class IPostProcessorFactory { - public: - - virtual ~IPostProcessorFactory() {}; - - /** - * Creates and returns a new object of type `IPostProcessor`. - * - * @return An unique pointer to an object of type `IPostProcessor` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method that post-processes the predictions of rules - * once they have been learned. - */ -class IPostProcessorConfig { - public: - - virtual ~IPostProcessorConfig() {}; - - /** - * Creates and returns a new object of type `IPostProcessorFactory` according to the specified configuration. 
- * - * @return An unique pointer to an object of type `IPostProcessorFactory` that has been created - */ - virtual std::unique_ptr createPostProcessorFactory() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/post_processing/post_processor_no.hpp b/cpp/subprojects/common/include/common/post_processing/post_processor_no.hpp deleted file mode 100644 index 3c3b1569..00000000 --- a/cpp/subprojects/common/include/common/post_processing/post_processor_no.hpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/post_processing/post_processor.hpp" - -/** - * Allows to configure a post-processor that does not perform any post-processing, but retains the original predictions - * of rules. - */ -class NoPostProcessorConfig final : public IPostProcessorConfig { - public: - - std::unique_ptr createPostProcessorFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/prediction/label_space_info.hpp b/cpp/subprojects/common/include/common/prediction/label_space_info.hpp deleted file mode 100644 index b9647f17..00000000 --- a/cpp/subprojects/common/include/common/prediction/label_space_info.hpp +++ /dev/null @@ -1,236 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/macros.hpp" - -#include - -class IJointProbabilityCalibrator; -class IJointProbabilityCalibratorFactory; -class CContiguousFeatureMatrix; -class CsrFeatureMatrix; -class RuleList; -class IMarginalProbabilityCalibrationModel; -class IJointProbabilityCalibrationModel; -class IBinaryPredictor; -class IBinaryPredictorFactory; -class ISparseBinaryPredictor; -class ISparseBinaryPredictorFactory; -class IScorePredictor; -class IScorePredictorFactory; -class IProbabilityPredictor; -class IProbabilityPredictorFactory; - -/** - * Defines an interface for all classes that provide information about the label space that may be 
used as a basis for - * making predictions. - */ -class MLRLCOMMON_API ILabelSpaceInfo { - public: - - virtual ~ILabelSpaceInfo() {}; - - /** - * Creates and returns a new instance of the class `IJointProbabilityCalibrator`, based on the type of this - * information about the label space. - * - * @param factory A reference to an object of type - * `IJointProbabilityCalibratorFactory` that should be used to create - * the instance - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @return An unique pointer to an object of type - * `IJointProbabilityPredictor` that has been created - */ - virtual std::unique_ptr createJointProbabilityCalibrator( - const IJointProbabilityCalibratorFactory& factory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const = 0; - - /** - * Creates and returns a new instance of the class `IBinaryPredictor`, based on the type of this information - * about the label space. 
- * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that - * provides row-wise access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to - * obtain predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that has - * been created - */ - virtual std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IBinaryPredictor`, based on the type of this information - * about the label space. 
- * - * @param factory A reference to an object of type `IBinaryPredictorFactory` that - * should be used to create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides - * row-wise access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to - * obtain predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that has - * been created - */ - virtual std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `ISparseBinaryPredictor`, based on the type of this - * information about the label space. 
- * - * @param factory A reference to an object of type `ISparseBinaryPredictorFactory` - * that should be used to create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that - * provides row-wise access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to - * obtain predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `ISparseBinaryPredictor`, based on the type of this - * information about the label space. 
- * - * @param factory A reference to an object of type `ISparseBinaryPredictorFactory` - * that should be used to create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides - * row-wise access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to - * obtain predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IScorePredictor`, based on the type of this information - * about the label space. 
- * - * @param factory A reference to an object of type `IScorePredictorFactory` that should be used to create - * the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that provides row-wise - * access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to obtain predictions - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, - uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IScorePredictor`, based on the type of this information - * about the label space. - * - * @param factory A reference to an object of type `IScorePredictorFactory` that should be used to create - * the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides row-wise access to the - * features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to obtain predictions - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CsrFeatureMatrix& featureMatrix, - const RuleList& model, - uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IProbabilityPredictor`, based on the type of this - * information about the label space. 
- * - * @param factory A reference to an object of type `IProbabilityPredictorFactory` - * that should be used to create the instance - * @param featureMatrix A reference to an object of type `CContiguousFeatureMatrix` that - * provides row-wise access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to - * obtain predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new instance of the class `IProbabilityPredictor`, based on the type of this - * information about the label space. 
- * - * @param factory A reference to an object of type `IProbabilityPredictorFactory` - * that should be used to create the instance - * @param featureMatrix A reference to an object of type `CsrFeatureMatrix` that provides - * row-wise access to the features of the query examples - * @param model A reference to an object of type `RuleList` that should be used to - * obtain predictions - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/prediction/label_space_info_no.hpp b/cpp/subprojects/common/include/common/prediction/label_space_info_no.hpp deleted file mode 100644 index 1e0ddab2..00000000 --- a/cpp/subprojects/common/include/common/prediction/label_space_info_no.hpp +++ /dev/null @@ -1,22 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/prediction/label_space_info.hpp" - -/** - * Defines an interface for all classes that do not provide any information about the label space. 
- */ -class MLRLCOMMON_API INoLabelSpaceInfo : public ILabelSpaceInfo { - public: - - virtual ~INoLabelSpaceInfo() override {}; -}; - -/** - * Creates and returns a new object of the type `INoLabelSpaceInfo`. - * - * @return An unique pointer to an object of type `INoLabelSpaceInfo` that has been created - */ -MLRLCOMMON_API std::unique_ptr createNoLabelSpaceInfo(); diff --git a/cpp/subprojects/common/include/common/prediction/label_vector_set.hpp b/cpp/subprojects/common/include/common/prediction/label_vector_set.hpp deleted file mode 100644 index 7ebd992d..00000000 --- a/cpp/subprojects/common/include/common/prediction/label_vector_set.hpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/arrays.hpp" -#include "common/input/label_matrix_row_wise.hpp" -#include "common/prediction/label_space_info.hpp" - -#include -#include - -/** - * Defines an interface for all classes that provide access to a set of unique label vectors. - */ -class MLRLCOMMON_API ILabelVectorSet : public ILabelSpaceInfo { - public: - - virtual ~ILabelVectorSet() override {}; - - /** - * A visitor function for handling objects of the type `LabelVector` and their frequencies. - */ - typedef std::function LabelVectorVisitor; - - /** - * Adds a label vector to the set. - * - * @param labelVectorPtr An unique pointer to an object of type `LabelVector` - * @param frequency The frequency of the label vector - */ - virtual void addLabelVector(std::unique_ptr labelVectorPtr, uint32 frequency) = 0; - - /** - * Invokes the given visitor function for each label vector that has been added to the set. - * - * @param visitor The visitor function for handling objects of the type `LabelVector` - */ - virtual void visit(LabelVectorVisitor visitor) const = 0; -}; - -/** - * An implementation of the type `ILabelVectorSet` that stores a set of unique label vectors, as well as their - * frequency. 
- */ -class LabelVectorSet final : public ILabelVectorSet { - private: - - std::vector> labelVectors_; - - std::vector frequencies_; - - public: - - LabelVectorSet(); - - /** - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that stores the label vectors that - * should be added to the set - */ - LabelVectorSet(const IRowWiseLabelMatrix& labelMatrix); - - /** - * An iterator that provides read-only access to the label vectors. - */ - typedef std::vector>::const_iterator const_iterator; - - /** - * An iterator that provides read-only access to the frequency of the label lectors. - * - */ - typedef std::vector::const_iterator frequency_const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the label vectors in the set. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the label vectors in the set. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns a `frequency_const_iterator` to the beginning of the frequencies. - * - * @return frequency_const_iterator A `frequency_const_iterator` to the beginning - */ - frequency_const_iterator frequencies_cbegin() const; - - /** - * Returns a `frequency_const_iterator` to the end of the frequencies. - * - * @return frequency_const_iterator A `frequency_const_iterator` to the end - */ - frequency_const_iterator frequencies_cend() const; - - /** - * Returns the number of label vectors in the set. 
- * - * @return The number of label vectors - */ - uint32 getNumLabelVectors() const; - - void addLabelVector(std::unique_ptr labelVectorPtr, uint32 frequency) override; - - void visit(LabelVectorVisitor visitor) const override; - - std::unique_ptr createJointProbabilityCalibrator( - const IJointProbabilityCalibratorFactory& factory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const override; - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& ruleList, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& ruleList, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& ruleList, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& ruleList, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const RuleList& 
ruleList, - uint32 numLabels) const override; - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CsrFeatureMatrix& featureMatrix, - const RuleList& ruleList, - uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& ruleList, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& ruleList, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override; -}; - -/** - * Creates and returns a new object of the type `ILabelVectorSet`. - * - * @return An unique pointer to an object of type `ILabelVectorSet` that has been created - */ -MLRLCOMMON_API std::unique_ptr createLabelVectorSet(); diff --git a/cpp/subprojects/common/include/common/prediction/prediction_matrix_dense.hpp b/cpp/subprojects/common/include/common/prediction/prediction_matrix_dense.hpp deleted file mode 100644 index 8830833d..00000000 --- a/cpp/subprojects/common/include/common/prediction/prediction_matrix_dense.hpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" - -/** - * A dense matrix that provides read-only access to predictions that are stored in a C-contiguous array. 
- * - * @tparam T The type of the predictions that are stored by the matrix - */ -template -class MLRLCOMMON_API DensePredictionMatrix final : public CContiguousView { - private: - - T* array_; - - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - */ - DensePredictionMatrix(uint32 numRows, uint32 numCols); - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - * @param init True, if all elements in the matrix should be value-initialized, false otherwise - */ - DensePredictionMatrix(uint32 numRows, uint32 numCols, bool init); - - ~DensePredictionMatrix() override; - - /** - * Returns a pointer to the array that stores the predictions. - * - * @return A pointer to the array that stores the predictions - */ - T* get(); - - /** - * Releases the ownership of the array that stores the predictions. The caller is responsible for freeing the - * memory that is occupied by the array. - * - * @return A pointer to the array that stores the predictions - */ - T* release(); -}; diff --git a/cpp/subprojects/common/include/common/prediction/prediction_matrix_sparse_binary.hpp b/cpp/subprojects/common/include/common/prediction/prediction_matrix_sparse_binary.hpp deleted file mode 100644 index 483fb4c0..00000000 --- a/cpp/subprojects/common/include/common/prediction/prediction_matrix_sparse_binary.hpp +++ /dev/null @@ -1,85 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/matrix_lil_binary.hpp" -#include "common/data/view_csr_binary.hpp" - -#include - -/** - * A sparse matrix that provides read-only access to binary predictions that are stored in the compressed sparse row - * (CSR) format. - * - * The matrix maintains two arrays, referred to as `rowIndices` and `colIndices`. The latter stores a column-index for - * each of the `numNonZeroValues` non-zero elements in the matrix. 
The former stores `numRows + 1` row-indices that - * specify the first element in `colIndices` that correspond to a certain row. The index at the last position is equal - * to the number of non-zero values in the matrix. - */ -class MLRLCOMMON_API BinarySparsePredictionMatrix final : public BinaryCsrConstView { - private: - - uint32* rowIndices_; - - uint32* colIndices_; - - public: - - /** - * @param numRows The number of rows in the matrix - * @param numCols The number of columns in the matrix - * @param rowIndices A pointer to an array of type `uint32`, shape `(numRows + 1)`, that stores the indices - * of the first element in `colIndices` that corresponds to a certain row. The index at the - * last position is equal to `numNonZeroValues` - * @param colIndices A pointer to an array of type `uint32`, shape `(numNonZeroValues)`, that stores the - * column-indices, the non-zero elements correspond to - */ - BinarySparsePredictionMatrix(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices); - - ~BinarySparsePredictionMatrix() override; - - /** - * Returns a pointer to the array `rowIndices`. - * - * @return A pointer to the array `rowIndices` - */ - uint32* getRowIndices(); - - /** - * Releases the ownership of the array `rowIndices`. The caller is responsible for freeing the memory that is - * occupied by the array. - * - * @return A pointer to the array `rowIndices` - */ - uint32* releaseRowIndices(); - - /** - * Returns a pointer to the array `colIndices`. - * - * @return A pointer to the array `colIndices` - */ - uint32* getColIndices(); - - /** - * Releases the ownership of the array `colIndices`. The caller is responsible for freeing the memory that is - * occupied by the array. - * - * @return A pointer to the array `colIndices` - */ - uint32* releaseColIndices(); -}; - -/** - * Creates and returns a new object of the type `BinarySparsePredictionMatrix` as a copy of an existing - * `BinaryLilMatrix`. 
- * - * @param lilMatrix A reference to an object of type `BinaryLilMatrix` to be copied - * @param numCols The number of columns of the given `BinaryLilMatrix` - * @param numNonZeroElements The number of non-zero elements in the given `BinaryLilMatrix` - * @return An unique pointer to an object of type `BinarySparsePredictionMatrix` that has been - * created - */ -std::unique_ptr createBinarySparsePredictionMatrix(const BinaryLilMatrix& lilMatrix, - uint32 numCols, - uint32 numNonZeroElements); diff --git a/cpp/subprojects/common/include/common/prediction/predictor.hpp b/cpp/subprojects/common/include/common/prediction/predictor.hpp deleted file mode 100644 index 0a4adb95..00000000 --- a/cpp/subprojects/common/include/common/prediction/predictor.hpp +++ /dev/null @@ -1,120 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix_row_wise.hpp" - -#include - -/** - * Defines an interface for all classes that allow to obtain predictions for given query examples incrementally. - * - * @tparam PredictionMatrix The type of the matrix that is used to store the predictions - */ -template -class IIncrementalPredictor { - public: - - virtual ~IIncrementalPredictor() {}; - - /** - * Returns whether there are any remaining ensemble members that have not been used yet or not. - * - * @return True, if there are any remaining ensemble members, false otherwise - */ - virtual bool hasNext() const { - return this->getNumNext() > 0; - } - - /** - * Returns the number of remaining ensemble members that have not been used yet. - * - * @return The number of remaining ensemble members - */ - virtual uint32 getNumNext() const = 0; - - /** - * Updates the current predictions by considering several of the remaining ensemble members. If not enough - * ensemble members are remaining, only the available ones will be used for updating the current predictions. 
- * - * @param stepSize The number of additional ensemble members to be considered for prediction - * @return A reference to an object of template type `PredictionMatrix` that stores the updated - * predictions - */ - virtual PredictionMatrix& applyNext(uint32 stepSize) = 0; -}; - -/** - * Defines an interface for all classes that allow to obtain predictions for given query examples. - * - * @tparam PredictionMatrix The type of the matrix that is used to store the predictions - */ -template -class IPredictor { - public: - - virtual ~IPredictor() {}; - - /** - * Obtains and returns predictions for all query examples. - * - * @param maxRules The maximum number of rules to be used for prediction or 0, if the number of rules should - * not be restricted - * @return An unique pointer to an object of template type `PredictionMatrix` that stores the - * predictions - */ - virtual std::unique_ptr predict(uint32 maxRules = 0) const = 0; - - /** - * Returns whether the predictor allows to obtain predictions incrementally or not. - * - * @return True, if the predictor allows to obtain predictions incrementally, false otherwise - */ - virtual bool canPredictIncrementally() const = 0; - - /** - * Creates and returns a predictor that may be used to obtain predictions incrementally. If incremental - * prediction is not supported, a `std::runtime_error` is thrown. - * - * @throws std::runtime_exception The exception that is thrown if incremental prediction is not supported - * @param maxRules The maximum number of rules to be used for prediction. Must be at least 1 or - * 0, if the number of rules should not be restricted - * @return An unique pointer to an object of type `IIncrementalPredictor` that may be - * used to obtain predictions incrementally - */ - virtual std::unique_ptr> createIncrementalPredictor( - uint32 maxRules = 0) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a predictor. 
- * - * @tparam PredictorFactory The type of the factory that allows to create instances of the predictor - */ -template -class IPredictorConfig { - public: - - virtual ~IPredictorConfig() {}; - - /** - * Creates and returns a new object of type `IPredictorFactory` according to the specified configuration. - * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise access - * to the feature values of the query examples to predict for - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of template type `PredictorFactory` that has been created - */ - virtual std::unique_ptr createPredictorFactory(const IRowWiseFeatureMatrix& featureMatrix, - uint32 numLabels) const = 0; - - /** - * Returns whether the predictor needs access to the label vectors that are encountered in the training data or - * not. - * - * @return True, if the predictor needs access to the label vectors that are encountered in the training data, - * false otherwise - */ - virtual bool isLabelVectorSetNeeded() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/prediction/predictor_binary.hpp b/cpp/subprojects/common/include/common/prediction/predictor_binary.hpp deleted file mode 100644 index aef6a2df..00000000 --- a/cpp/subprojects/common/include/common/prediction/predictor_binary.hpp +++ /dev/null @@ -1,173 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csr.hpp" -#include "common/model/rule_list.hpp" -#include "common/prediction/label_vector_set.hpp" -#include "common/prediction/prediction_matrix_dense.hpp" -#include "common/prediction/prediction_matrix_sparse_binary.hpp" -#include "common/prediction/predictor.hpp" -#include "common/prediction/probability_calibration_marginal.hpp" - -/** - * Defines an interface for all classes that allow to predict binary labels for given query 
examples. - */ -class IBinaryPredictor : virtual public IPredictor> { - public: - - virtual ~IBinaryPredictor() override {}; -}; - -/** - * Defines an interface for all classes that allow to create instances of the type `IBinaryPredictor`. - */ -class IBinaryPredictorFactory { - public: - - virtual ~IBinaryPredictorFactory() {}; - - /** - * Creates and returns a new object of the type `IBinaryPredictor`. - * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the - * feature values of the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used - * to obtain predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such set is - * available - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that - * has been created - */ - virtual std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new object of the type `IBinaryPredictor`. 
- * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the - * feature values of the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used - * to obtain predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such set is - * available - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IBinaryPredictor` that - * has been created - */ - virtual std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; - -/** - * Defines an interface for all classes that allow to predict sparse binary labels for given query examples. - */ -class ISparseBinaryPredictor : public IPredictor { - public: - - virtual ~ISparseBinaryPredictor() override {}; -}; - -/** - * Defines an interface for all classes that allow to create instances of the type `ISparseBinaryPredictor`. - */ -class ISparseBinaryPredictorFactory { - public: - - virtual ~ISparseBinaryPredictorFactory() {}; - - /** - * Creates and returns a new object of the type `ISparseBinaryPredictor`. 
- * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the - * feature values of the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used - * to obtain predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such set is - * available - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new object of the type `ISparseBinaryPredictor`. 
- * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the - * feature values of the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used - * to obtain predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such set is - * available - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictor` - * that has been created - */ - virtual std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure an `IBinaryPredictor` or `ISparseBinaryPredictor`. - */ -class IBinaryPredictorConfig : public IPredictorConfig { - public: - - virtual ~IBinaryPredictorConfig() override {}; - - /** - * Creates and returns a new object of type `ISparseBinaryPredictorFactory` according to the specified - * configuration. 
- * - * @param featureMatrix A reference to an object of type `IRowWiseFeatureMatrix` that provides row-wise access - * to the feature values of the query examples to predict for - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `ISparseBinaryPredictorFactory` that has been - * created - */ - virtual std::unique_ptr createSparsePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/prediction/predictor_common.hpp b/cpp/subprojects/common/include/common/prediction/predictor_common.hpp deleted file mode 100644 index a8b8ae3a..00000000 --- a/cpp/subprojects/common/include/common/prediction/predictor_common.hpp +++ /dev/null @@ -1,228 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/prediction/predictor.hpp" -#include "omp.h" - -/** - * Allows to obtain predictions for multiple query examples by delegating the prediction for individual examples to - * another class. - * - * @tparam T The type of the predictions - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of the - * query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ -template -class PredictionDispatcher final { - public: - - /** - * Defines an interface for all classes, the prediction for individual examples can be delegated to by a - * `PredictionDispatcher`. - */ - class IPredictionDelegate { - public: - - virtual ~IPredictionDelegate() {}; - - /** - * Obtains predictions for a single query example. 
- * - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param rulesBegin An iterator of type `Model::const_iterator` to the first rule that should be - * used for prediction - * @param rulesEnd An iterator of type `Model::const_iterator` to the last rule (exclusive) - * that should be used for prediction - * @param threadIndex The index of the thread used for prediction - * @param exampleIndex The index of the query example to predict for - * @param predictionIndex The index of the row in the prediction matrix, where the predictions should - * be stored - */ - virtual void predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const = 0; - }; - - /** - * Obtains predictions for multiple query examples by delegating the prediction for individual examples to a - * given `PredictionDelegate`. - * - * @param delegate A reference to an object of type `IPredictionDelegate`, the prediction for individual - * examples should be delegated to - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param rulesBegin An iterator of type `Model::const_iterator` to the first rule that should be used for - * prediction - * @param rulesEnd An iterator of type `Model::const_iterator` to the last rule (exclusive) that should be - * used for prediction - * @param numThreads The number of CPU threads to be used to make predictions for different query examples in - * parallel. 
Must be at least 1 - */ - void predict(const IPredictionDelegate& delegate, const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, typename Model::const_iterator rulesEnd, - uint32 numThreads) const { - uint32 numExamples = featureMatrix.getNumRows(); - const IPredictionDelegate* delegatePtr = &delegate; - const FeatureMatrix* featureMatrixPtr = &featureMatrix; - -#pragma omp parallel for firstprivate(numExamples) firstprivate(delegatePtr) firstprivate(rulesBegin) \ - firstprivate(rulesEnd) firstprivate(featureMatrixPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numExamples; i++) { - uint32 threadIndex = (uint32) omp_get_thread_num(); - delegatePtr->predictForExample(*featureMatrixPtr, rulesBegin, rulesEnd, threadIndex, i, i); - } - } -}; - -/** - * Allows to obtain sparse binary predictions for multiple query examples by delegating the prediction for individual - * examples to another class. - * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of the - * query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - */ -template -class BinarySparsePredictionDispatcher final { - public: - - /** - * Defines an interface for all classes, the prediction for individual examples can be delegated to by a - * `BinarySparsePredictionDispatcher`. - */ - class IPredictionDelegate { - public: - - virtual ~IPredictionDelegate() {}; - - /** - * Obtains predictions for a single query example. 
- * - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides - * row-wise access to the feature values of the query examples - * @param rulesBegin An iterator of type `Model::const_iterator` to the first rule that should be - * used for prediction - * @param rulesEnd An iterator of type `Model::const_iterator` to the last rule (exclusive) - * that should be used for prediction - * @param threadIndex The index of the thread used for prediction - * @param exampleIndex The index of the query example to predict for - * @param predictionIndex The index of the row in the prediction matrix, where the predictions should - * be stored - * @return The number of non-zero predictions - */ - virtual uint32 predictForExample(const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd, uint32 threadIndex, - uint32 exampleIndex, uint32 predictionIndex) const = 0; - }; - - /** - * Obtains predictions for multiple query examples by delegating the prediction for individual examples to a - * given `IPredictionDelegate`. - * - * @param delegate A reference to an object of type `IPredictionDelegate`, the prediction for individual - * examples should be delegated to - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param rulesBegin An iterator of type `Model::const_iterator` to the first rule that should be used for - * prediction - * @param rulesEnd An iterator of type `Model::const_iterator` to the last rule (exclusive) that should be - * used for prediction - * @param numThreads The number of CPU threads to be used to make predictions for different query examples in - * parallel. 
Must be at least 1 - * @return The total number of non-zero predictions - */ - uint32 predict(const IPredictionDelegate& delegate, const FeatureMatrix& featureMatrix, - typename Model::const_iterator rulesBegin, typename Model::const_iterator rulesEnd, - uint32 numThreads) const { - uint32 numExamples = featureMatrix.getNumRows(); - const IPredictionDelegate* delegatePtr = &delegate; - const FeatureMatrix* featureMatrixPtr = &featureMatrix; - uint32 numNonZeroElements = 0; - -#pragma omp parallel for reduction(+ : numNonZeroElements) firstprivate(numExamples) firstprivate(delegatePtr) \ - firstprivate(rulesBegin) firstprivate(rulesEnd) firstprivate(featureMatrixPtr) schedule(dynamic) \ - num_threads(numThreads) - for (int64 i = 0; i < numExamples; i++) { - uint32 threadIndex = (uint32) omp_get_thread_num(); - numNonZeroElements += - delegatePtr->predictForExample(*featureMatrixPtr, rulesBegin, rulesEnd, threadIndex, i, i); - } - - return numNonZeroElements; - } -}; - -/** - * An abstract base class for all implementations of the class `IIncrementalPredictor`. - * - * @tparam FeatureMatrix The type of the feature matrix that provides row-wise access to the feature values of the - * query examples - * @tparam Model The type of the rule-based model that is used to obtain predictions - * @tparam PredictionMatrix The type of the matrix that is used to store the predictions - */ -template -class AbstractIncrementalPredictor : public IIncrementalPredictor { - private: - - const FeatureMatrix& featureMatrix_; - - const uint32 numThreads_; - - typename Model::const_iterator current_; - - typename Model::const_iterator end_; - - protected: - - /** - * Must be implemented by subclasses in order to obtain predictions. 
- * - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param numThreads The number of CPU threads to be used to make predictions for different query examples in - * parallel. Must be at least 1 - * @param rulesBegin An iterator of type `Model::const_iterator` to the first rule that should be used for - * prediction - * @param rulesEnd An iterator of type `Model::const_iterator` to the last rule (exclusive) that should be - * used for prediction - * @return A reference to an object of template type `PredictionMatrix` that stores the predictions - */ - virtual PredictionMatrix& applyNext(const FeatureMatrix& featureMatrix, uint32 numThreads, - typename Model::const_iterator rulesBegin, - typename Model::const_iterator rulesEnd) = 0; - - public: - - /** - * @param featureMatrix A reference to an object of template type `FeatureMatrix` that provides row-wise access - * to the feature values of the query examples - * @param model A reference to an object of template type `Model` that should be used for prediction - * @param numThreads The number of CPU threads to be used to make predictions for different query examples in - * parallel. Must be at least 1 - * @param maxRules The maximum number of rules to be used for prediction. 
Must be at least 1 or 0, if the - * number of rules should not be restricted - */ - AbstractIncrementalPredictor(const FeatureMatrix& featureMatrix, const Model& model, uint32 numThreads, - uint32 maxRules) - : featureMatrix_(featureMatrix), numThreads_(numThreads), current_(model.used_cbegin(maxRules)), - end_(model.used_cend(maxRules)) {} - - virtual ~AbstractIncrementalPredictor() override {}; - - uint32 getNumNext() const override final { - return (uint32) (end_ - current_); - } - - PredictionMatrix& applyNext(uint32 stepSize) override final { - typename Model::const_iterator next = current_ + std::min(stepSize, this->getNumNext()); - PredictionMatrix& predictionMatrix = this->applyNext(featureMatrix_, numThreads_, current_, next); - current_ = next; - return predictionMatrix; - } -}; diff --git a/cpp/subprojects/common/include/common/prediction/predictor_probability.hpp b/cpp/subprojects/common/include/common/prediction/predictor_probability.hpp deleted file mode 100644 index 8cd0b0fb..00000000 --- a/cpp/subprojects/common/include/common/prediction/predictor_probability.hpp +++ /dev/null @@ -1,90 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csr.hpp" -#include "common/model/rule_list.hpp" -#include "common/prediction/label_vector_set.hpp" -#include "common/prediction/prediction_matrix_dense.hpp" -#include "common/prediction/predictor.hpp" -#include "common/prediction/probability_calibration_marginal.hpp" - -/** - * Defines an interface for all classes that allow to predict probability estimates for given query examples. - */ -class IProbabilityPredictor : public IPredictor> { - public: - - virtual ~IProbabilityPredictor() override {}; -}; - -/** - * Defines an interface for all classes that allow to create instances of the type `IProbabilityPredictor`. 
- */ -class IProbabilityPredictorFactory { - public: - - virtual ~IProbabilityPredictorFactory() {}; - - /** - * Creates and returns a new object of the type `IProbabilityPredictor`. - * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the - * feature values of the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used - * to obtain predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such set is - * available - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr create( - const CContiguousConstView& featureMatrix, const RuleList& model, - const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; - - /** - * Creates and returns a new object of the type `IProbabilityPredictor`. 
- * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the - * feature values of the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used - * to obtain predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such set is - * available - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param jointProbabilityCalibrationModel A reference to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IProbabilityPredictor` - * that has been created - */ - virtual std::unique_ptr create( - const CsrConstView& featureMatrix, const RuleList& model, const LabelVectorSet* labelVectorSet, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a `IProbabilityPredictor`. 
- */ -class IProbabilityPredictorConfig : public IPredictorConfig { - public: - - virtual ~IProbabilityPredictorConfig() override {}; -}; diff --git a/cpp/subprojects/common/include/common/prediction/predictor_score.hpp b/cpp/subprojects/common/include/common/prediction/predictor_score.hpp deleted file mode 100644 index d91ef958..00000000 --- a/cpp/subprojects/common/include/common/prediction/predictor_score.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csr.hpp" -#include "common/model/rule_list.hpp" -#include "common/prediction/label_vector_set.hpp" -#include "common/prediction/prediction_matrix_dense.hpp" -#include "common/prediction/predictor.hpp" - -/** - * Defines an interface for all classes that allow to predict regression scores for given query examples. - */ -class IScorePredictor : public IPredictor> { - public: - - virtual ~IScorePredictor() override {}; -}; - -/** - * Defines an interface for all classes that allow to create instances of the type `IScorePredictor`. - */ -class IScorePredictorFactory { - public: - - virtual ~IScorePredictorFactory() {}; - - /** - * Creates and returns a new object of the type `IScorePredictor`. 
- * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the feature values of - * the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used to obtain - * predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all known label vectors - * or a null pointer, if no such set is available - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr create(const CContiguousConstView& featureMatrix, - const RuleList& model, const LabelVectorSet* labelVectorSet, - uint32 numLabels) const = 0; - - /** - * Creates and returns a new object of the type `IScorePredictor`. - * - * @param featureMatrix A reference to an object of type `CsrConstView` that stores the feature values of - * the query examples to predict for - * @param model A reference to an object of type `RuleList` that should be used to obtain - * predictions - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all known label vectors - * or a null pointer, if no such set is available - * @param numLabels The number of labels to predict for - * @return An unique pointer to an object of type `IScorePredictor` that has been created - */ - virtual std::unique_ptr create(const CsrConstView& featureMatrix, - const RuleList& model, const LabelVectorSet* labelVectorSet, - uint32 numLabels) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a `IScorePredictor`. 
- */ -class IScorePredictorConfig : public IPredictorConfig { - public: - - virtual ~IScorePredictorConfig() override {}; -}; diff --git a/cpp/subprojects/common/include/common/prediction/probability_calibration.hpp b/cpp/subprojects/common/include/common/prediction/probability_calibration.hpp deleted file mode 100644 index 0a9dc10c..00000000 --- a/cpp/subprojects/common/include/common/prediction/probability_calibration.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -/** - * Defines an interface for all classes that implement a method for fitting models for the calibration of probabilities. - * - * @tparam ProbabilityCalibrationModel The type of the calibration model that is fitted by the calibrator - */ -template -class IProbabilityCalibrator { - public: - - virtual ~IProbabilityCalibrator() {}; - - /** - * Fits and returns a model for the calibration of probabilities. - * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices of - * the training examples that are included in the training set - * @param labelMatrix A reference to an object of type `CContiguousLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to statistics about - * the labels of the training examples - * @return An unique pointer to an object of template type `ProbabilityCalibrationModel` that has - * been fit - */ - virtual std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of probabilities. 
- * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices of - * the training examples that are included in the training set - * @param labelMatrix A reference to an object of type `CsrLabelMatrix` that provides row-wise access to the - * labels of the training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to statistics about - * the labels of the training examples - * @return An unique pointer to an object of template type `ProbabilityCalibrationModel` that has - * been fit - */ - virtual std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CsrLabelMatrix& labelMatrix, const IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of probabilities. - * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of the - * training examples that are included in the training set and the holdout set, respectively - * @param labelMatrix A reference to an object of type `CContiguousLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to statistics about - * the labels of the training examples - * @return An unique pointer to an object of template type `ProbabilityCalibrationModel` that has - * been fit - */ - virtual std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CContiguousLabelMatrix& labelMatrix, const IStatistics& statistics) const = 0; - - /** - * Fits and returns a model for the calibration of probabilities. 
- * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of the - * training examples that are included in the training set and the holdout set, respectively - * @param labelMatrix A reference to an object of type `CsrLabelMatrix` that provides row-wise access to the - * labels of the training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to statistics about - * the labels of the training examples - * @return An unique pointer to an object of template type `ProbabilityCalibrationModel` that has - * been fit - */ - virtual std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CsrLabelMatrix& labelMatrix, const IStatistics& statistics) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/prediction/probability_calibration_isotonic.hpp b/cpp/subprojects/common/include/common/prediction/probability_calibration_isotonic.hpp deleted file mode 100644 index 01658af5..00000000 --- a/cpp/subprojects/common/include/common/prediction/probability_calibration_isotonic.hpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/list_of_lists.hpp" -#include "common/data/tuple.hpp" -#include "common/prediction/probability_calibration_joint.hpp" - -#include - -/** - * Defines an interface for all models for the calibration of marginal or joint probabilities via isotonic regression. - */ -class MLRLCOMMON_API IIsotonicProbabilityCalibrationModel : public IMarginalProbabilityCalibrationModel, - public IJointProbabilityCalibrationModel { - public: - - virtual ~IIsotonicProbabilityCalibrationModel() override {}; - - /** - * A visitor function for handling individual bins. - */ - typedef std::function BinVisitor; - - /** - * Returns the number of available list of bins. 
- * - * @return The number of available list of bins - */ - virtual uint32 getNumBinLists() const = 0; - - /** - * Adds a new bin to the calibration model. - * - * @param listIndex The index of the list, the bin should be added to - * @param threshold The threshold of the bin - * @param probability The probability of the bin - */ - virtual void addBin(uint32 listIndex, float64 threshold, float64 probability) = 0; - - /** - * Invokes the given visitor function for each bin that is contained by the calibration model. - * - * @param visitor The visitor function for handling individual bins - */ - virtual void visit(BinVisitor visitor) const = 0; -}; - -/** - * A model for the calibration of marginal or joint probabilities via isotonic regression. - */ -class IsotonicProbabilityCalibrationModel final : public IIsotonicProbabilityCalibrationModel { - private: - - ListOfLists> binsPerList_; - - public: - - /** - * @param numLists The total number of lists for storing bins - */ - IsotonicProbabilityCalibrationModel(uint32 numLists); - - /** - * Provides access to the bins that belong to a specific list and allows to modify them. - */ - typedef ListOfLists>::row bin_list; - - /** - * Provides read-only access to the bins that belong to a specific list. - */ - typedef ListOfLists>::const_row const_bin_list; - - /** - * Provides access to the bins that belong to the list at a specific index and allows to modify its elements. - * - * @param listIndex The index of the list - * @return A `bin_list` - */ - bin_list operator[](uint32 listIndex); - - /** - * Provides read-only access to the bins that belong to the list at a specific index. - * - * @param listIndex The index of the list - * @return A `const_bin_list` - */ - const_bin_list operator[](uint32 listIndex) const; - - /** - * Fits the isotonic calibration model. 
- */ - void fit(); - - float64 calibrateMarginalProbability(uint32 labelIndex, float64 marginalProbability) const override; - - float64 calibrateJointProbability(uint32 labelVectorIndex, float64 jointProbability) const override; - - uint32 getNumBinLists() const override; - - void addBin(uint32 listIndex, float64 threshold, float64 probability) override; - - void visit(BinVisitor visitor) const override; -}; - -/** - * Creates and returns a new object of the type `IIsotonicProbabilityCalibrationModel`. - * - * @param numLists The total number of lists for storing bins - * @return An unique pointer to an object of type `IIsotonicProbabilityCalibrationModel` that has been created - */ -MLRLCOMMON_API std::unique_ptr createIsotonicProbabilityCalibrationModel( - uint32 numLists); diff --git a/cpp/subprojects/common/include/common/prediction/probability_calibration_joint.hpp b/cpp/subprojects/common/include/common/prediction/probability_calibration_joint.hpp deleted file mode 100644 index 9c9ec9d6..00000000 --- a/cpp/subprojects/common/include/common/prediction/probability_calibration_joint.hpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/prediction/label_vector_set.hpp" -#include "common/prediction/probability_calibration_marginal.hpp" - -/** - * Defines an interface for all classes that implement a model for the calibration of joint probabilities. - */ -class MLRLCOMMON_API IJointProbabilityCalibrationModel { - public: - - virtual ~IJointProbabilityCalibrationModel() {}; - - /** - * Calibrates a joint probability. 
- * - * @param labelVectorIndex The index of the label vector, the probability is predicted for - * @param jointProbability The joint probability to be calibrated - * @return The calibrated probability - */ - virtual float64 calibrateJointProbability(uint32 labelVectorIndex, float64 jointProbability) const = 0; -}; - -/** - * Defines an interface for all classes that implement a method for fitting models for the calibration of joint - * probabilities. - */ -class IJointProbabilityCalibrator : public IProbabilityCalibrator { - public: - - virtual ~IJointProbabilityCalibrator() override {}; -}; - -/** - * Defines an interface for all classes that allow to create instances of the type `IJointProbabilityCalibrator`. - */ -class IJointProbabilityCalibratorFactory { - public: - - virtual ~IJointProbabilityCalibratorFactory() {}; - - /** - * Creates and returns a new object of type `IJointProbabilityCalibrator`. - * - * @param marginalProbabilityCalibrationModel A reference to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for the - * calibration of marginal probabilities - * @param labelVectorSet A pointer to an object of type `LabelVectorSet` that stores all - * known label vectors or a null pointer, if no such object is - * available - * @return An unique pointer to an object of type - * `IJointProbabilityCalibrator` that has been created - */ - virtual std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const LabelVectorSet* labelVectorSet) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method for fitting a model for the calibration of - * joint probabilities. - */ -class IJointProbabilityCalibratorConfig { - public: - - virtual ~IJointProbabilityCalibratorConfig() {}; - - /** - * Returns whether a holdout set should be used, if available, or not. 
- * - * @return True, if a holdout set should be used, false otherwise - */ - virtual bool shouldUseHoldoutSet() const = 0; - - /** - * Returns whether the calibrator needs access to the label vectors that are encountered in the training data or - * not. - * - * @return True, if the calibrator needs access to the label vectors that are encountered in the training data, - * false otherwise - */ - virtual bool isLabelVectorSetNeeded() const = 0; - - /** - * Creates and returns a new object of template type `IJointProbabilityCalibratorFactory` according to the - * configuration. - * - * @return An unique pointer to an object of template type `IJointProbabilityCalibratorFactory` that has been - * created - */ - virtual std::unique_ptr createJointProbabilityCalibratorFactory() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/prediction/probability_calibration_marginal.hpp b/cpp/subprojects/common/include/common/prediction/probability_calibration_marginal.hpp deleted file mode 100644 index b268572e..00000000 --- a/cpp/subprojects/common/include/common/prediction/probability_calibration_marginal.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/label_matrix_c_contiguous.hpp" -#include "common/input/label_matrix_csr.hpp" -#include "common/macros.hpp" -#include "common/prediction/probability_calibration.hpp" -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/statistics/statistics.hpp" - -/** - * Defines an interface for all classes that implement a model for the calibration of marginal probabilities. - */ -class MLRLCOMMON_API IMarginalProbabilityCalibrationModel { - public: - - virtual ~IMarginalProbabilityCalibrationModel() {}; - - /** - * Calibrates the marginal probability that is predicted for a specific label. 
- * - * @param labelIndex The index of the label, the probability is predicted for - * @param marginalProbability The marginal probability to be calibrated - * @return The calibrated probability - */ - virtual float64 calibrateMarginalProbability(uint32 labelIndex, float64 marginalProbability) const = 0; -}; - -/** - * Defines an interface for all classes that implement a method for fitting models for the calibration of marginal - * probabilities. - */ -class IMarginalProbabilityCalibrator : public IProbabilityCalibrator { - public: - - virtual ~IMarginalProbabilityCalibrator() override {}; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IMarginalProbabilityCalibrator`. - */ -class IMarginalProbabilityCalibratorFactory { - public: - - virtual ~IMarginalProbabilityCalibratorFactory() {}; - - /** - * Creates and returns a new object of type `IMarginalProbabilityCalibrator`. - * - * @return An unique pointer to an object of type `IMarginalProbabilityCalibrator` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method for fitting a model for the calibration of - * marginal probabilities. - */ -class IMarginalProbabilityCalibratorConfig { - public: - - virtual ~IMarginalProbabilityCalibratorConfig() {}; - - /** - * Returns whether a holdout set should be used, if available, or not. - * - * @return True, if a holdout set should be used, false otherwise - */ - virtual bool shouldUseHoldoutSet() const = 0; - - /** - * Creates and returns a new object of template type `IMarginalProbabilityCalibratorFactory` according to the - * configuration. 
- * - * @return An unique pointer to an object of template type `IMarginalProbabilityCalibratorFactory` that has been - * created - */ - virtual std::unique_ptr createMarginalProbabilityCalibratorFactory() - const = 0; -}; diff --git a/cpp/subprojects/common/include/common/prediction/probability_calibration_no.hpp b/cpp/subprojects/common/include/common/prediction/probability_calibration_no.hpp deleted file mode 100644 index 7e493698..00000000 --- a/cpp/subprojects/common/include/common/prediction/probability_calibration_no.hpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/prediction/probability_calibration_joint.hpp" - -/** - * Defines an interface for all models for the calibration of marginal or joint probabilities that do make any - * adjustments. - */ -class MLRLCOMMON_API INoProbabilityCalibrationModel : public IMarginalProbabilityCalibrationModel, - public IJointProbabilityCalibrationModel { - public: - - virtual ~INoProbabilityCalibrationModel() override {}; -}; - -/** - * A factory that allows to create instances of the type `IMarginalProbabilityCalibrator` that do not fit a model for - * the calibration of marginal probabilities. - */ -class NoMarginalProbabilityCalibratorFactory final : public IMarginalProbabilityCalibratorFactory { - public: - - virtual ~NoMarginalProbabilityCalibratorFactory() {}; - - std::unique_ptr create() const override; -}; - -/** - * Allows to configure a calibrator that does not fit a model for the calibration of marginal probabilities. - */ -class NoMarginalProbabilityCalibratorConfig final : public IMarginalProbabilityCalibratorConfig { - public: - - bool shouldUseHoldoutSet() const override; - - std::unique_ptr createMarginalProbabilityCalibratorFactory() - const override; -}; - -/** - * A factory that allows to create instances of the class `IJointProbabilityCalibrator` that do not fit a model for the - * calibration of joint probabilities. 
- */ -class NoJointProbabilityCalibratorFactory final : public IJointProbabilityCalibratorFactory { - public: - - std::unique_ptr create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const LabelVectorSet* labelVectorSet) const override; -}; - -/** - * Allows to configure a calibrator that does not fit a model for the calibration of joint probabilities. - */ -class NoJointProbabilityCalibratorConfig final : public IJointProbabilityCalibratorConfig { - public: - - bool shouldUseHoldoutSet() const override; - - bool isLabelVectorSetNeeded() const override; - - std::unique_ptr createJointProbabilityCalibratorFactory() const override; -}; - -/** - * Creates and returns a new object of the type `INoProbabilityCalibrationModel`. - * - * @return An unique pointer to an object of type `INoProbabilityCalibrationModel` that has been created - */ -MLRLCOMMON_API std::unique_ptr createNoProbabilityCalibrationModel(); diff --git a/cpp/subprojects/common/include/common/rule_evaluation/rule_compare_function.hpp b/cpp/subprojects/common/include/common/rule_evaluation/rule_compare_function.hpp deleted file mode 100644 index c8656d5a..00000000 --- a/cpp/subprojects/common/include/common/rule_evaluation/rule_compare_function.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/util/quality.hpp" - -#include - -/** - * Defines a function for comparing the quality of different rules. - */ -struct RuleCompareFunction { - public: - - /** - * A function for comparing two objects of type `Quality`. It should return true, if the first object is better - * than the second one, false otherwise. 
- */ - typedef std::function CompareFunction; - - /** - * @param c A function of type `CompareFunction` for comparing the quality of different rules - * @param m The minimum quality of a rule - */ - RuleCompareFunction(CompareFunction c, float64 m) : compare(c), minQuality(m) {}; - - /** - * A function of type `CompareFunction` for comparing the quality of different rules. - */ - const CompareFunction compare; - - /** - * The minimum quality of a rule. - */ - const float64 minQuality; -}; diff --git a/cpp/subprojects/common/include/common/rule_evaluation/score_vector.hpp b/cpp/subprojects/common/include/common/rule_evaluation/score_vector.hpp deleted file mode 100644 index 74db1eda..00000000 --- a/cpp/subprojects/common/include/common/rule_evaluation/score_vector.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/util/quality.hpp" - -// Forward declarations -class ScoreProcessor; -class AbstractPrediction; -class AbstractEvaluatedPrediction; - -/** - * Defines an interface for all one-dimensional vectors that store the scores that may be predicted by a rule, as well - * as a numerical score that assess the overall quality of the rule. - */ -class IScoreVector : public Quality { - public: - - virtual ~IScoreVector() {}; - - /** - * Sets the scores of a specific prediction to the scores that are stored in this vector. - * - * @param prediction A reference to an object of type `AbstractPrediction` that should be updated - */ - virtual void updatePrediction(AbstractPrediction& prediction) const = 0; - - /** - * Passes the scores to an `ScoreProcessor` in order to convert them into the head of a rule. 
- * - * @param scoreProcessor A reference to an object of type `ScoreProcessor`, the scores should be passed to - */ - virtual void processScores(ScoreProcessor& scoreProcessor) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/rule_evaluation/score_vector_binned_dense.hpp b/cpp/subprojects/common/include/common/rule_evaluation/score_vector_binned_dense.hpp deleted file mode 100644 index c4206afc..00000000 --- a/cpp/subprojects/common/include/common/rule_evaluation/score_vector_binned_dense.hpp +++ /dev/null @@ -1,198 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_binned_dense.hpp" -#include "common/rule_evaluation/score_vector.hpp" - -/** - * An one dimensional vector that stores the scores that may be predicted by a rule, corresponding to bins for which the - * same prediction is made, as well as a numerical score that assesses the overall quality of the rule, in a - * C-contiguous array. - * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels for which the rule may - * predict - */ -template -class DenseBinnedScoreVector final : virtual public IScoreVector { - private: - - const IndexVector& labelIndices_; - - DenseBinnedVector binnedVector_; - - const bool sorted_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides access to the - * indices of the labels for which the rule may predict - * @param numBins The number of bins - * @param sorted True, if the indices of the labels for which the rule may predict are sorted in - * increasing order, false otherwise - */ - DenseBinnedScoreVector(const IndexVector& labelIndices, uint32 numBins, bool sorted); - - /** - * An iterator that provides read-only access to the indices of the labels for which the rule predicts. 
- */ - typedef typename IndexVector::const_iterator index_const_iterator; - - /** - * An iterator that provides read-only access to the predicted scores that correspond to individual labels. - */ - typedef DenseBinnedVector::const_iterator score_const_iterator; - - /** - * An iterator that provides access to the indices that correspond to individual bins and allows to modify them. - */ - typedef DenseBinnedVector::index_iterator index_binned_iterator; - - /** - * An iterator that provides read-only access to the indices that correspond to individual bins. - */ - typedef DenseBinnedVector::index_const_iterator index_binned_const_iterator; - - /** - * An iterator that provides access to the predicted scores that correspond to individual bins and allows to - * modify them. - */ - typedef DenseBinnedVector::value_iterator score_binned_iterator; - - /** - * An iterator that provides read-only access to the predicted scores that correspond to individual bins. - */ - typedef DenseBinnedVector::value_const_iterator score_binned_const_iterator; - - /** - * Returns an `index_const_iterator` to the beginning of the indices that correspond to individual labels. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices that correspond to individual labels. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - /** - * Returns a `score_const_iterator` to the beginning of the predicted scores that correspond to individual - * labels. - * - * @return A `score_const_iterator` to the beginning - */ - score_const_iterator scores_cbegin() const; - - /** - * Returns a `score_const_iterator` to the end of the predicted scores that correspond to individual labels. 
- * - * @return A `score_const_iterator` to the end - */ - score_const_iterator scores_cend() const; - - /** - * Returns an `index_binned_iterator` to the beginning of the indices that correspond to individual bins. - * - * @return An `index_binned_iterator` to the beginning - */ - index_binned_iterator indices_binned_begin(); - - /** - * Returns an `index_binned_iterator` to the end of the indices that correspond to individual bins. - * - * @return An `index_binned_iterator` to the end - */ - index_binned_iterator indices_binned_end(); - - /** - * Returns an `index_binned_const_iterator` to the beginning of the indices that correspond to individual bins. - * - * @return An `index_binned_const_iterator` to the beginning - */ - index_binned_const_iterator indices_binned_cbegin() const; - - /** - * Returns an `index_binned_const_iterator` to the end of the indices that correspond to individual bins. - * - * @return An `index_binned_const_iterator` to the end - */ - index_binned_const_iterator indices_binned_cend() const; - - /** - * Returns a `score_binned_iterator` to the beginning of the predicted scores that correspond to individual - * bins. - * - * @return A `score_binned_iterator` to the beginning - */ - score_binned_iterator scores_binned_begin(); - - /** - * Returns a `score_binned_iterator` to the end of the predicted scores that correspond to individual bins. - * - * @return A `score_binned_iterator` to the end - */ - score_binned_iterator scores_binned_end(); - - /** - * Returns a `score_binned_const_iterator` to the beginning of the predicted scores that correspond to - * individual bins. - * - * @return A `score_binned_const_iterator` to the beginning - */ - score_binned_const_iterator scores_binned_cbegin() const; - - /** - * Returns a `score_binned_const_iterator` to the end of the predicted scores that correspond to individual - * bins. 
- * - * @return A `score_binned_const_iterator` to the end - */ - score_binned_const_iterator scores_binned_cend() const; - - /** - * Returns the number of labels for which the rule may predict. - * - * @return The number of labels - */ - uint32 getNumElements() const; - - /** - * Returns the number of bins. - * - * @return The number of bins - */ - uint32 getNumBins() const; - - /** - * Sets the number of bins. - * - * @param numBins The number of bins to be set - * @param freeMemory True, if unused memory should be freed, if possible, false otherwise - */ - void setNumBins(uint32 numBins, bool freeMemory); - - /** - * Returns whether the rule may only predict for a subset of the available labels, or not. - * - * @return True, if the rule may only predict for a subset of the available labels, false otherwise - */ - bool isPartial() const; - - /** - * Returns whether the indices of the labels for which the rule may predict are sorted in increasing order, or - * not. - * - * @return True, if the indices of the labels for which the rule may predict are sorted in increasing order, - * false otherwise - */ - bool isSorted() const; - - void updatePrediction(AbstractPrediction& prediction) const override; - - void processScores(ScoreProcessor& scoreProcessor) const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_evaluation/score_vector_dense.hpp b/cpp/subprojects/common/include/common/rule_evaluation/score_vector_dense.hpp deleted file mode 100644 index 96152d4e..00000000 --- a/cpp/subprojects/common/include/common/rule_evaluation/score_vector_dense.hpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" -#include "common/rule_evaluation/score_vector.hpp" - -/** - * An one-dimensional vector that stores the scores that may be predicted by a rule, as well as an overall quality - * score that assesses the overall quality of the rule, in a C-contiguous 
array. - * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels for which the rule may - * predict - */ -template -class DenseScoreVector final : virtual public IScoreVector { - private: - - const IndexVector& labelIndices_; - - DenseVector predictedScoreVector_; - - const bool sorted_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides access to the - * indices of the labels for which the rule may predict - * @param sorted True, if the indices of the labels for which the rule may predict are sorted in - * increasing order, false otherwise - */ - DenseScoreVector(const IndexVector& labelIndices, bool sorted); - - /** - * An iterator that provides read-only access to the indices. - */ - typedef typename IndexVector::const_iterator index_const_iterator; - - /** - * An iterator that provides access to the predicted scores and allows to modify them. - */ - typedef DenseVector::iterator score_iterator; - - /** - * An iterator that provides read-only access to the predicted scores. - */ - typedef DenseVector::const_iterator score_const_iterator; - - /** - * Returns an `index_const_iterator` to the beginning of the indices. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - /** - * Returns a `score_iterator` to the beginning of the predicted scores. - * - * @return A `score_iterator` to the beginning - */ - score_iterator scores_begin(); - - /** - * Returns a `score_iterator` to the end of the predicted scores. - * - * @return A `score_iterator` to the end - */ - score_iterator scores_end(); - - /** - * Returns a `score_const_iterator` to the beginning of the predicted scores. 
- * - * @return A `score_const_iterator` to the beginning - */ - score_const_iterator scores_cbegin() const; - - /** - * Returns a `const_iterator` to the end of the predicted scores. - * - * @return A `const_iterator` to the end - */ - score_const_iterator scores_cend() const; - - /** - * Returns the number of labels for which the rule may predict. - * - * @return The number of labels - */ - uint32 getNumElements() const; - - /** - * Returns whether the rule may only predict for a subset of the available labels, or not. - * - * @return True, if the rule may only predict for a subset of the available labels, false otherwise - */ - bool isPartial() const; - - /** - * Returns whether the indices of the labels for which the rule may predict are sorted in increasing order, or - * not. - * - * @return True, if the indices of the labels for which the rule may predict are sorted in increasing order, - * false otherwise - */ - bool isSorted() const; - - void updatePrediction(AbstractPrediction& prediction) const override; - - void processScores(ScoreProcessor& scoreProcessor) const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_induction/rule_induction.hpp b/cpp/subprojects/common/include/common/rule_induction/rule_induction.hpp deleted file mode 100644 index 36b65a0b..00000000 --- a/cpp/subprojects/common/include/common/rule_induction/rule_induction.hpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_matrix.hpp" -#include "common/input/label_matrix.hpp" -#include "common/model/model_builder.hpp" -#include "common/post_processing/post_processor.hpp" -#include "common/rule_pruning/rule_pruning.hpp" -#include "common/sampling/feature_sampling.hpp" -#include "common/sampling/partition.hpp" -#include "common/sampling/weight_vector.hpp" -#include "common/statistics/statistics.hpp" -#include "common/thresholds/thresholds.hpp" - -/** - * Defines an interface for all 
classes that implement an algorithm for the induction of individual rules. - */ -class IRuleInduction { - public: - - virtual ~IRuleInduction() {}; - - /** - * Induces the default rule. - * - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics - * which should serve as the basis for inducing the default rule - * @param modelBuilder A reference to an object of type `IModelBuilder`, the default rule should be added to - */ - virtual void induceDefaultRule(IStatistics& statistics, IModelBuilder& modelBuilder) const = 0; - - /** - * Induces a new rule. - * - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of the rule - * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the indices - * of the labels for which the rule may predict - * @param weights A reference to an object of type `IWeightVector` that provides access to the weights - * of individual training examples - * @param partition A reference to an object of type `IPartition` that provides access to the indices of - * the training examples that belong to the training set and the holdout set, - * respectively - * @param featureSampling A reference to an object of type `IFeatureSampling` that should be used for sampling - * the features that may be used by a new condition - * @param rulePruning A reference to an object of type `IRulePruning` that should be used to prune the - * rule - * @param postProcessor A reference to an object of type `IPostProcessor` that should be used to - * post-process the predictions of the rule - * @param rng A reference to an object of type `RNG` that implements the random number generator - * to be used - * @param modelBuilder A reference to an object of type `IModelBuilder`, the rule should be added to - * @return True, if a rule has been induced, false otherwise - */ - virtual bool 
induceRule(IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, - IPartition& partition, IFeatureSampling& featureSampling, - const IRulePruning& rulePruning, const IPostProcessor& postProcessor, RNG& rng, - IModelBuilder& modelBuilder) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IRuleInduction`. - */ -class IRuleInductionFactory { - public: - - virtual ~IRuleInductionFactory() {}; - - /** - * Creates and returns a new object of type `IRuleInduction`. - * - * @return An unique pointer to an object of type `IRuleInduction` that has been created. - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure an algorithm for the induction of individual rules. - */ -class IRuleInductionConfig { - public: - - virtual ~IRuleInductionConfig() {}; - - /** - * Creates and returns a new object of type `IRuleInductionFactory` according to the specified configuration. 
- * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the feature - * values of the training examples - * @param labelMatrix A reference to an object of type `ILabelMatrix` that provides access to the labels of - * the training examples - * @return An unique pointer to an object of type `IRuleInductionFactory` that has been created - */ - virtual std::unique_ptr createRuleInductionFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_beam_search.hpp b/cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_beam_search.hpp deleted file mode 100644 index a4cb3a68..00000000 --- a/cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_beam_search.hpp +++ /dev/null @@ -1,215 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/rule_induction/rule_induction.hpp" - -/** - * Defines an interface for all classes that allow to configure an algorithm for the induction of individual rules that - * uses a top-down beam search. - */ -class MLRLCOMMON_API IBeamSearchTopDownRuleInductionConfig { - public: - - virtual ~IBeamSearchTopDownRuleInductionConfig() {}; - - /** - * Returns the width that is used by the beam search. - * - * @return The width that is used by the beam search - */ - virtual uint32 getBeamWidth() const = 0; - - /** - * Sets the width that should be used by the beam search. - * - * @param beamWidth The width the should be used by the beam search. 
Must be at least 2 - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` that allows further - * configuration of the algorithm for the induction of individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setBeamWidth(uint32 beamWidth) = 0; - - /** - * Returns whether a new sample of the available features is created for each rule that is refined during the - * beam search or not. - * - * @return True, if a new sample is created for each rule, false otherwise - */ - virtual bool areFeaturesResampled() const = 0; - - /** - * Sets whether a new sample of the available features should be created for each rule that is refined during - * the beam search or not. - * - * @param resampleFeatures True, if a new sample should be created for each rule, false otherwise - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` that allows - * further configuration of the algorithm for the induction of individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setResampleFeatures(bool resampleFeatures) = 0; - - /** - * Returns the minimum number of training examples that must be covered by a rule. - * - * @return The minimum number of training examples that must be covered by a rule - */ - virtual uint32 getMinCoverage() const = 0; - - /** - * Sets the minimum number of training examples that must be covered by a rule. - * - * @param minCoverage The minimum number of training examples that must be covered by a rule. Must be at least - * 1 - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` that allows - * further configuration of the algorithm for the induction of individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setMinCoverage(uint32 minCoverage) = 0; - - /** - * Returns the minimum support, i.e., the minimum fraction of the training examples that must be covered by a - * rule. 
- * - * @return The minimum support or 0, if the support of rules is not restricted - */ - virtual float32 getMinSupport() const = 0; - - /** - * Sets the minimum support, i.e., the minimum fraction of the training examples that must be covered by a rule. - * - * @param minSupport The minimum support. Must be in [0, 1] or 0, if the support of rules should not be - * restricted - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` that allows - * further configuration of the algorithm for the induction of individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setMinSupport(float32 minSupport) = 0; - - /** - * Returns the maximum number of conditions to be included in a rule's body. - * - * @return The maximum number of conditions to be included in a rule's body or 0, if the number of conditions is - * not restricted - */ - virtual uint32 getMaxConditions() const = 0; - - /** - * Sets the maximum number of conditions to be included in a rule's body. - * - * @param maxConditions The maximum number of conditions to be included in a rule's body. Must be at least 2 or - * 0, if the number of conditions should not be restricted - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` that allows - * further configuration of the algorithm for the induction of individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setMaxConditions(uint32 maxConditions) = 0; - - /** - * Returns the maximum number of times, the head of a rule may be refinement after a new condition has been - * added to its body. - * - * @return The maximum number of times, the head of a rule may be refined or 0, if the number of refinements is - * not restricted - */ - virtual uint32 getMaxHeadRefinements() const = 0; - - /** - * Sets the maximum number of times, the head of a rule may be refined after a new condition has been added to - * its body. 
- * - * @param maxHeadRefinements The maximum number of times, the head of a rule may be refined. Must be at least - * 1 or 0, if the number of refinements should not be restricted - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` that - * allows further configuration of the algorithm for the induction of individual - * rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setMaxHeadRefinements(uint32 maxHeadRefinements) = 0; - - /** - * Returns whether the predictions of rules are recalculated on all training examples, if some of the examples - * have zero weights, or not. - * - * @return True, if the predictions of rules are recalculated on all training examples, false otherwise - */ - virtual bool arePredictionsRecalculated() const = 0; - - /** - * Sets whether the predictions of rules should be recalculated on all training examples, if some of the - * examples have zero weights, or not. - * - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, false otherwise - * @return A reference to an object of type `IBeamSearchTopDownRuleInductionConfig` - * that allows further configuration of the algorithm for the induction of - * individual rules - */ - virtual IBeamSearchTopDownRuleInductionConfig& setRecalculatePredictions(bool recalculatePredictions) = 0; -}; - -/** - * Allows to configure an algorithm for the induction of individual rules that uses a top-down beam search. 
- */ -class BeamSearchTopDownRuleInductionConfig final : public IRuleInductionConfig, - public IBeamSearchTopDownRuleInductionConfig { - private: - - const RuleCompareFunction ruleCompareFunction_; - - uint32 beamWidth_; - - bool resampleFeatures_; - - uint32 minCoverage_; - - float32 minSupport_; - - uint32 maxConditions_; - - uint32 maxHeadRefinements_; - - bool recalculatePredictions_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel refinement of - * rules - */ - BeamSearchTopDownRuleInductionConfig(RuleCompareFunction ruleCompareFunction, - const std::unique_ptr& multiThreadingConfigPtr); - - uint32 getBeamWidth() const override; - - IBeamSearchTopDownRuleInductionConfig& setBeamWidth(uint32 beamWidth) override; - - bool areFeaturesResampled() const override; - - IBeamSearchTopDownRuleInductionConfig& setResampleFeatures(bool resampleFeatures) override; - - uint32 getMinCoverage() const override; - - IBeamSearchTopDownRuleInductionConfig& setMinCoverage(uint32 minCoverage) override; - - float32 getMinSupport() const override; - - IBeamSearchTopDownRuleInductionConfig& setMinSupport(float32 minSupport) override; - - uint32 getMaxConditions() const override; - - IBeamSearchTopDownRuleInductionConfig& setMaxConditions(uint32 maxConditions) override; - - uint32 getMaxHeadRefinements() const override; - - IBeamSearchTopDownRuleInductionConfig& setMaxHeadRefinements(uint32 maxHeadRefinements) override; - - bool arePredictionsRecalculated() const override; - - IBeamSearchTopDownRuleInductionConfig& setRecalculatePredictions(bool recalculatePredictions) override; - - std::unique_ptr createRuleInductionFactory( - 
const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_greedy.hpp b/cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_greedy.hpp deleted file mode 100644 index fd1ad0dd..00000000 --- a/cpp/subprojects/common/include/common/rule_induction/rule_induction_top_down_greedy.hpp +++ /dev/null @@ -1,168 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/multi_threading/multi_threading.hpp" -#include "common/rule_induction/rule_induction.hpp" - -/** - * Defines an interface for all classes that allow to configure an algorithm for the induction of individual rules that - * uses a greedy top-down search. - */ -class MLRLCOMMON_API IGreedyTopDownRuleInductionConfig { - public: - - virtual ~IGreedyTopDownRuleInductionConfig() {}; - - /** - * Returns the minimum number of training examples that must be covered by a rule. - * - * @return The minimum number of training examples that must be covered by a rule - */ - virtual uint32 getMinCoverage() const = 0; - - /** - * Sets the minimum number of training examples that must be covered by a rule. - * - * @param minCoverage The minimum number of training examples that must be covered by a rule. Must be at least - * 1 - * @return A reference to an object of type `IGreedyTopDownRuleInductionConfig` that allows further - * configuration of the algorithm for the induction of individual rules - */ - virtual IGreedyTopDownRuleInductionConfig& setMinCoverage(uint32 minCoverage) = 0; - - /** - * Returns the minimum support, i.e., the minimum fraction of the training examples that must be covered by a - * rule. 
- * - * @return The minimum support or 0, if the support of rules is not restricted - */ - virtual float32 getMinSupport() const = 0; - - /** - * Sets the minimum support, i.e., the minimum fraction of the training examples that must be covered by a rule. - * - * @param minSupport The minimum support. Must be in [0, 1] or 0, if the support of rules should not be - * restricted - * @return A reference to an object of type `IGreedyTopDownRuleInductionConfig` that allows further - * configuration of the algorithm for the induction of individual rules - */ - virtual IGreedyTopDownRuleInductionConfig& setMinSupport(float32 minSupport) = 0; - - /** - * Returns the maximum number of conditions to be included in a rule's body. - * - * @return The maximum number of conditions to be included in a rule's body or 0, if the number of conditions is - * not restricted - */ - virtual uint32 getMaxConditions() const = 0; - - /** - * Sets the maximum number of conditions to be included in a rule's body. - * - * @param maxConditions The maximum number of conditions to be included in a rule's body. Must be at least 1 or - * 0, if the number of conditions should not be restricted - * @return A reference to an object of type `IGreedyTopDownRuleInductionConfig` that allows further - * configuration of the algorithm for the induction of individual rules - */ - virtual IGreedyTopDownRuleInductionConfig& setMaxConditions(uint32 maxConditions) = 0; - - /** - * Returns the maximum number of times, the head of a rule may be refinement after a new condition has been - * added to its body. - * - * @return The maximum number of times, the head of a rule may be refined or 0, if the number of refinements is - * not restricted - */ - virtual uint32 getMaxHeadRefinements() const = 0; - - /** - * Sets the maximum number of times, the head of a rule may be refined after a new condition has been added to - * its body. 
- * - * @param maxHeadRefinements The maximum number of times, the head of a rule may be refined. Must be at least - * 1 or 0, if the number of refinements should not be restricted - * @return A reference to an object of type `IGreedyTopDownRuleInductionConfig` that allows - * further configuration of the algorithm for the induction of individual rules - */ - virtual IGreedyTopDownRuleInductionConfig& setMaxHeadRefinements(uint32 maxHeadRefinements) = 0; - - /** - * Returns whether the predictions of rules are recalculated on all training examples, if some of the examples - * have zero weights, or not. - * - * @return True, if the predictions of rules are recalculated on all training examples, false otherwise - */ - virtual bool arePredictionsRecalculated() const = 0; - - /** - * Sets whether the predictions of rules should be recalculated on all training examples, if some of the - * examples have zero weights, or not. - * - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, false otherwise - * @return A reference to an object of type `IGreedyTopDownRuleInductionConfig` that - * allows further configuration of the algorithm for the induction of - * individual rules - */ - virtual IGreedyTopDownRuleInductionConfig& setRecalculatePredictions(bool recalculatePredictions) = 0; -}; - -/** - * Allows to configure an algorithm for the induction of individual rules that uses a greedy top-down search. 
- */ -class GreedyTopDownRuleInductionConfig final : public IRuleInductionConfig, - public IGreedyTopDownRuleInductionConfig { - private: - - const RuleCompareFunction ruleCompareFunction_; - - uint32 minCoverage_; - - float32 minSupport_; - - uint32 maxConditions_; - - uint32 maxHeadRefinements_; - - bool recalculatePredictions_; - - const std::unique_ptr& multiThreadingConfigPtr_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - * @param multiThreadingConfigPtr A reference to an unique pointer that stores the configuration of the - * multi-threading behavior that should be used for the parallel refinement of - * rules - */ - GreedyTopDownRuleInductionConfig(RuleCompareFunction ruleCompareFunction, - const std::unique_ptr& multiThreadingConfigPtr); - - uint32 getMinCoverage() const override; - - IGreedyTopDownRuleInductionConfig& setMinCoverage(uint32 minCoverage) override; - - float32 getMinSupport() const override; - - IGreedyTopDownRuleInductionConfig& setMinSupport(float32 minSupport) override; - - uint32 getMaxConditions() const override; - - IGreedyTopDownRuleInductionConfig& setMaxConditions(uint32 maxConditions) override; - - uint32 getMaxHeadRefinements() const override; - - IGreedyTopDownRuleInductionConfig& setMaxHeadRefinements(uint32 maxHeadRefinements) override; - - bool arePredictionsRecalculated() const override; - - IGreedyTopDownRuleInductionConfig& setRecalculatePredictions(bool recalculatePredictions) override; - - std::unique_ptr createRuleInductionFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_model_assemblage/default_rule.hpp b/cpp/subprojects/common/include/common/rule_model_assemblage/default_rule.hpp deleted file mode 100644 index b19d74cc..00000000 --- 
a/cpp/subprojects/common/include/common/rule_model_assemblage/default_rule.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/label_matrix_row_wise.hpp" - -/** - * Defines an interface for all classes that allow to configure the default rule that is included in a rule-based model. - */ -class IDefaultRuleConfig { - public: - - virtual ~IDefaultRuleConfig() {}; - - /** - * Returns whether a default rule is included or not. - * - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @return True, if a default rule is included, false otherwise - */ - virtual bool isDefaultRuleUsed(const IRowWiseLabelMatrix& labelMatrix) const = 0; -}; - -/** - * Allows to configure whether a default rule should be included in a rule-based model or not. - */ -class DefaultRuleConfig final : public IDefaultRuleConfig { - private: - - const bool useDefaultRule_; - - public: - - /** - * @param useDefaultRule True, if a default rule should be included, false otherwise - */ - DefaultRuleConfig(bool useDefaultRule); - - bool isDefaultRuleUsed(const IRowWiseLabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage.hpp b/cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage.hpp deleted file mode 100644 index 9f136115..00000000 --- a/cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage.hpp +++ /dev/null @@ -1,96 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/label_matrix_row_wise.hpp" -#include "common/model/model_builder.hpp" -#include "common/rule_induction/rule_induction.hpp" -#include "common/sampling/feature_sampling.hpp" -#include "common/sampling/instance_sampling.hpp" -#include 
"common/sampling/label_sampling.hpp" -#include "common/sampling/partition_sampling.hpp" -#include "common/statistics/statistics_provider.hpp" -#include "common/stopping/stopping_criterion.hpp" -#include "common/thresholds/thresholds.hpp" - -/** - * Defines an interface for all classes that implement an algorithm for the induction of several rules that will be - * added to a rule-based model. - */ -class IRuleModelAssemblage { - public: - - virtual ~IRuleModelAssemblage() {}; - - /** - * Assembles and returns a rule-based model that consists of several rules. - * - * @param ruleInduction A reference to an object of type `IRuleInduction` to be used for the induction - * of individual rules - * @param rulePruning A reference to an object of type `IRulePruning` to be used for pruning rules - * @param postProcessor A reference to an object of type `IPostProcessor` to be used for post-processing - * the predictions of rules - * @param partition A reference to an object of type `IPartition` that provides access to the - * indices of the training examples that belong to the training set and the holdout - * set, respectively - * @param labelSampling A reference to an object of type `ILabelSampling` to be used for sampling the - * labels whenever a new rule is induced - * @param instanceSampling A reference to an object of type `IInstanceSampling` to be used for sampling the - * examples whenever a new rule is induced - * @param featureSampling A reference to an object of type `IFeatureSampling` to be used for sampling the - * features that may be used by the conditions of a rule - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to - * the statistics which serve as the basis for learning rules - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of rules - * @param rng A reference to an object of type `RNG` that implements the 
random number - * generator to be used - * @param modelBuilder A reference to an object of type `IModelBuilder`, the rules should be added to - */ - virtual void induceRules(const IRuleInduction& ruleInduction, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, IStatisticsProvider& statisticsProvider, - IThresholds& thresholds, IModelBuilder& modelBuilder, RNG& rng) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IRuleModelAssemblage`. - */ -class IRuleModelAssemblageFactory { - public: - - virtual ~IRuleModelAssemblageFactory() {}; - - /** - * Creates and returns a new object of the type `IRuleModelAssemblage`. - * - * @param stoppingCriterionFactoryPtr An unique pointer to an object of type `IStoppingCriterionFactory` that - * allows to create the implementations to be used to decide whether - * additional rules should be induced or not - */ - virtual std::unique_ptr create( - std::unique_ptr stoppingCriterionFactoryPtr) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure an algorithm for the induction of several rules that - * will be added to a rule-based model. - */ -class IRuleModelAssemblageConfig { - public: - - virtual ~IRuleModelAssemblageConfig() {}; - - /** - * Creates and returns a new object of type `IRuleModelAssemblageFactory` according to specified configuration. 
- * - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access to - * the labels of the training examples - * @return An unique pointer to an object of type `IRuleModelAssemblageFactory` that has been - * created - */ - virtual std::unique_ptr createRuleModelAssemblageFactory( - const IRowWiseLabelMatrix& labelMatrix) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage_sequential.hpp b/cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage_sequential.hpp deleted file mode 100644 index b11a658f..00000000 --- a/cpp/subprojects/common/include/common/rule_model_assemblage/rule_model_assemblage_sequential.hpp +++ /dev/null @@ -1,28 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_model_assemblage/default_rule.hpp" -#include "common/rule_model_assemblage/rule_model_assemblage.hpp" - -/** - * Allows to configure an algorithm that sequentially induces several rules, optionally starting with a default rule, - * that are added to a rule-based model. 
- */ -class SequentialRuleModelAssemblageConfig final : public IRuleModelAssemblageConfig { - private: - - const std::unique_ptr& defaultRuleConfigPtr_; - - public: - - /** - * @param defaultRuleConfigPtr A reference to an unique pointer that stores the configuration of the default - * rule - */ - SequentialRuleModelAssemblageConfig(const std::unique_ptr& defaultRuleConfigPtr); - - std::unique_ptr createRuleModelAssemblageFactory( - const IRowWiseLabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_pruning/rule_pruning.hpp b/cpp/subprojects/common/include/common/rule_pruning/rule_pruning.hpp deleted file mode 100644 index 385dbb2c..00000000 --- a/cpp/subprojects/common/include/common/rule_pruning/rule_pruning.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/condition_list.hpp" -#include "common/sampling/partition.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -/** - * Defines an interface for all classes that implement a strategy for pruning individual rules based on a "prune set", - * i.e., based on the examples that are not contained in the sub-sample of the training data that has been used to learn - * the rule, referred to as the "grow set". - */ -class IRulePruning { - public: - - virtual ~IRulePruning() {}; - - /** - * Prunes the conditions of an existing rule by modifying a given list of conditions in-place. The rule is - * pruned by removing individual conditions in a way that improves over its original quality, measured on the - * prune set. 
- * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset`, which contains the thresholds - * that correspond to the subspace of the instance space that is covered by the - * existing rule - * @param partition A reference to an object of type `IPartition` that provides access to the indices of - * the training examples that belong to the training set and the holdout set, - * respectively - * @param conditions A reference to an object of type `ConditionList` that stores the conditions of the - * existing rule - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that - * are predicted by the existing rule - * @return An unique pointer to an object of type `ICoverageState` that keeps track of the - * examples that are covered by the pruned rule or a null pointer if the rule was not - * pruned - */ - virtual std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, - ConditionList& conditions, - const AbstractPrediction& head) const = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IRulePruning`. - */ -class IRulePruningFactory { - public: - - virtual ~IRulePruningFactory() {}; - - /** - * Creates and returns a new object of type `IRulePruning`. - * - * @return An unique pointer to an object of type `IRulePruning` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a strategy for pruning individual rules. - */ -class IRulePruningConfig { - public: - - virtual ~IRulePruningConfig() {}; - - /** - * Creates and returns a new object of type `IRulePruningFactory` according to the specified configuration. 
- * - * @return An unique pointer to an object of type `IRulePruningFactory` that has been created - */ - virtual std::unique_ptr createRulePruningFactory() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/rule_pruning/rule_pruning_irep.hpp b/cpp/subprojects/common/include/common/rule_pruning/rule_pruning_irep.hpp deleted file mode 100644 index becc696e..00000000 --- a/cpp/subprojects/common/include/common/rule_pruning/rule_pruning_irep.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_pruning/rule_pruning.hpp" - -/** - * Allows to configure a strategy for pruning individual rules that prunes rules by following the principles of - * "incremental reduced error pruning" (IREP). - */ -class IrepConfig final : public IRulePruningConfig { - private: - - const RuleCompareFunction ruleCompareFunction_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - */ - IrepConfig(RuleCompareFunction ruleCompareFunction); - - std::unique_ptr createRulePruningFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_pruning/rule_pruning_no.hpp b/cpp/subprojects/common/include/common/rule_pruning/rule_pruning_no.hpp deleted file mode 100644 index d2157d56..00000000 --- a/cpp/subprojects/common/include/common/rule_pruning/rule_pruning_no.hpp +++ /dev/null @@ -1,15 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_pruning/rule_pruning.hpp" - -/** - * Allows to configure a method for pruning individual rules that does not actually perform any pruning. 
- */ -class NoRulePruningConfig final : public IRulePruningConfig { - public: - - std::unique_ptr createRulePruningFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/prediction.hpp b/cpp/subprojects/common/include/common/rule_refinement/prediction.hpp deleted file mode 100644 index 9958ec1d..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/prediction.hpp +++ /dev/null @@ -1,195 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_binned_dense.hpp" -#include "common/data/vector_dense.hpp" -#include "common/indices/index_vector.hpp" -#include "common/sampling/weight_vector_bit.hpp" -#include "common/sampling/weight_vector_dense.hpp" -#include "common/sampling/weight_vector_equal.hpp" -#include "common/sampling/weight_vector_out_of_sample.hpp" - -#include - -// Forward declarations -class IStatistics; -class IStatisticsSubset; -class IHead; - -/** - * An abstract base class for all classes that store the scores that are predicted by a rule. - */ -class AbstractPrediction : public IIndexVector { - protected: - - /** - * A vector that stores the predicted scores. - */ - DenseVector predictedScoreVector_; - - public: - - /** - * @param numElements The number of labels for which the rule predicts - */ - AbstractPrediction(uint32 numElements); - - /** - * An iterator that provides access to the predicted scores and allows to modify them. - */ - typedef DenseVector::iterator score_iterator; - - /** - * An iterator that provides read-only access to the predicted scores. - */ - typedef DenseVector::const_iterator score_const_iterator; - - /** - * Returns a `score_iterator` to the beginning of the predicted scores. - * - * @return A `score_iterator` to the beginning - */ - score_iterator scores_begin(); - - /** - * Returns a `score_iterator` to the end of the predicted scores. 
- * - * @return A `score_iterator` to the end - */ - score_iterator scores_end(); - - /** - * Returns a `score_const_iterator` to the beginning of the predicted scores. - * - * @return A `score_const_iterator` to the beginning - */ - score_const_iterator scores_cbegin() const; - - /** - * Returns a `score_const_iterator` to the end of the predicted scores. - * - * @return A `score_const_iterator` to the end - */ - score_const_iterator scores_cend() const; - - /** - * Sets the predicted scores in another vector to this vector. - * - * @param begin A `score_const_iterator` to the beginning of the predicted scores - * @param end A `score_const_iterator` to the end of the predicted scores - */ - void set(score_const_iterator begin, score_const_iterator end); - - /** - * Sets the predicted scores in another vector to this vector. - * - * @param begin An iterator to the beginning of the predicted scores - * @param end An iterator to the end of the predicted scores - */ - void set(DenseBinnedVector::const_iterator begin, DenseBinnedVector::const_iterator end); - - /** - * Updates the given statistics by applying this prediction. - * - * @param statistics A reference to an object of type `IStatistics` to be updated - * @param statisticIndex The index of the statistic to be updated - */ - virtual void apply(IStatistics& statistics, uint32 statisticIndex) const = 0; - - /** - * Updates the given statistics by reverting this prediction. - * - * @param statistics A reference to an object of type `IStatistics` to be updated - * @param statisticIndex The index of the statistic to be updated - */ - virtual void revert(IStatistics& statistics, uint32 statisticIndex) const = 0; - - /** - * Sorts the scores that stored by this prediction in increasing order by the the indices of the labels they - * correspond to. - */ - virtual void sort() = 0; - - /** - * Creates and returns a head that contains the scores that are stored by this prediction. 
- * - * @return An unique pointer to an object of type `IHead` that has been created - */ - virtual std::unique_ptr createHead() const = 0; - - /** - * Creates and returns a new subset of the given statistics that only contains the labels whose indices are - * stored in this vector. - * - * @param statistics A reference to an object of type `IStatistics` that should be used to create the subset - * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights - * of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createStatisticsSubset(const IStatistics& statistics, - const EqualWeightVector& weights) const = 0; - - /** - * Creates and returns a new subset of the given statistics that only contains the labels whose indices are - * stored in this vector. - * - * @param statistics A reference to an object of type `IStatistics` that should be used to create the subset - * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights - * of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createStatisticsSubset(const IStatistics& statistics, - const BitWeightVector& weights) const = 0; - - /** - * Creates and returns a new subset of the given statistics that only contains the labels whose indices are - * stored in this vector. 
- * - * @param statistics A reference to an object of type `IStatistics` that should be used to create the subset - * @param weights A reference to an object of type `DenseWeightVector` that provides access to the - * weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const DenseWeightVector& weights) const = 0; - - /** - * Creates and returns a new subset of the given statistics that only contains the labels whose indices are - * stored in this vector. - * - * @param statistics A reference to an object of type `IStatistics` that should be used to create the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector` that - * provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const = 0; - - /** - * Creates and returns a new subset of the given statistics that only contains the labels whose indices are - * stored in this vector. - * - * @param statistics A reference to an object of type `IStatistics` that should be used to create the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector` that - * provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const = 0; - - /** - * Creates and returns a new subset of the given statistics that only contains the labels whose indices are - * stored in this vector. 
- * - * @param statistics A reference to an object of type `IStatistics` that should be used to create the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector>` - * that provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector>& weights) const = 0; - - uint32 getNumElements() const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/prediction_complete.hpp b/cpp/subprojects/common/include/common/rule_refinement/prediction_complete.hpp deleted file mode 100644 index 69754150..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/prediction_complete.hpp +++ /dev/null @@ -1,76 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_complete.hpp" -#include "common/rule_refinement/prediction_evaluated.hpp" - -/** - * Stores the scores that are predicted by a rule that predicts for all available labels. - */ -class CompletePrediction final : public AbstractEvaluatedPrediction { - private: - - const CompleteIndexVector indexVector_; - - public: - - /** - * @param numElements The number of labels for which the rule predicts - */ - CompletePrediction(uint32 numElements); - - /** - * An iterator that provides read-only access to the indices of the labels for which the rule predicts. - */ - typedef CompleteIndexVector::const_iterator index_const_iterator; - - /** - * Returns an `index_const_iterator` to the beginning of the indices of the labels for which the rule predicts. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices of the labels for which the rule predicts. 
- * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - bool isPartial() const override; - - uint32 getIndex(uint32 pos) const override; - - std::unique_ptr createStatisticsSubset(const IStatistics& statistics, - const EqualWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset(const IStatistics& statistics, - const BitWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const DenseWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, - const OutOfSampleWeightVector>& weights) const override; - - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const override; - - void apply(IStatistics& statistics, uint32 statisticIndex) const override; - - void revert(IStatistics& statistics, uint32 statisticIndex) const override; - - void sort() override; - - std::unique_ptr createHead() const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/prediction_evaluated.hpp b/cpp/subprojects/common/include/common/rule_refinement/prediction_evaluated.hpp deleted file mode 100644 index 192da801..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/prediction_evaluated.hpp +++ /dev/null @@ -1,21 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_refinement/prediction.hpp" -#include "common/util/quality.hpp" - -/** - * An abstract base class for all classes that store the scores that are predicted by a rule, as well as a numerical - * score that assesses the overall quality of 
the rule. - */ -class AbstractEvaluatedPrediction : public AbstractPrediction, - public Quality { - public: - - /** - * @param numElements The number of labels for which the rule predicts - */ - AbstractEvaluatedPrediction(uint32 numElements); -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/prediction_partial.hpp b/cpp/subprojects/common/include/common/rule_refinement/prediction_partial.hpp deleted file mode 100644 index 1092e811..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/prediction_partial.hpp +++ /dev/null @@ -1,116 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_partial.hpp" -#include "common/rule_refinement/prediction_evaluated.hpp" - -/** - * Stores the scores that are predicted by a rule that predicts for a subset of the available labels. - */ -class PartialPrediction final : public AbstractEvaluatedPrediction { - private: - - PartialIndexVector indexVector_; - - bool sorted_; - - public: - - /** - * @param numElements The number of labels for which the rule predicts - * @param sorted True, if the scores that are stored by this prediction are sorted in increasing order by - * the corresponding label indices, false otherwise - */ - PartialPrediction(uint32 numElements, bool sorted); - - /** - * An iterator that provides access to the indices for which the rule predicts and allows to modify them. - */ - typedef PartialIndexVector::iterator index_iterator; - - /** - * An iterator that provides read-only access to the indices for which the rule predicts. - */ - typedef PartialIndexVector::const_iterator index_const_iterator; - - /** - * Returns an `index_iterator` to the beginning of the indices for which the rule predicts. - * - * @return An `index_iterator` to the beginning - */ - index_iterator indices_begin(); - - /** - * Returns an `index_iterator` to the end of the indices for which the rule predicts. 
- * - * @return An `index_iterator` to the end - */ - index_iterator indices_end(); - - /** - * Returns an `index_const_iterator` to the beginning of the indices for which the rule predicts. - * - * @return An `index_const_iterator` to the beginning - */ - index_const_iterator indices_cbegin() const; - - /** - * Returns an `index_const_iterator` to the end of the indices for which the rule predicts. - * - * @return An `index_const_iterator` to the end - */ - index_const_iterator indices_cend() const; - - /** - * Sets the number of labels for which the rule predicts. - * - * @param numElements The number of labels to be set - * @param freeMemory True, if unused memory should be freed if possible, false otherwise - */ - void setNumElements(uint32 numElements, bool freeMemory); - - /** - * Sets whether the scores that are stored by this prediction are sorted in increasing order by the - * corresponding label indices, or not. - * - * @param sorted True, if the scores that are stored by this prediction are sorted in increasing order by the - * corresponding label indices, false otherwise - */ - void setSorted(bool sorted); - - bool isPartial() const override; - - uint32 getIndex(uint32 pos) const override; - - std::unique_ptr createStatisticsSubset(const IStatistics& statistics, - const EqualWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset(const IStatistics& statistics, - const BitWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const DenseWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const override; - - std::unique_ptr createStatisticsSubset( - const IStatistics& statistics, - const OutOfSampleWeightVector>& weights) const override; 
- - std::unique_ptr createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const override; - - void apply(IStatistics& statistics, uint32 statisticIndex) const override; - - void revert(IStatistics& statistics, uint32 statisticIndex) const override; - - void sort() override; - - std::unique_ptr createHead() const override; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/refinement.hpp b/cpp/subprojects/common/include/common/rule_refinement/refinement.hpp deleted file mode 100644 index 3c8b248d..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/refinement.hpp +++ /dev/null @@ -1,39 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/model/condition.hpp" -#include "common/rule_refinement/prediction_evaluated.hpp" - -/** - * Stores the properties of a potential refinement of a rule. - */ -struct Refinement final : public Condition { - public: - - /** - * Assigns the properties of an existing refinement, except for the scores that are predicted by the refined - * rule, to this refinement. - * - * @param refinement A reference to the existing refinement - * @return A reference to the modified refinement - */ - Refinement& operator=(const Refinement& refinement) { - Condition::operator=(refinement); - previous = refinement.previous; - return *this; - } - - /** - * An unique pointer to an object of type `AbstractEvaluatedPrediction` that stores the scores that are - * predicted by the refined rule, as well as its overall quality. - */ - std::unique_ptr headPtr; - - /** - * The index of the last element, e.g., example or bin, that has been processed when evaluating the refined - * rule. 
- */ - int64 previous; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_fixed.hpp b/cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_fixed.hpp deleted file mode 100644 index a711c32c..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_fixed.hpp +++ /dev/null @@ -1,111 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_evaluation/rule_compare_function.hpp" -#include "common/rule_evaluation/score_vector.hpp" -#include "common/rule_refinement/refinement.hpp" - -#include -#include - -/** - * Allows comparing potential refinements of a rule and keeping track of the best ones. - */ -class FixedRefinementComparator final { - private: - - const RuleCompareFunction ruleCompareFunction_; - - const uint32 maxRefinements_; - - Refinement* refinements_; - - std::vector> order_; - - Quality minQuality_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - * @param maxRefinements The maximum number of refinements to keep track of - * @param minQuality A reference to an object of type `Quality` a refinement must improve on - */ - FixedRefinementComparator(RuleCompareFunction ruleCompareFunction, uint32 maxRefinements, - const Quality& minQuality); - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - * @param maxRefinements The maximum number of refinements to keep track of - */ - FixedRefinementComparator(RuleCompareFunction ruleCompareFunction, uint32 maxRefinements); - - /** - * @param comparator A reference to an object of type `FixedRefinementComparator` that keeps track of the best - * refinements found so far - */ - FixedRefinementComparator(const 
FixedRefinementComparator& comparator); - - ~FixedRefinementComparator(); - - /** - * An iterator that provides access to the refinements the comparator keeps track of and allows to modify them. - */ - typedef std::vector>::iterator iterator; - - /** - * Returns an `iterator` to the beginning of the refinements, starting with the best one. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator to the end of the refinements. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns the number of refinements the comparator keeps track of. - * - * @return The number of refinements - */ - uint32 getNumElements() const; - - /** - * Returns whether the quality of a rule's predictions is considered as an improvement over the quality of the - * refinements that have been provided to this comparator so far. - * - * @param scoreVector A reference to an object of type `IScoreVector` that stores the quality of the - * predictions - * @return True, if the quality of the given predictions is considered as an improvement, false - * otherwise - */ - bool isImprovement(const IScoreVector& scoreVector) const; - - /** - * Keeps track of a given refinement of a rule that is considered as an improvement over the refinements that - * have been provided to this comparator so far. - * - * @param refinement A reference to an object of type `Refinement` that represents the refinement of the rule - * @param scoreVector A reference to an object of type `IScoreVector` that stores the predictions of the rule - */ - void pushRefinement(const Refinement& refinement, const IScoreVector& scoreVector); - - /** - * Keeps track of the best refinements that are stored by a given `FixedRefinementComparator` if they are - * considered as an improvement over the best refinements that have been provided to this comparator. 
- * - * @param comparator A reference to an object of type `FixedRefinementComparator` that should be merged - * @return True, if at least one of the refinements that are stored by the given `comparator` is - * considered as an improvement over the best refinements that has been provided to this - * comparator - */ - bool merge(FixedRefinementComparator& comparator); -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_single.hpp b/cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_single.hpp deleted file mode 100644 index 4d4252f7..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/refinement_comparator_single.hpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_evaluation/rule_compare_function.hpp" -#include "common/rule_refinement/refinement.hpp" -#include "common/rule_refinement/score_processor.hpp" - -/** - * Allows comparing potential refinements of a rule and keeping track of the best one. - */ -class SingleRefinementComparator final { - private: - - const RuleCompareFunction ruleCompareFunction_; - - Refinement bestRefinement_; - - Quality bestQuality_; - - ScoreProcessor scoreProcessor_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - */ - SingleRefinementComparator(RuleCompareFunction ruleCompareFunction); - - /** - * @param comparator A reference to an object of type `SingleRefinementComparator` that keeps track of the best - * refinement found so far - */ - SingleRefinementComparator(const SingleRefinementComparator& comparator); - - /** - * An iterator that provides access to the refinements the comparator keeps track of and allows to modify them. 
- */ - typedef Refinement* iterator; - - /** - * Returns an `iterator` to the beginning of the refinements, starting with the best one. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator to the worst end of the refinements. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns the number of refinements the comparator keeps track of. - * - * @return The number of refinements - */ - uint32 getNumElements() const; - - /** - * Returns whether the quality of a rule's predictions is considered as an improvement over the refinements that - * have been provided to this comparator so far. - * - * @param scoreVector A reference to an object of type `IScoreVector` that stores the quality of the - * predictions - * @return True, if the quality of the given predictions is considered as an improvement, false - * otherwise - */ - bool isImprovement(const IScoreVector& scoreVector) const; - - /** - * Keeps track of a given refinement of a rule that is considered as an improvement over the refinements that - * have been provided to this comparator so far. - * - * @param refinement A reference to an object of type `Refinement` that represents the refinement of the rule - * @param scoreVector A reference to an object of type `IScoreVector` that stores the predictions of the rule - */ - void pushRefinement(const Refinement& refinement, const IScoreVector& scoreVector); - - /** - * Keeps track of the best refinement that is stored by a given `SingleRefinementComparator` if it is considered - * as an improvement over the best refinement that has been provided to this comparator. 
- * - * @param comparator A reference to an object of type `SingleRefinementComparator` that should be merged - * @return True, if the best refinement that is stored by the given `comparator` is considered as - * an improvement over the best refinement that has been provided to this comparator - */ - bool merge(SingleRefinementComparator& comparator); -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement.hpp b/cpp/subprojects/common/include/common/rule_refinement/rule_refinement.hpp deleted file mode 100644 index fc1e1724..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement.hpp +++ /dev/null @@ -1,34 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_refinement/refinement_comparator_fixed.hpp" -#include "common/rule_refinement/refinement_comparator_single.hpp" - -/** - * Defines an interface for all classes that allow to find the best refinement of existing rules. - */ -class IRuleRefinement { - public: - - virtual ~IRuleRefinement() {}; - - /** - * Finds the best refinement of an existing rule. - * - * @param comparator A reference to an object of type `SingleRefinementComparator` that is used to compare - * the potential refinements - * @param minCoverage The minimum number of examples that must be covered by the refinement - */ - virtual void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) = 0; - - /** - * Finds the best refinements of an existing rule. 
- * - * @param comparator A reference to an object of type `MultiRefinementComparator` that is used to compare the - * potential refinements - * @param minCoverage The minimum number of examples that must be covered by the refinements - */ - virtual void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) = 0; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_approximate.hpp b/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_approximate.hpp deleted file mode 100644 index 6b24629f..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_approximate.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/threshold_vector.hpp" -#include "common/rule_refinement/rule_refinement.hpp" -#include "common/rule_refinement/rule_refinement_callback.hpp" -#include "common/statistics/histogram.hpp" - -/** - * Allows to find the best refinements of existing rules, which result from adding a new condition that correspond to a - * certain feature. The thresholds that may be used by the new condition result from the boundaries between the bins - * that have been created using a binning method. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels for which the refined - * rule is allowed to predict - */ -template -class ApproximateRuleRefinement final : public IRuleRefinement { - private: - - const IndexVector& labelIndices_; - - const uint32 numExamples_; - - const uint32 featureIndex_; - - const bool nominal_; - - typedef IRuleRefinementCallback Callback; - - const std::unique_ptr callbackPtr_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides access to the - * indices of the labels for which the refined rule is allowed to predict - * @param numExamples The total number of training examples with non-zero weights that are covered by the - * existing rule - * @param featureIndex The index of the feature, the new condition corresponds to - * @param nominal True, if the feature at index `featureIndex` is nominal, false otherwise - * @param callbackPtr An unique pointer to an object of type `IRuleRefinementCallback` that allows to - * retrieve the information that is required to search for potential refinements - */ - ApproximateRuleRefinement(const IndexVector& labelIndices, uint32 numExamples, uint32 featureIndex, - bool nominal, std::unique_ptr callbackPtr); - - void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) override; - - void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) override; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_callback.hpp b/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_callback.hpp deleted file mode 100644 index e9d84284..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_callback.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -/** - * Defines an interface for callbacks that may be invoked by subclasses of 
the the class `IRuleRefinement` in order to - * retrieve the information that is required to search for potential refinements. It consists of statistics, as well as - * a vector that allows to determine the thresholds that may be used by potential conditions. - * - * @tparam Statistics The type of the statistics, - * @tparam Vector The type of the vector that is returned by the callback - */ -template -class IRuleRefinementCallback { - public: - - /** - * The data that is provided via the callback's `get` function. - */ - struct Result final { - public: - - /** - * @param s A reference to an object of template type `Statistics` that should be used to search for - * potential refinements - * @param v A reference to an object of template type `Vector` that should be used to search for - * potential refinements - */ - Result(const Statistics& s, const Vector& v) : statistics(s), vector(v) {} - - /** - * A reference to an object of template type `Statistics` that should be used to search for potential - * refinements. - */ - const Statistics& statistics; - - /** - * A reference to an object of template type `Vector` that should be used to search for potential - * refinements. - */ - const Vector& vector; - }; - - virtual ~IRuleRefinementCallback() {}; - - /** - * Invokes the callback and returns its result. 
- * - * @return An object of type `Result` that stores references to the statistics and the vector that may be used - * to search for potential refinements - */ - virtual Result get() = 0; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_exact.hpp b/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_exact.hpp deleted file mode 100644 index c1e0d0db..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/rule_refinement_exact.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_vector.hpp" -#include "common/rule_refinement/rule_refinement.hpp" -#include "common/rule_refinement/rule_refinement_callback.hpp" -#include "common/statistics/statistics_weighted.hpp" - -/** - * Allows to find the best refinements of existing rules, which result from adding a new condition that correspond to a - * certain feature. The thresholds that may be used by the new condition result from the feature values of all training - * examples for the respective feature. 
- * - * @tparam IndexVector The type of the vector that provides access to the indices of the labels for which the refined - * rule is allowed to predict - */ -template -class ExactRuleRefinement final : public IRuleRefinement { - private: - - const IndexVector& labelIndices_; - - const uint32 numExamples_; - - const uint32 featureIndex_; - - const bool nominal_; - - const bool hasZeroWeights_; - - typedef IRuleRefinementCallback Callback; - - const std::unique_ptr callbackPtr_; - - public: - - /** - * @param labelIndices A reference to an object of template type `IndexVector` that provides access to the - * indices of the labels for which the refined rule is allowed to predict - * @param numExamples The total number of training examples with non-zero weights that are covered by the - * existing rule - * @param featureIndex The index of the feature, the new condition corresponds to - * @param nominal True, if the feature at index `featureIndex` is nominal, false otherwise - * @param hasZeroWeights True, if some training examples may have zero weights, false otherwise - * @param callbackPtr An unique pointer to an object of type `IRuleRefinementCallback` that allows to - * retrieve the information that is required to search for potential refinements - */ - ExactRuleRefinement(const IndexVector& labelIndices, uint32 numExamples, uint32 featureIndex, bool nominal, - bool hasZeroWeights, std::unique_ptr callbackPtr); - - void findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) override; - - void findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) override; -}; diff --git a/cpp/subprojects/common/include/common/rule_refinement/score_processor.hpp b/cpp/subprojects/common/include/common/rule_refinement/score_processor.hpp deleted file mode 100644 index 788fa20c..00000000 --- a/cpp/subprojects/common/include/common/rule_refinement/score_processor.hpp +++ /dev/null @@ -1,71 +0,0 @@ -/* - * @author Michael Rapp 
(michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" -#include "common/rule_evaluation/score_vector_binned_dense.hpp" -#include "common/rule_evaluation/score_vector_dense.hpp" -#include "common/rule_refinement/prediction_evaluated.hpp" - -/** - * Allows to process the scores that are stored by an `IScoreVector` in order to convert them into the head of a rule, - * represented by an `AbstractEvaluatedPrediction`. - */ -class ScoreProcessor { - private: - - std::unique_ptr& headPtr_; - - public: - - /** - * @param headPtr A reference to an unique pointer of type `AbstractEvaluatedPrediction` that should be used - * to store the rule head that is created by the processor - */ - ScoreProcessor(std::unique_ptr& headPtr); - - /** - * Processes the scores that are stored by a `DenseScoreVector` in order to convert them - * into the head of a rule. - * - * @param scoreVector A reference to an object of type `DenseScoreVector` that stores the - * scores to be processed - */ - void processScores(const DenseScoreVector& scoreVector); - - /** - * Processes the scores that are stored by a `DenseScoreVector` in order to convert them - * into the head of a rule. - * - * @param scoreVector A reference to an object of type `DenseScoreVector` that stores the - * scores to be processed - */ - void processScores(const DenseScoreVector& scoreVector); - - /** - * Processes the scores that are stored by a `DenseBinnedScoreVector` in order to convert - * them into the head of a rule. - * - * @param scoreVector A reference to an object of type `DenseBinnedScoreVector` that stores - * the scores to be processed - */ - void processScores(const DenseBinnedScoreVector& scoreVector); - - /** - * Processes the scores that are stored by a `DenseBinnedScoreVector` in order to convert - * them into the head of a rule. 
- * - * @param scoreVector A reference to an object of type `DenseBinnedScoreVector` that stores - * the scores to be processed - */ - void processScores(const DenseBinnedScoreVector& scoreVector); - - /** - * Processes the scores that are stored by a `IScoreVector` in order to convert them into the head of a rule. - * - * @param scoreVector A reference to an object of type `IScoreVector` that stores the scores to be processed - */ - void processScores(const IScoreVector& scoreVector); -}; diff --git a/cpp/subprojects/common/include/common/sampling/feature_sampling.hpp b/cpp/subprojects/common/include/common/sampling/feature_sampling.hpp deleted file mode 100644 index 0d9382ad..00000000 --- a/cpp/subprojects/common/include/common/sampling/feature_sampling.hpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector.hpp" -#include "common/input/feature_matrix.hpp" -#include "common/sampling/random.hpp" - -#include - -/** - * Defines an interface for all classes that implement a method for sampling features. - */ -class IFeatureSampling { - public: - - virtual ~IFeatureSampling() {}; - - /** - * Creates and returns a sample of the available features. - * - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - * @return A reference to an object of type `IIndexVector` that provides access to the indices of the - * features that are contained in the sample - */ - virtual const IIndexVector& sample(RNG& rng) = 0; - - /** - * Creates and returns a new object of type `IFeatureSampling` that is suited for use during a beam search. 
- * - * @param rng A reference to an object of type `RNG`, implementing the random number generator be used - * @param resample True, if a new sample of the available features should be created whenever the sampling - * method is invoked during the beam search, false otherwise - * @return An unique pointer to an object of type `IFeatureSampling` that has been created - */ - virtual std::unique_ptr createBeamSearchFeatureSampling(RNG& rng, bool resample) = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IFeatureSampling`. - */ -class IFeatureSamplingFactory { - public: - - virtual ~IFeatureSamplingFactory() {}; - - /** - * Creates and returns a new object of type `IFeatureSampling`. - * - * @return An unique pointer to an object of type `IFeatureSampling` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method for sampling features. - */ -class IFeatureSamplingConfig { - public: - - virtual ~IFeatureSamplingConfig() {}; - - /** - * Creates and returns a new object of type `IFeatureSamplingFactory` according to the specified configuration. - * - * @param featureMatrix A reference to an object of type `IFeatureMatrix` that provides access to the features - * of the training examples - * @return An unique pointer to an object of type `IFeatureSamplingFactory` that has been created - */ - virtual std::unique_ptr createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const = 0; - - /** - * Returns whether feature sampling is used or not. 
- * - * @return True, if feature sampling is used, false otherwise - */ - virtual bool isSamplingUsed() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/sampling/feature_sampling_no.hpp b/cpp/subprojects/common/include/common/sampling/feature_sampling_no.hpp deleted file mode 100644 index 67a75ea4..00000000 --- a/cpp/subprojects/common/include/common/sampling/feature_sampling_no.hpp +++ /dev/null @@ -1,18 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/feature_sampling.hpp" - -/** - * Allows to configure a method for sampling features that does not perform any sampling, but includes all features. - */ -class NoFeatureSamplingConfig final : public IFeatureSamplingConfig { - public: - - std::unique_ptr createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const override; - - bool isSamplingUsed() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/feature_sampling_predefined.hpp b/cpp/subprojects/common/include/common/sampling/feature_sampling_predefined.hpp deleted file mode 100644 index f434cfcc..00000000 --- a/cpp/subprojects/common/include/common/sampling/feature_sampling_predefined.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/feature_sampling.hpp" - -/** - * An implementation of the class `IFeatureSampling` that does not perform any sampling, but always returns a predefined - * set of features. 
- */ -class PredefinedFeatureSampling final : public IFeatureSampling { - private: - - const IIndexVector& indexVector_; - - public: - - /** - * @param indexVector A reference to an object of type `IIndexVector` that stores predefined feature indices - */ - PredefinedFeatureSampling(const IIndexVector& indexVector); - - const IIndexVector& sample(RNG& rng) override; - - std::unique_ptr createBeamSearchFeatureSampling(RNG& rng, bool resample) override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/feature_sampling_without_replacement.hpp b/cpp/subprojects/common/include/common/sampling/feature_sampling_without_replacement.hpp deleted file mode 100644 index c8158700..00000000 --- a/cpp/subprojects/common/include/common/sampling/feature_sampling_without_replacement.hpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/feature_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for sampling features without replacement. - */ -class MLRLCOMMON_API IFeatureSamplingWithoutReplacementConfig { - public: - - virtual ~IFeatureSamplingWithoutReplacementConfig() {}; - - /** - * Returns the fraction of features that are included in a sample. - * - * @return The fraction of features that are included in a sample - */ - virtual float32 getSampleSize() const = 0; - - /** - * Sets the fraction of features that should be included in a sample. - * - * @param sampleSize The fraction of features that should be included in a sample, e.g., a value of 0.6 - * corresponds to 60 % of the available features. 
Must be in (0, 1) or 0, if the default - * sample size `floor(log2(numFeatures - 1) + 1)` should be used - * @return A reference to an object of type `IFeatureSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling features - */ - virtual IFeatureSamplingWithoutReplacementConfig& setSampleSize(float32 sampleSize) = 0; - - /** - * Returns the number of trailing features that are always included in a sample. - * - * @return The number of trailing features that are always included in a sample - */ - virtual uint32 getNumRetained() const = 0; - - /** - * Sets the number fo trailing features that should always be included in a sample. - * - * @param numRetained The number of trailing features that should always be included in a sample. Must be at - * least 0 - * @return A reference to an object of type `IFeatureSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling features - */ - virtual IFeatureSamplingWithoutReplacementConfig& setNumRetained(uint32 numRetained) = 0; -}; - -/** - * Allows to configure a method for sampling features without replacement. 
- */ -class FeatureSamplingWithoutReplacementConfig final : public IFeatureSamplingConfig, - public IFeatureSamplingWithoutReplacementConfig { - private: - - float32 sampleSize_; - - uint32 numRetained_; - - public: - - FeatureSamplingWithoutReplacementConfig(); - - float32 getSampleSize() const override; - - IFeatureSamplingWithoutReplacementConfig& setSampleSize(float32 sampleSize) override; - - uint32 getNumRetained() const override; - - IFeatureSamplingWithoutReplacementConfig& setNumRetained(uint32 numRetained) override; - - std::unique_ptr createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const override; - - bool isSamplingUsed() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/instance_sampling.hpp b/cpp/subprojects/common/include/common/sampling/instance_sampling.hpp deleted file mode 100644 index dfa1b862..00000000 --- a/cpp/subprojects/common/include/common/sampling/instance_sampling.hpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/label_matrix_c_contiguous.hpp" -#include "common/input/label_matrix_csr.hpp" -#include "common/sampling/random.hpp" -#include "common/sampling/weight_vector.hpp" -#include "common/statistics/statistics.hpp" - -#include - -// Forward declarations -class BiPartition; -class SinglePartition; - -/** - * Defines an interface for all classes that implement a method for sampling training examples. - */ -class IInstanceSampling { - public: - - virtual ~IInstanceSampling() {}; - - /** - * Creates and returns a sample of the available training examples. 
- * - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - * @return A reference to an object type `WeightVector` that provides access to the weights of the - * individual training examples - */ - virtual const IWeightVector& sample(RNG& rng) = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IInstanceSampling`. - */ -class IInstanceSamplingFactory { - public: - - virtual ~IInstanceSamplingFactory() {}; - - /** - * Creates and returns a new object of type `IInstanceSampling`. - * - * @param labelMatrix A reference to an object of type `CContiguousLabelMatrix` that provides access to the - * labels of the training examples - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that are included in the training set - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics + - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const = 0; - - /** - * Creates and returns a new object of type `IInstanceSampling`. 
- * - * @param labelMatrix A reference to an object of type `CContiguousLabelMatrix` that provides access to the - * labels of the training examples - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that are included in the training set and the holdout set, - * respectively - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics + - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - BiPartition& partition, IStatistics& statistics) const = 0; - - /** - * Creates and returns a new object of type `IInstanceSampling`. - * - * @param labelMatrix A reference to an object of type `CsrLabelMatrix` that provides access to the labels of - * the training examples - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that are included in the training set - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics + - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr create(const CsrLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const = 0; - - /** - * Creates and returns a new object of type `IInstanceSampling`. 
- * - * @param labelMatrix A reference to an object of type `CsrLabelMatrix` that provides access to the labels of - * the training examples - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that are included in the training set and the holdout set, - * respectively - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics + - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr create(const CsrLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method for sampling instances. - */ -class IInstanceSamplingConfig { - public: - - virtual ~IInstanceSamplingConfig() {}; - - /** - * Creates and returns a new object of type `IInstanceSamplingFactory` according to the specified configuration. - * - * @return An unique pointer to an object of type `IInstanceSamplingFactory` that has been created - */ - virtual std::unique_ptr createInstanceSamplingFactory() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/sampling/instance_sampling_no.hpp b/cpp/subprojects/common/include/common/sampling/instance_sampling_no.hpp deleted file mode 100644 index 480b25de..00000000 --- a/cpp/subprojects/common/include/common/sampling/instance_sampling_no.hpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/instance_sampling.hpp" - -/** - * Allows to configure a method for sampling training examples that does not perform any sampling, but assigns equal - * weights to all examples. 
- */ -class NoInstanceSamplingConfig final : public IInstanceSamplingConfig { - public: - - std::unique_ptr createInstanceSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_example_wise.hpp b/cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_example_wise.hpp deleted file mode 100644 index 8a363446..00000000 --- a/cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_example_wise.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Anna Kulischkin (Anna_Kulischkin@web.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/instance_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for selecting a subset of the available - * training examples using stratification, where distinct label vectors are treated as individual classes. - */ -class MLRLCOMMON_API IExampleWiseStratifiedInstanceSamplingConfig { - public: - - virtual ~IExampleWiseStratifiedInstanceSamplingConfig() {}; - - /** - * Returns the fraction of examples that are included in a sample. - * - * @return The fraction of examples that are included in a sample - */ - virtual float32 getSampleSize() const = 0; - - /** - * Sets the fraction of examples that should be included in a sample. - * - * @param sampleSize The fraction of examples that should be included in a sample, e.g., a value of 0.6 - * corresponds to 60 % of the available training examples. 
Must be in (0, 1) - * @return A reference to an object of type `IExampleWiseStratifiedInstanceSamplingConfig` that - * allows further configuration of the method for sampling instances - */ - virtual IExampleWiseStratifiedInstanceSamplingConfig& setSampleSize(float32 sampleSize) = 0; -}; - -/** - * Allows to configure a method for selecting a subset of the available training examples using stratification, where - * distinct label vectors are treated as individual classes. - */ -class ExampleWiseStratifiedInstanceSamplingConfig final : public IInstanceSamplingConfig, - public IExampleWiseStratifiedInstanceSamplingConfig { - private: - - float32 sampleSize_; - - public: - - ExampleWiseStratifiedInstanceSamplingConfig(); - - float32 getSampleSize() const override; - - IExampleWiseStratifiedInstanceSamplingConfig& setSampleSize(float32 sampleSize) override; - - std::unique_ptr createInstanceSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_label_wise.hpp b/cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_label_wise.hpp deleted file mode 100644 index 75884389..00000000 --- a/cpp/subprojects/common/include/common/sampling/instance_sampling_stratified_label_wise.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * @author Anna Kulischkin (Anna_Kulischkin@web.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/instance_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for selecting a subset of the available - * training examples using stratification, such that for each label the proportion of relevant and irrelevant examples - * is maintained. 
- */ -class MLRLCOMMON_API ILabelWiseStratifiedInstanceSamplingConfig { - public: - - virtual ~ILabelWiseStratifiedInstanceSamplingConfig() {}; - - /** - * Returns the fraction of examples that are included in a sample. - * - * @return The fraction of examples that are included in a sample - */ - virtual float32 getSampleSize() const = 0; - - /** - * Sets the fraction of examples that should be included in a sample. - * - * @param sampleSize The fraction of examples that should be included in a sample, e.g., a value of 0.6 - * corresponds to 60 % of the available training examples. Must be in (0, 1) - * @return A reference to an object of type `ILabelWiseStratifiedInstanceSamplingConfig` that - * allows further configuration of the method for sampling instances - */ - virtual ILabelWiseStratifiedInstanceSamplingConfig& setSampleSize(float32 sampleSize) = 0; -}; - -/** - * Allows to configure a method for selecting a subset of the available training examples using stratification, such - * that for each label the proportion of relevant and irrelevant examples is maintained. 
- */ -class LabelWiseStratifiedInstanceSamplingConfig final : public IInstanceSamplingConfig, - public ILabelWiseStratifiedInstanceSamplingConfig { - private: - - float32 sampleSize_; - - public: - - LabelWiseStratifiedInstanceSamplingConfig(); - - float32 getSampleSize() const override; - - ILabelWiseStratifiedInstanceSamplingConfig& setSampleSize(float32 sampleSize) override; - - std::unique_ptr createInstanceSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/instance_sampling_with_replacement.hpp b/cpp/subprojects/common/include/common/sampling/instance_sampling_with_replacement.hpp deleted file mode 100644 index 43ee6bce..00000000 --- a/cpp/subprojects/common/include/common/sampling/instance_sampling_with_replacement.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/instance_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for selecting a subset of the available - * training examples with replacement. - */ -class MLRLCOMMON_API IInstanceSamplingWithReplacementConfig { - public: - - virtual ~IInstanceSamplingWithReplacementConfig() {}; - - /** - * Returns the fraction of examples that are included in a sample. - * - * @return The fraction of examples that are included in a sample - */ - virtual float32 getSampleSize() const = 0; - - /** - * Sets the fraction of examples that should be included in a sample. - * - * @param sampleSize The fraction of examples that should be included in a sample, e.g., a value of 0.6 - * corresponds to 60 % of the available training examples. 
Must be in (0, 1) - * @return A reference to an object of type `IInstanceSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling instances - */ - virtual IInstanceSamplingWithReplacementConfig& setSampleSize(float32 sampleSize) = 0; -}; - -/** - * Allows to configure a method for selecting a subset of the available training examples with replacement. - */ -class InstanceSamplingWithReplacementConfig final : public IInstanceSamplingConfig, - public IInstanceSamplingWithReplacementConfig { - private: - - float32 sampleSize_; - - public: - - InstanceSamplingWithReplacementConfig(); - - float32 getSampleSize() const override; - - IInstanceSamplingWithReplacementConfig& setSampleSize(float32 sampleSize) override; - - std::unique_ptr createInstanceSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/instance_sampling_without_replacement.hpp b/cpp/subprojects/common/include/common/sampling/instance_sampling_without_replacement.hpp deleted file mode 100644 index 1736f06a..00000000 --- a/cpp/subprojects/common/include/common/sampling/instance_sampling_without_replacement.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/instance_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for selecting a subset of the available - * training examples without replacement. - */ -class MLRLCOMMON_API IInstanceSamplingWithoutReplacementConfig { - public: - - virtual ~IInstanceSamplingWithoutReplacementConfig() {}; - - /** - * Returns the fraction of examples that are included in a sample. - * - * @return The fraction of examples that are included in a sample - */ - virtual float32 getSampleSize() const = 0; - - /** - * Sets the fraction of examples that should be included in a sample. 
- * - * @param sampleSize The fraction of examples that should be included in a sample, e.g., a value of 0.6 - * corresponds to 60 % of the available training examples. Must be in (0, 1) - * @return A reference to an object of type `IInstanceSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling instances - */ - virtual IInstanceSamplingWithoutReplacementConfig& setSampleSize(float32 sampleSize) = 0; -}; - -/** - * Allows to configure a method for selecting a subset of the available training examples without replacement. - */ -class InstanceSamplingWithoutReplacementConfig final : public IInstanceSamplingConfig, - public IInstanceSamplingWithoutReplacementConfig { - private: - - float32 sampleSize_; - - public: - - InstanceSamplingWithoutReplacementConfig(); - - float32 getSampleSize() const override; - - IInstanceSamplingWithoutReplacementConfig& setSampleSize(float32 sampleSize) override; - - std::unique_ptr createInstanceSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/label_sampling.hpp b/cpp/subprojects/common/include/common/sampling/label_sampling.hpp deleted file mode 100644 index 62fcd11d..00000000 --- a/cpp/subprojects/common/include/common/sampling/label_sampling.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector.hpp" -#include "common/input/label_matrix.hpp" -#include "common/sampling/random.hpp" - -#include - -/** - * Defines an interface for all classes that implement a method for sampling labels. - */ -class ILabelSampling { - public: - - virtual ~ILabelSampling() {}; - - /** - * Creates and returns a sample of the available labels. 
- * - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - * @return A reference to an object of type `IIndexVector` that provides access to the indices of the - * labels that are contained in the sample - */ - virtual const IIndexVector& sample(RNG& rng) = 0; -}; - -/** - * Defines an interface for all factories that allow to create objects of type `ILabelSampling`. - */ -class ILabelSamplingFactory { - public: - - virtual ~ILabelSamplingFactory() {}; - - /** - * Creates and returns a new object of type `ILabelSampling`. - * - * @return An unique pointer to an object of type `ILabelSampling` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method for sampling labels. - */ -class ILabelSamplingConfig { - public: - - virtual ~ILabelSamplingConfig() {}; - - /** - * Creates and returns a new object of type `ILabelSamplingFactory` according to the specified configuration. - * - * @param labelMatrix A reference to an object of type `ILabelMatrix` that provides access to the labels of - * the training examples - * @return An unique pointer to an object of type `ILabelSamplingFactory` that has been created - */ - virtual std::unique_ptr createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/sampling/label_sampling_no.hpp b/cpp/subprojects/common/include/common/sampling/label_sampling_no.hpp deleted file mode 100644 index f4617cc3..00000000 --- a/cpp/subprojects/common/include/common/sampling/label_sampling_no.hpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/label_sampling.hpp" - -/** - * Allows to configure a method for sampling labels that does not perform any sampling, but includes all labels. 
- */ -class NoLabelSamplingConfig final : public ILabelSamplingConfig { - public: - - std::unique_ptr createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/label_sampling_round_robin.hpp b/cpp/subprojects/common/include/common/sampling/label_sampling_round_robin.hpp deleted file mode 100644 index a6f59df2..00000000 --- a/cpp/subprojects/common/include/common/sampling/label_sampling_round_robin.hpp +++ /dev/null @@ -1,17 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/label_sampling.hpp" - -/** - * Allows to configure a method for sampling labels in a round-robin fashion. - */ -class RoundRobinLabelSamplingConfig final : public ILabelSamplingConfig { - public: - - std::unique_ptr createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/label_sampling_without_replacement.hpp b/cpp/subprojects/common/include/common/sampling/label_sampling_without_replacement.hpp deleted file mode 100644 index 1094fd45..00000000 --- a/cpp/subprojects/common/include/common/sampling/label_sampling_without_replacement.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/label_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for sampling labels without replacement. - */ -class MLRLCOMMON_API ILabelSamplingWithoutReplacementConfig { - public: - - virtual ~ILabelSamplingWithoutReplacementConfig() {}; - - /** - * Returns the number of labels that are included in a sample. - * - * @return The number of labels that are included in a sample - */ - virtual uint32 getNumSamples() const = 0; - - /** - * Sets the number of labels that should be included in a sample. 
- * - * @param numSamples The number of labels that should be included in a sample. Must be at least 1 - * @return A reference to an object of type `ILabelSamplingWithoutReplacementConfig` that allows - * further configuration of the method for sampling labels - */ - virtual ILabelSamplingWithoutReplacementConfig& setNumSamples(uint32 numSamples) = 0; -}; - -/** - * Allows to configure a method for sampling labels without replacement. - */ -class LabelSamplingWithoutReplacementConfig final : public ILabelSamplingConfig, - public ILabelSamplingWithoutReplacementConfig { - private: - - uint32 numSamples_; - - public: - - LabelSamplingWithoutReplacementConfig(); - - uint32 getNumSamples() const override; - - ILabelSamplingWithoutReplacementConfig& setNumSamples(uint32 numSamples) override; - - std::unique_ptr createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition.hpp b/cpp/subprojects/common/include/common/sampling/partition.hpp deleted file mode 100644 index ab938ef4..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition.hpp +++ /dev/null @@ -1,119 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/util/quality.hpp" - -#include - -// Forward declarations -class IStoppingCriterion; -class IStoppingCriterionFactory; -class IInstanceSampling; -class IInstanceSamplingFactory; -class IRowWiseLabelMatrix; -class IStatistics; -class IThresholdsSubset; -class ICoverageState; -class AbstractPrediction; -class IMarginalProbabilityCalibrationModel; -class IMarginalProbabilityCalibrator; -class IJointProbabilityCalibrationModel; -class IJointProbabilityCalibrator; - -/** - * Defines an interface for all classes that provide access to the indices of training examples that have been split - * into a training set and a holdout set. 
- */ -class IPartition { - public: - - virtual ~IPartition() {}; - - /** - * Creates and returns a new instance of the class `IStoppingCriterion`, based on the type of this partition. - * - * @param factory A reference to an object of type `IStoppingCriterionFactory` that should be used to create - * the instance - * @return An unique pointer to an object of type `IStoppingCriterion` that has been created - */ - virtual std::unique_ptr createStoppingCriterion( - const IStoppingCriterionFactory& factory) = 0; - - /** - * Creates and returns a new instance of the class `IInstanceSampling`, based on the type of this partition. - * - * @param factory A reference to an object of type `IInstanceSamplingFactory` that should be used to - * create the instance - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise access to - * the labels of individual training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to the statistics - * which serve as a basis for learning rules - * @return An unique pointer to an object of type `IInstanceSampling` that has been created - */ - virtual std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - const IRowWiseLabelMatrix& labelMatrix, - IStatistics& statistics) = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sample and are marked as covered according to a given object of type - * `ICoverageState`. 
- * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * evaluate the prediction - * @param coverageState A reference to an object of type `ICoverageState` that keeps track of the examples - * that are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that - * are predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, const AbstractPrediction& head) = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `ICoverageState`. - * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * recalculate the prediction - * @param coverageState A reference to an object of type `ICoverageState` that keeps track of the examples - * that are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, AbstractPrediction& head) = 0; - - /** - * Fits and returns a model for the calibration of marginal probabilities, based on the type of this partition. 
- * - * @param probabilityCalibrator A reference to an object of type `IMarginalProbabilityCalibrator` that should be - * used to fit the calibration model - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise - * access to the labels of the training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IMarginalProbabilityCalibrationModel` - * that has been fit - */ - virtual std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) = 0; - - /** - * Fits and returns a model for the calibration of joint probabilities, based on the type of this partition. - * - * @param probabilityCalibrator A reference to an object of type `IJointProbabilityCalibrator` that should be - * used to fit the calibration model - * @param labelMatrix A reference to an object of type `IRowWiseLabelMatrix` that provides row-wise - * access to the labels of the training examples - * @param statistics A reference to an object of type `IStatistics` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IJointProbabilityCalibrationModel` that - * has been fit - */ - virtual std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) = 0; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_bi.hpp b/cpp/subprojects/common/include/common/sampling/partition_bi.hpp deleted file mode 100644 index 9577dfee..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_bi.hpp +++ /dev/null @@ -1,150 +0,0 @@ -/* - * @author Michael Rapp 
(michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" -#include "common/sampling/instance_sampling.hpp" -#include "common/sampling/partition.hpp" - -/** - * An implementation of the class `IPartition` that provides random access to the indices of elements that are included - * two, mutually exclusive, sets. - */ -class BiPartition final : public IPartition { - private: - - DenseVector vector_; - - const uint32 numFirst_; - - bool firstSorted_; - - bool secondSorted_; - - public: - - /** - * @param numFirst The number of elements that are contained by the first set - * @param numSecond The number of elements that are contained by the second set - */ - BiPartition(uint32 numFirst, uint32 numSecond); - - /** - * An iterator that provides access to the indices that are contained by the first or second set and allows to - * modify them. - */ - typedef DenseVector::iterator iterator; - - /** - * An iterator that provides read-only access to the indices that are contained in the first or second set. - */ - typedef DenseVector::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of the elements that are contained by the first set. - * - * @return An `iterator` to the beginning of the first set - */ - iterator first_begin(); - - /** - * Returns an `iterator` to the end of the elements that are contained by the first set. - * - * @return An `iterator` to the end of the first set - */ - iterator first_end(); - - /** - * Returns a `const_iterator` to the beginning of the elements that are contained by the first set. - * - * @return A `const_iterator` to the beginning of the first set - */ - const_iterator first_cbegin() const; - - /** - * Returns a `const_iterator` to the end of the elements that are contained by the first set. 
- * - * @return A `const_iterator` to the end of the first set - */ - const_iterator first_cend() const; - - /** - * Returns an `iterator` to the beginning of the elements that are contained by the second set. - * - * @return An `iterator` to the beginning of the second set - */ - iterator second_begin(); - - /** - * Returns an `iterator` to the beginning of the elements that are contained by the second set. - * - * @return An `iterator` to the beginning of the second set - */ - iterator second_end(); - - /** - * Returns a `const_iterator` to the beginning of the elements that are contained by the second set. - * - * @return A `const_iterator` to the beginning of the second set - */ - const_iterator second_cbegin() const; - - /** - * Returns a `const_iterator` to the end of the elements that are contained by the second set. - * - * @return A `const_iterator` to the end of the second set - */ - const_iterator second_cend() const; - - /** - * Returns the number of elements that are contained by the first set. - * - * @return The number of elements that are contained by the first set - */ - uint32 getNumFirst() const; - - /** - * Returns the number of elements that are contained by the second set. - * - * @return The number of elements that are contained by the second set - */ - uint32 getNumSecond() const; - - /** - * Sorts the elements that are contained by the first set in increasing order. - */ - void sortFirst(); - - /** - * Sorts the elements that are contained by the second set in increasing order. - */ - void sortSecond(); - - /** - * Returns the total number of elements. 
- * - * @return The total number of elements - */ - uint32 getNumElements() const; - - std::unique_ptr createStoppingCriterion(const IStoppingCriterionFactory& factory) override; - - std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - const IRowWiseLabelMatrix& labelMatrix, - IStatistics& statistics) override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, - const AbstractPrediction& head) override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, - AbstractPrediction& head) override; - - std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) override; - - std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_sampling.hpp b/cpp/subprojects/common/include/common/sampling/partition_sampling.hpp deleted file mode 100644 index 91bd823c..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_sampling.hpp +++ /dev/null @@ -1,75 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/label_matrix_c_contiguous.hpp" -#include "common/input/label_matrix_csr.hpp" -#include "common/sampling/partition.hpp" -#include "common/sampling/random.hpp" - -#include - -/** - * Defines an interface for all classes that implement a method for partitioning the available training examples into a - * training set and a holdout set. - */ -class IPartitionSampling { - public: - - virtual ~IPartitionSampling() {}; - - /** - * Creates and returns a partition of the available training examples. 
- * - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - * @return A reference to an object of type `IPartition` that provides access to the indices of the - * training examples that belong to the training set and holdout set, respectively - */ - virtual IPartition& partition(RNG& rng) = 0; -}; - -/** - * Defines an interface for all factories that allow to create objects of type `IPartitionSampling`. - */ -class IPartitionSamplingFactory { - public: - - virtual ~IPartitionSamplingFactory() {}; - - /** - * Creates and returns a new object of type `IPartitionSampling`. - * - * @param labelMatrix A reference to an object of type `CContiguousLabelMatrix` that provides random access to - * the labels of the training examples - * @return An unique pointer to an object of type `IPartitionSampling` that has been created - */ - virtual std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix) const = 0; - - /** - * Creates and returns a new object of type `IPartitionSampling`. - * - * @param labelMatrix A reference to an object of type `CsrLabelMatrix` that provides row-wise access to the - * labels of the training examples - * @return An unique pointer to an object of type `IPartitionSampling` that has been created - */ - virtual std::unique_ptr create(const CsrLabelMatrix& labelMatrix) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a method for partitioning the available training - * examples into a training set and a holdout set. - */ -class IPartitionSamplingConfig { - public: - - virtual ~IPartitionSamplingConfig() {}; - - /** - * Creates and returns a new object of type `IPartitionSamplingFactory` according to the specified - * configuration. 
- * - * @return An unique pointer to an object of type `IPartitionSamplingFactory` that has been created - */ - virtual std::unique_ptr createPartitionSamplingFactory() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_random.hpp b/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_random.hpp deleted file mode 100644 index f6199c5d..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_random.hpp +++ /dev/null @@ -1,57 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/sampling/partition_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for partitioning the available training - * examples into a training set and a holdout set that randomly splits the training examples into two mutually exclusive - * sets. - */ -class MLRLCOMMON_API IRandomBiPartitionSamplingConfig { - public: - - virtual ~IRandomBiPartitionSamplingConfig() {}; - - /** - * Returns the fraction of examples that are included in the holdout set. - * - * @return The fraction of examples that are included in the holdout set - */ - virtual float32 getHoldoutSetSize() const = 0; - - /** - * Sets the fraction of examples that should be included in the holdout set. - * - * @param holdoutSetSize The fraction of examples that should be included in the holdout set, e.g. a value of - * 0.6 corresponds to 60 % of the available examples. 
Must be in (0, 1) - * @return A reference to an object of type `IRandomBiPartitionSamplingConfig` that allows - * further configuration of the method for partitioning the available training examples - * into a training set and a holdout set - */ - virtual IRandomBiPartitionSamplingConfig& setHoldoutSetSize(float32 holdoutSetSize) = 0; -}; - -/** - * Allows to configure a method for partitioning the available training examples into a training set and a holdout set - * that randomly splits the training examples into two mutually exclusive sets. - */ -class RandomBiPartitionSamplingConfig final : public IPartitionSamplingConfig, - public IRandomBiPartitionSamplingConfig { - private: - - float32 holdoutSetSize_; - - public: - - RandomBiPartitionSamplingConfig(); - - float32 getHoldoutSetSize() const override; - - IRandomBiPartitionSamplingConfig& setHoldoutSetSize(float32 holdoutSetSize) override; - - std::unique_ptr createPartitionSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_example_wise.hpp b/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_example_wise.hpp deleted file mode 100644 index 15d68708..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_example_wise.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/partition_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for partitioning the available training - * examples into a training set and a holdout set using stratification, where distinct label vectors are treated as - * individual classes. 
- */ -class MLRLCOMMON_API IExampleWiseStratifiedBiPartitionSamplingConfig { - public: - - virtual ~IExampleWiseStratifiedBiPartitionSamplingConfig() {}; - - /** - * Returns the fraction of examples that are included in the holdout set. - * - * @return The fraction of examples that are included in the holdout set - */ - virtual float32 getHoldoutSetSize() const = 0; - - /** - * Sets the fraction of examples that should be included in the holdout set. - * - * @param holdoutSetSize The fraction of examples that should be included in the holdout set, e.g. a value of - * 0.6 corresponds to 60 % of the available examples. Must be in (0, 1) - * @return A reference to an object of type `IExampleWiseStratifiedBiPartitionSamplingConfig` - * that allows further configuration of the method for partitioning the available - * training examples into a training set and a holdout set - */ - virtual IExampleWiseStratifiedBiPartitionSamplingConfig& setHoldoutSetSize(float32 holdoutSetSize) = 0; -}; - -/** - * Allows to configure a method for partitioning the available training examples into a training set and a holdout set - * using stratification, where distinct label vectors are treated as individual classes. 
- */ -class ExampleWiseStratifiedBiPartitionSamplingConfig final : public IPartitionSamplingConfig, - public IExampleWiseStratifiedBiPartitionSamplingConfig { - private: - - float32 holdoutSetSize_; - - public: - - ExampleWiseStratifiedBiPartitionSamplingConfig(); - - float32 getHoldoutSetSize() const override; - - IExampleWiseStratifiedBiPartitionSamplingConfig& setHoldoutSetSize(float32 holdoutSetSize) override; - - std::unique_ptr createPartitionSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_label_wise.hpp b/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_label_wise.hpp deleted file mode 100644 index ce2390c6..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_sampling_bi_stratified_label_wise.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/partition_sampling.hpp" - -/** - * Defines an interface for all classes that allow to configure a method for partitioning the available training - * examples into a training set and a holdout set using stratification, such that for each label the proportion of - * relevant and irrelevant examples is maintained. - */ -class MLRLCOMMON_API ILabelWiseStratifiedBiPartitionSamplingConfig { - public: - - virtual ~ILabelWiseStratifiedBiPartitionSamplingConfig() {}; - - /** - * Returns the fraction of examples that are included in the holdout set. - * - * @return The fraction of examples that are included in the holdout set - */ - virtual float32 getHoldoutSetSize() const = 0; - - /** - * Sets the fraction of examples that should be included in the holdout set. - * - * @param holdoutSetSize The fraction of examples that should be included in the holdout set, e.g. a value of - * 0.6 corresponds to 60 % of the available examples. 
Must be in (0, 1) - * @return A reference to an object of type `ILabelWiseStratifiedBiPartitionSamplingConfig` - * that allows further configuration of the method for partitioning the available - * training examples into a training set and a holdout set - */ - virtual ILabelWiseStratifiedBiPartitionSamplingConfig& setHoldoutSetSize(float32 holdoutSetSize) = 0; -}; - -/** - * Allows to configure a method for partitioning the available training examples into a training set and a holdout set - * using stratification, such that for each label the proportion of relevant and irrelevant examples is maintained. - */ -class LabelWiseStratifiedBiPartitionSamplingConfig final : public IPartitionSamplingConfig, - public ILabelWiseStratifiedBiPartitionSamplingConfig { - private: - - float32 holdoutSetSize_; - - public: - - LabelWiseStratifiedBiPartitionSamplingConfig(); - - float32 getHoldoutSetSize() const override; - - ILabelWiseStratifiedBiPartitionSamplingConfig& setHoldoutSetSize(float32 holdoutSetSize) override; - - std::unique_ptr createPartitionSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_sampling_no.hpp b/cpp/subprojects/common/include/common/sampling/partition_sampling_no.hpp deleted file mode 100644 index f391a86f..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_sampling_no.hpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/partition_sampling.hpp" - -/** - * Allows to configure a method for partitioning the available training examples into a training set and a holdout set - * that does not split the training examples, but includes all of them in the training set. 
- */ -class NoPartitionSamplingConfig final : public IPartitionSamplingConfig { - public: - - std::unique_ptr createPartitionSamplingFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/partition_single.hpp b/cpp/subprojects/common/include/common/sampling/partition_single.hpp deleted file mode 100644 index 508436f3..00000000 --- a/cpp/subprojects/common/include/common/sampling/partition_single.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/iterator/index_iterator.hpp" -#include "common/sampling/partition.hpp" - -/** - * An implementation of the class `IPartition` that provides random access to the indices of elements that are included - * in a single set. - */ -class SinglePartition final : public IPartition { - private: - - const uint32 numElements_; - - public: - - /** - * @param numElements The number of elements to be included in the partition. Must be at least 1 - */ - SinglePartition(uint32 numElements); - - /** - * An iterator that provides read-only access to the indices of the elements that are included in the partition. - */ - typedef IndexIterator const_iterator; - - /** - * Returns a `const_iterator` to the beginning of the indices that are contained in the partition. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the indices that are contained in the partition. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of elements that are contained in the partition. 
- * - * @return The number of elements - */ - uint32 getNumElements() const; - - std::unique_ptr createStoppingCriterion(const IStoppingCriterionFactory& factory) override; - - std::unique_ptr createInstanceSampling(const IInstanceSamplingFactory& factory, - const IRowWiseLabelMatrix& labelMatrix, - IStatistics& statistics) override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, - const AbstractPrediction& head) override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, - AbstractPrediction& head) override; - - std::unique_ptr fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) override; - - std::unique_ptr fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/random.hpp b/cpp/subprojects/common/include/common/sampling/random.hpp deleted file mode 100644 index 27f3afe3..00000000 --- a/cpp/subprojects/common/include/common/sampling/random.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * Implements a fast random number generator using 32 bit XOR shifts (for details, see - * http://www.jstatsoft.org/v08/i14/paper). - */ -class RNG final { - private: - - uint32 randomState_; - - public: - - /** - * @param randomState The seed to be used by the random number generator - */ - RNG(uint32 randomState); - - /** - * Generates and returns a random number in [min, max). 
- * - * @param min The minimum number (inclusive) - * @param max The maximum number (exclusive) - * @return The random number that has been generated - */ - uint32 random(uint32 min, uint32 max); -}; diff --git a/cpp/subprojects/common/include/common/sampling/stratified_sampling_example_wise.hpp b/cpp/subprojects/common/include/common/sampling/stratified_sampling_example_wise.hpp deleted file mode 100644 index be372692..00000000 --- a/cpp/subprojects/common/include/common/sampling/stratified_sampling_example_wise.hpp +++ /dev/null @@ -1,65 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/weight_vector_bit.hpp" - -#include -#include -#include - -/** - * Implements stratified sampling, where distinct label vectors are treated as individual classes. - * - * @tparam LabelMatrix The type of the label matrix that provides random or row-wise access to the labels of the - * training examples - * @tparam IndexIterator The type of the iterator that provides access to the indices of the examples that should be - * considered - */ -template -class ExampleWiseStratification final { - private: - - const uint32 numTotal_; - - typedef typename LabelMatrix::view_type Key; - - typedef typename LabelMatrix::view_type::Hash Hash; - - typedef typename LabelMatrix::view_type::Pred Pred; - - std::unordered_map, Hash, Pred> map_; - - std::vector>> order_; - - public: - - /** - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides random or row-wise - * access to the labels of the training examples - * @param indicesBegin An iterator to the beginning of the indices of the examples that should be considered - * @param indicesEnd An iterator to the end of the indices of hte examples that should be considered - */ - ExampleWiseStratification(const LabelMatrix& labelMatrix, IndexIterator indicesBegin, IndexIterator indicesEnd); - - /** - * Randomly 
selects a stratified sample of the available examples and sets their weights to 1, while the - * remaining weights are set to 0. - * - * @param weightVector A reference to an object of type `BitWeightVector`, the weights should be written to - * @param sampleSize The fraction of the available examples to be selected - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be - * used - */ - void sampleWeights(BitWeightVector& weightVector, float32 sampleSize, RNG& rng) const; - - /** - * Randomly splits the available examples into two distinct sets and updates a given `BiPartition` accordingly. - * - * @param partition A reference to an object of type `BiPartition` to be updated - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ - void sampleBiPartition(BiPartition& partition, RNG& rng) const; -}; diff --git a/cpp/subprojects/common/include/common/sampling/stratified_sampling_label_wise.hpp b/cpp/subprojects/common/include/common/sampling/stratified_sampling_label_wise.hpp deleted file mode 100644 index ced20285..00000000 --- a/cpp/subprojects/common/include/common/sampling/stratified_sampling_label_wise.hpp +++ /dev/null @@ -1,63 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/weight_vector_bit.hpp" - -/** - * Implements iterative stratified sampling for selecting a subset of the available training examples as proposed in the - * following publication: - * - * Sechidis K., Tsoumakas G., Vlahavas I. (2011) On the Stratification of Multi-label Data. In: Machine Learning and - * Knowledge Discovery in Databases. ECML PKDD 2011. Lecture Notes in Computer Science, vol 6913. Springer. 
- * - * @tparam LabelMatrix The type of the label matrix that provides random or row-wise access to the labels of the - * training examples - * @tparam IndexIterator The type of the iterator that provides access to the indices of the examples that should be - * considered - */ -template -class LabelWiseStratification final { - private: - - const uint32 numRows_; - - uint32 numCols_; - - uint32* rowIndices_; - - uint32* colIndices_; - - public: - - /** - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides random or row-wise - * access to the labels of the training examples - * @param indicesBegin An iterator to the beginning of the indices of the examples that should be considered - * @param indicesEnd An iterator to the end of the indices of the examples that should be considered - */ - LabelWiseStratification(const LabelMatrix& labelMatrix, IndexIterator indicesBegin, IndexIterator indicesEnd); - - ~LabelWiseStratification(); - - /** - * Randomly selects a stratified sample of the available examples and sets their weights to 1, while the - * remaining weights are set to 0. - * - * @param weightVector A reference to an object of type `BitWeightVector`, the weights should be written to - * @param sampleSize The fraction of the available examples to be selected - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be - * used - */ - void sampleWeights(BitWeightVector& weightVector, float32 sampleSize, RNG& rng) const; - - /** - * Randomly splits the available examples into two distinct sets and updates a given `BiPartition` accordingly. 
- * - * @param partition A reference to an object of type `BiPartition` to be updated - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ - void sampleBiPartition(BiPartition& partition, RNG& rng) const; -}; diff --git a/cpp/subprojects/common/include/common/sampling/weight_sampling.hpp b/cpp/subprojects/common/include/common/sampling/weight_sampling.hpp deleted file mode 100644 index 7212b7b5..00000000 --- a/cpp/subprojects/common/include/common/sampling/weight_sampling.hpp +++ /dev/null @@ -1,109 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/weight_vector_bit.hpp" - -#include - -/** - * Randomly selects `numSamples` out of `numTotal` elements and sets their weights to 1, while the remaining weights are - * set to 0, by using a set to keep track of the elements that have already been selected. This method is suitable if - * `numSamples` is much smaller than `numTotal`. 
- * - * @tparam Iterator The type of the iterator that provides random access to the indices of the available elements to - * sample from - * @param weightVector A reference to an object of type `BitWeightVector` the weights should be written to - * @param iterator An iterator that provides random access to the indices of the available elements to sample from - * @param numTotal The total number of available elements to sample from - * @param numSamples The number of weights to be set to 1 - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void sampleWeightsWithoutReplacementViaTrackingSelection(BitWeightVector& weightVector, Iterator iterator, - uint32 numTotal, uint32 numSamples, RNG& rng) { - weightVector.clear(); - std::unordered_set selectedIndices; - - for (uint32 i = 0; i < numSamples; i++) { - bool shouldContinue = true; - uint32 sampledIndex; - - while (shouldContinue) { - uint32 randomIndex = rng.random(0, numTotal); - sampledIndex = iterator[randomIndex]; - shouldContinue = !selectedIndices.insert(sampledIndex).second; - } - - weightVector.set(sampledIndex, true); - } - - weightVector.setNumNonZeroWeights(numSamples); -} - -/** - * Randomly selects `numSamples` out of `numTotal` elements and sets their weights to 1, while the remaining weights are - * set to 0, by using a pool, i.e., an array, to keep track of the elements that have not been selected yet. 
- * - * @tparam Iterator The type of the iterator that provides random access to the indices of the available elements to - * sample from - * @param weightVector A reference to an object of type `BitWeightVector` the weights should be written to - * @param iterator An iterator that provides random access to the indices of the available elements to sample from - * @param numTotal The total number of available elements to sample from - * @param numSamples The number of weights to be set to 1 - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void sampleWeightsWithoutReplacementViaPool(BitWeightVector& weightVector, Iterator iterator, - uint32 numTotal, uint32 numSamples, RNG& rng) { - weightVector.clear(); - uint32* pool = new uint32[numTotal]; - - // Initialize pool... - for (uint32 i = 0; i < numTotal; i++) { - pool[i] = iterator[i]; - } - - for (uint32 i = 0; i < numSamples; i++) { - // Randomly select an index that has not been drawn yet... - uint32 randomIndex = rng.random(0, numTotal - i); - uint32 sampledIndex = pool[randomIndex]; - - // Set weight at the selected index to 1... - weightVector.set(sampledIndex, true); - - // Move the index at the border to the position of the recently drawn index... - pool[randomIndex] = pool[numTotal - i - 1]; - } - - delete[] pool; - weightVector.setNumNonZeroWeights(numSamples); -} - -/** - * Randomly selects `numSamples` out of `numTotal` elements and sets their weights to 1, while the remaining weights are - * set to 0. The method that is used internally is chosen automatically, depending on the ratio `numSamples / numTotal`. 
- * - * @tparam Iterator The type of the iterator that provides random access to the indices of the available elements to - * sample from - * @param weightVector A reference to an object of type `BitWeightVector` the weights should be written to - * @param iterator An iterator that provides random access to the indices of the available elements to sample from - * @param numTotal The total number of available elements to sample from - * @param numSamples The number of weights to be set to 1 - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - * - */ -template -static inline void sampleWeightsWithoutReplacement(BitWeightVector& weightVector, Iterator iterator, uint32 numTotal, - uint32 numSamples, RNG& rng) { - float64 ratio = numTotal > 0 ? ((float64) numSamples) / ((float64) numTotal) : 1; - - if (ratio < 0.06) { - // For very small ratios use tracking selection - sampleWeightsWithoutReplacementViaTrackingSelection(weightVector, iterator, numTotal, numSamples, rng); - } else { - // Otherwise, use a pool as the default method - sampleWeightsWithoutReplacementViaPool(weightVector, iterator, numTotal, numSamples, rng); - } -} diff --git a/cpp/subprojects/common/include/common/sampling/weight_vector.hpp b/cpp/subprojects/common/include/common/sampling/weight_vector.hpp deleted file mode 100644 index 0ec26408..00000000 --- a/cpp/subprojects/common/include/common/sampling/weight_vector.hpp +++ /dev/null @@ -1,38 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include - -// Forward declarations -class IThresholds; -class IThresholdsSubset; - -/** - * Defines an interface for one-dimensional vectors that provide access to weights. - */ -class IWeightVector { - public: - - virtual ~IWeightVector() {}; - - /** - * Returns whether the vector contains any zero weights or not. 
- * - * @return True, if the vector contains any zero weights, false otherwise - */ - virtual bool hasZeroWeights() const = 0; - - /** - * Creates and returns a new instance of type `IThresholdsSubset` that provides access to the statistics that - * correspond to individual training examples whose weights are stored in this vector. - * - * @param thresholds A reference to an object of type `IThresholds` that should be used to create the - * instance - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created - */ - virtual std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/sampling/weight_vector_bit.hpp b/cpp/subprojects/common/include/common/sampling/weight_vector_bit.hpp deleted file mode 100644 index 430341bc..00000000 --- a/cpp/subprojects/common/include/common/sampling/weight_vector_bit.hpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_bit.hpp" -#include "common/sampling/weight_vector.hpp" - -/** - * An one-dimensional vector that provides random access to a fixed number of binary weights stored in a `BitVector`. - */ -class BitWeightVector final : public IWeightVector { - private: - - BitVector vector_; - - uint32 numNonZeroWeights_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - BitWeightVector(uint32 numElements); - - /** - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - BitWeightVector(uint32 numElements, bool init); - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements - */ - uint32 getNumElements() const; - - /** - * Returns the weight at a specific position. 
- * - * @param pos The position - * @return The weight at the specified position - */ - bool operator[](uint32 pos) const; - - /** - * Sets the weight at a specific position. - * - * @param pos The position - * @param weight The weight to be set - */ - void set(uint32 pos, bool weight); - - /** - * Sets all weights to zero. - */ - void clear(); - - /** - * Returns the number of non-zero weights. - * - * @return The number of non-zero weights - */ - uint32 getNumNonZeroWeights() const; - - /** - * Sets the number of non-zero weights. - * - * @param numNonZeroWeights The number of non-zero weights to be set - */ - void setNumNonZeroWeights(uint32 numNonZeroWeights); - - bool hasZeroWeights() const override; - - std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/weight_vector_dense.hpp b/cpp/subprojects/common/include/common/sampling/weight_vector_dense.hpp deleted file mode 100644 index 6ba5fcdd..00000000 --- a/cpp/subprojects/common/include/common/sampling/weight_vector_dense.hpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/vector_dense.hpp" -#include "common/sampling/weight_vector.hpp" - -/** - * An one-dimensional vector that provides random access to a fixed number of weights stored in a C-contiguous array. 
- * - * @tparam T The type of the weights - */ -template -class DenseWeightVector final : public IWeightVector { - private: - - DenseVector vector_; - - uint32 numNonZeroWeights_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - DenseWeightVector(uint32 numElements); - - /** - * @param numElements The number of elements in the vector - * @param init True, if all elements in the vector should be value-initialized, false otherwise - */ - DenseWeightVector(uint32 numElements, bool init); - - /** - * An iterator that provides access to the weights in the vector and allows to modify them. - */ - typedef typename DenseVector::iterator iterator; - - /** - * An iterator that provides read-only access to the weights in the vector. - */ - typedef typename DenseVector::const_iterator const_iterator; - - /** - * Returns an `iterator` to the beginning of the vector. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the vector. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the vector. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the vector. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements - */ - uint32 getNumElements() const; - - /** - * Returns a const reference to the weight at a specific position. - * - * @param pos The position - * @return A const reference to the specified weight - */ - const T& operator[](uint32 pos) const; - - /** - * Returns a reference to the weight at a specific position. - * - * @param pos The position - * @return A reference to the specified weight - */ - T& operator[](uint32 pos); - - /** - * Returns the number of non-zero weights. 
- * - * @return The number of non-zero weights - */ - uint32 getNumNonZeroWeights() const; - - /** - * Sets the number of non-zero weights. - * - * @param numNonZeroWeights The number of non-zero weights to be set - */ - void setNumNonZeroWeights(uint32 numNonZeroWeights); - - bool hasZeroWeights() const override; - - std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/weight_vector_equal.hpp b/cpp/subprojects/common/include/common/sampling/weight_vector_equal.hpp deleted file mode 100644 index d5a230ac..00000000 --- a/cpp/subprojects/common/include/common/sampling/weight_vector_equal.hpp +++ /dev/null @@ -1,48 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/weight_vector.hpp" - -/** - * An one-dimensional vector that provides random access to a fixed number of equal weights. - */ -class EqualWeightVector final : public IWeightVector { - private: - - const uint32 numElements_; - - public: - - /** - * @param numElements The number of elements in the vector - */ - EqualWeightVector(uint32 numElements); - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements - */ - uint32 getNumElements() const; - - /** - * Returns the number of non-zero weights. - * - * @return The number of non-zero weights - */ - uint32 getNumNonZeroWeights() const; - - /** - * Returns the weight at a specific position. 
- * - * @param pos The position - * @return The weight at the specified position - */ - uint32 operator[](uint32 pos) const; - - bool hasZeroWeights() const override; - - std::unique_ptr createThresholdsSubset(IThresholds& thresholds) const override; -}; diff --git a/cpp/subprojects/common/include/common/sampling/weight_vector_out_of_sample.hpp b/cpp/subprojects/common/include/common/sampling/weight_vector_out_of_sample.hpp deleted file mode 100644 index dfebde75..00000000 --- a/cpp/subprojects/common/include/common/sampling/weight_vector_out_of_sample.hpp +++ /dev/null @@ -1,42 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * An one-dimensional vector that provides random access to a fixed number of weights that are obtained from another - * vector by setting zero weights to one and non-zero weights to zero. - * - * @tparam WeightVector The type of the other vector - */ -template -class OutOfSampleWeightVector final { - private: - - const WeightVector& vector_; - - public: - - /** - * @param vector A reference to an object of template type `WeightVector` that provides access to the original - * weights - */ - OutOfSampleWeightVector(const WeightVector& vector); - - /** - * Returns the number of elements in the vector. - * - * @return The number of elements - */ - uint32 getNumElements() const; - - /** - * Returns the weight at a specific position. 
- * - * @param pos The position - * @return The weight at the specified position - */ - bool operator[](uint32 pos) const; -}; diff --git a/cpp/subprojects/common/include/common/statistics/histogram.hpp b/cpp/subprojects/common/include/common/statistics/histogram.hpp deleted file mode 100644 index 7cae62cc..00000000 --- a/cpp/subprojects/common/include/common/statistics/histogram.hpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/statistics/statistics_weighted_immutable.hpp" - -/** - * Defines an interface for all classes that provide access to statistics that are organized as a histogram, i.e., where - * the statistics of multiple training examples are aggregated into the same bin. - */ -class IHistogram : virtual public IImmutableWeightedStatistics { - public: - - virtual ~IHistogram() override {}; - - /** - * Sets all statistics in the histogram to zero. - */ - virtual void clear() = 0; - - /** - * Returns the weight of the bin at a specific index, i.e., the number of statistics that have been assigned to - * it. - * - * @param binIndex The index of the bin - * @return The weight of the bin - */ - virtual uint32 getBinWeight(uint32 binIndex) const = 0; - - /** - * Adds the statistic at a specific index to the corresponding bin. 
- * - * @param statisticIndex The index of the statistic - */ - virtual void addToBin(uint32 statisticIndex) = 0; -}; diff --git a/cpp/subprojects/common/include/common/statistics/statistics.hpp b/cpp/subprojects/common/include/common/statistics/statistics.hpp deleted file mode 100644 index 6c19bb31..00000000 --- a/cpp/subprojects/common/include/common/statistics/statistics.hpp +++ /dev/null @@ -1,278 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - */ -#pragma once - -#include "common/rule_refinement/prediction_complete.hpp" -#include "common/rule_refinement/prediction_partial.hpp" -#include "common/sampling/weight_vector_bit.hpp" -#include "common/sampling/weight_vector_dense.hpp" -#include "common/sampling/weight_vector_equal.hpp" -#include "common/sampling/weight_vector_out_of_sample.hpp" -#include "common/statistics/statistics_weighted.hpp" - -/** - * Defines an interface for all classes that provide access to statistics about the labels of the training examples, - * which serve as the basis for learning a new rule or refining an existing one. - */ -class IStatistics { - public: - - virtual ~IStatistics() {}; - - /** - * Returns the number of available statistics. - * - * @return The number of statistics - */ - virtual uint32 getNumStatistics() const = 0; - - /** - * Returns the number of available labels. - * - * @return The number of labels - */ - virtual uint32 getNumLabels() const = 0; - - /** - * Updates a specific statistic based on the prediction of a rule that predicts for all available labels. - * - * This function must be called for each statistic that is covered by the new rule before learning the next - * rule. 
- * - * @param statisticIndex The index of the statistic to be updated - * @param prediction A reference to an object of type `CompletePrediction` that stores the scores that - * are predicted by the rule - */ - virtual void applyPrediction(uint32 statisticIndex, const CompletePrediction& prediction) = 0; - - /** - * Updates a specific statistic based on the prediction of a rule that predicts for a subset of the available - * labels. - * - * This function must be called for each statistic that is covered by the new rule before learning the next - * rule. - * - * @param statisticIndex The index of the statistic to be updated - * @param prediction A reference to an object of type `PartialPrediction` that stores the scores that are - * predicted by the rule - */ - virtual void applyPrediction(uint32 statisticIndex, const PartialPrediction& prediction) = 0; - - /** - * Reverts a specific statistic that has previously been updated via the function `applyPrediction` based on the - * prediction of a rule that predicts for all available labels. - * - * @param statisticIndex The index of the statistic to be reverted - * @param prediction A reference to an object of type `CompletePrediction` that stores the scores that - * are predicted by the rule - */ - virtual void revertPrediction(uint32 statisticIndex, const CompletePrediction& prediction) = 0; - - /** - * Reverts a specific statistic that has previously been updated via the function `applyPrediction` based on the - * prediction of a rule that predicts for a subset of the available labels. 
- * - * @param statisticIndex The index of the statistic to be reverted - * @param prediction A reference to an object of type `PartialPrediction` that stores the scores that are - * predicted by the rule - */ - virtual void revertPrediction(uint32 statisticIndex, const PartialPrediction& prediction) = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of the current predictions for a specific - * statistic. - * - * @param statisticIndex The index of the statistic for which the predictions should be evaluated - * @return The numerical score that has been calculated - */ - virtual float64 evaluatePrediction(uint32 statisticIndex) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `CompleteIndexVector`. - * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights - * of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const CompleteIndexVector& labelIndices, - const EqualWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `PartialIndexVector`. 
- * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights - * of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const EqualWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `CompleteIndexVector`. - * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights - * of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const CompleteIndexVector& labelIndices, - const BitWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `PartialIndexVector`. 
- * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights - * of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const BitWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `CompleteIndexVector`. - * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `DenseWeightVector` that provides access to the - * weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const CompleteIndexVector& labelIndices, - const DenseWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `PartialIndexVector`. 
- * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `DenseWeightVector` that provides access to the - * weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const PartialIndexVector& labelIndices, - const DenseWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `CompleteIndexVector`. - * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector` that - * provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, const OutOfSampleWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `PartialIndexVector`. 
- * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector` that - * provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, const OutOfSampleWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `CompleteIndexVector`. - * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector` that - * provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, const OutOfSampleWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `PartialIndexVector`. 
- * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector` that - * provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, const OutOfSampleWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `CompleteIndexVector`. - * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector>` - * that provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices, - const OutOfSampleWeightVector>& weights) const = 0; - - /** - * Creates and returns a new object of type `IStatisticsSubset` that includes only those labels, whose indices - * are provided by a specific `PartialIndexVector`. 
- * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @param weights A reference to an object of type `OutOfSampleWeightVector>` - * that provides access to the weights of individual training examples - * @return An unique pointer to an object of type `IStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const PartialIndexVector& labelIndices, - const OutOfSampleWeightVector>& weights) const = 0; - - /** - * Creates and returns a new object of type `IWeightedStatistics`. - * - * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights of - * individual training examples - * @return An unique pointer to an object of type `IWeightedStatistics` that has been created - */ - virtual std::unique_ptr createWeightedStatistics( - const EqualWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IWeightedStatistics`. - * - * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights of - * individual training examples - * @return An unique pointer to an object of type `IWeightedStatistics` that has been created - */ - virtual std::unique_ptr createWeightedStatistics(const BitWeightVector& weights) const = 0; - - /** - * Creates and returns a new object of type `IWeightedStatistics`. 
- * - * @param weights A reference to an object of type `DenseWeightVector` that provides access to the - * weights of individual training examples - * @return An unique pointer to an object of type `IWeightedStatistics` that has been created - */ - virtual std::unique_ptr createWeightedStatistics( - const DenseWeightVector& weights) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/statistics/statistics_provider.hpp b/cpp/subprojects/common/include/common/statistics/statistics_provider.hpp deleted file mode 100644 index 8e1331b9..00000000 --- a/cpp/subprojects/common/include/common/statistics/statistics_provider.hpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/view_c_contiguous.hpp" -#include "common/data/view_csr_binary.hpp" -#include "common/statistics/statistics.hpp" - -/** - * Provides access to an object of type `IStatistics`. - */ -class IStatisticsProvider { - public: - - virtual ~IStatisticsProvider() {}; - - /** - * Returns an object of type `IStatistics`. - * - * @return A reference to an object of type `IStatistics` - */ - virtual IStatistics& get() const = 0; - - /** - * Switches the implementation that is used for calculating the predictions of rules, as well their overall - * quality, to the one that should be used for learning regular rules. - */ - virtual void switchToRegularRuleEvaluation() = 0; - - /** - * Switches the implementation that is used for calculating the predictions of rules, as well as their overall - * quality, to the one that should be used for pruning rules. - */ - virtual void switchToPruningRuleEvaluation() = 0; -}; - -/** - * Defines an interface for all classes that allow to create instances of the class `IStatisticsProvider`. 
- */ -class IStatisticsProviderFactory { - public: - - virtual ~IStatisticsProviderFactory() {}; - - /** - * Creates and returns a new instance of the class `IStatisticsProvider`, based on a label matrix that provides - * random access to the labels of the training examples. - * - * @param labelMatrix A reference to an object of type `CContiguousConstView` that provides random access to - * the labels of the training examples - * @return An unique pointer to an object of type `IStatisticsProvider` that has been created - */ - virtual std::unique_ptr create( - const CContiguousConstView& labelMatrix) const = 0; - - /** - * Creates and returns a new instance of the class `IStatisticsProvider`, based on a sparse label matrix that - * provides row-wise access to the labels of the training examples. - * - * @param labelMatrix A reference to an object of type `BinaryCsrConstView` that provides row-wise access to - * the labels of the training examples - * @return An unique pointer to an object of type `IStatisticsProvider` that has been created - */ - virtual std::unique_ptr create(const BinaryCsrConstView& labelMatrix) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/statistics/statistics_subset.hpp b/cpp/subprojects/common/include/common/statistics/statistics_subset.hpp deleted file mode 100644 index 3a8bb711..00000000 --- a/cpp/subprojects/common/include/common/statistics/statistics_subset.hpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/rule_evaluation/score_vector.hpp" - -/** - * Defines an interface for all classes that provide access to a subset of the statistics that are stored by an instance - * of the class `IStatistics` and allows to calculate the scores to be predicted by rules that cover such a subset. 
- */ -class IStatisticsSubset { - public: - - virtual ~IStatisticsSubset() {}; - - /** - * Returns whether the statistics at a specific index have a non-zero weight or not. - * - * @return True, if the statistics at the given index have a non-zero weight, false otherwise - */ - virtual bool hasNonZeroWeight(uint32 statisticIndex) const = 0; - - /** - * Adds the statistics at a specific index to the subset in order to mark it as covered by the condition that is - * currently considered for refining a rule. - * - * This function must be called repeatedly for each statistic that is covered by the current condition, - * immediately after the invocation of the function `IImmutableWeightedStatistics::createSubset`. If a rule has - * already been refined, each of these statistics must have been marked as covered earlier via the function - * `IWeightedStatistics::addCoveredStatistic` and must not have been marked as uncovered via the function - * `IWeightedStatistics::removeCoveredStatistic`. - * - * This function is supposed to update any internal state of the subset that relates to the statistics that are - * covered by the current condition, i.e., to compute and store local information that is required by the other - * functions that will be called later. Any information computed by this function is expected to be reset when - * invoking the function `resetSubset` for the next time. - * - * @param statisticIndex The index of the covered statistic - */ - virtual void addToSubset(uint32 statisticIndex) = 0; - - /** - * Calculates and returns the scores to be predicted by a rule that covers all statistics that have been added - * to the subset via the function `addToSubset`, as well as a numerical score that assesses the overall quality - * of the predicted scores. 
- * - * @return A reference to an object of type `IScoreVector` that stores the scores to be predicted by the rule - * for each considered label, as well as a numerical score that assesses their overall quality - */ - virtual const IScoreVector& calculateScores() = 0; -}; diff --git a/cpp/subprojects/common/include/common/statistics/statistics_subset_weighted.hpp b/cpp/subprojects/common/include/common/statistics/statistics_subset_weighted.hpp deleted file mode 100644 index a0b2d629..00000000 --- a/cpp/subprojects/common/include/common/statistics/statistics_subset_weighted.hpp +++ /dev/null @@ -1,79 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/statistics/statistics_subset.hpp" - -/** - * Defines an interface for all classes that provide access to a subset of the weighted statistics that are stored by an - * instance of the class `IWeightedStatistics` and allows to calculate the scores to be predicted by rules that cover - * such a subset. - */ -class IWeightedStatisticsSubset : virtual public IStatisticsSubset { - public: - - virtual ~IWeightedStatisticsSubset() override {}; - - /** - * Marks the statistics at a specific index as missing, i.e., no condition that will be considered in the - * following for refining a rule will be able to cover it and consequently the function `addToSubset` will never - * be called for the given `statisticIndex`. - * - * @param statisticIndex The index of the missing statistic - */ - virtual void addToMissing(uint32 statisticIndex) = 0; - - /** - * Resets the subset by removing all statistics that have been added via preceding calls to the function - * `addToSubset`. - * - * This function is supposed to reset the internal state of the subset to the state when the subset was created - * via the function `IImmutableWeightedStatistics::createSubset`. 
When calling this function, the current state - * must not be purged entirely, but it must be cached and made available for use by the functions - * `evaluateAccumulated` and `evaluateUncoveredAccumulated`. - * - * This function may be invoked multiple times with one or several calls to `addToSubset` in between, which is - * supposed to update the previously cached state by accumulating the current one, i.e., the accumulated cached - * state should be the same as if `resetSubset` would not have been called at all. - */ - virtual void resetSubset() = 0; - - /** - * Calculates and returns the scores to be predicted by a rule that covers all statistics that have been added - * to the subset via the function `addToSubset`, as well as a numerical score that assesses the quality of the - * predicted scores. All statistics that have been added since the subset was created via the function - * `IImmutableWeightedStatistics::createSubset` are taken into account even if the function `resetSubset` was - * called since then. - * - * @return A reference to an object of type `IScoreVector` that stores the scores to be predicted by the rule - * for each considered label, as well as a numerical score that assesses their overall quality - */ - virtual const IScoreVector& calculateScoresAccumulated() = 0; - - /** - * Calculates and returns the scores to be predicted by a rule that covers all statistics that correspond to the - * difference between the statistics that have been added to the subset via the function `addToSubset` and those - * that have been marked as covered via the function `IWeightedStatistics::addCoveredStatistic` or - * `IWeightedStatistics::removeCoveredStatistic`, as well as a numerical score that assesses the quality of the - * predicted scores. 
- * - * @return A reference to an object of type `IScoreVector` that stores the scores to be predicted by the rule - * for each considered label, as well as a numerical score that assesses their overall quality - */ - virtual const IScoreVector& calculateScoresUncovered() = 0; - - /** - * Calculates and returns the scores to be predicted by a rule that covers all statistics that correspond to the - * difference between the statistics that have been added to the subset via the function `addToSubset` and those - * that have been marked as covered via the function `IWeightedStatistics::addCoveredStatistic` or - * `IWeightedStatistics::removeCoveredStatistic`, as well as a numerical score that assesses the quality of the - * predicted scores. All statistics that have been added since the subset was created via the function - * `IImmutableWeightedStatistics::createSubset` are taken into account even if the function `resetSubset` was - * called since then. - * - * @return A reference to an object of type `IScoreVector` that stores the scores to be predicted by the rule - * for each considered label, as well as a numerical score that assesses their overall quality - */ - virtual const IScoreVector& calculateScoresUncoveredAccumulated() = 0; -}; diff --git a/cpp/subprojects/common/include/common/statistics/statistics_weighted.hpp b/cpp/subprojects/common/include/common/statistics/statistics_weighted.hpp deleted file mode 100644 index 51141656..00000000 --- a/cpp/subprojects/common/include/common/statistics/statistics_weighted.hpp +++ /dev/null @@ -1,91 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/bin_index_vector_dense.hpp" -#include "common/binning/bin_index_vector_dok.hpp" -#include "common/statistics/histogram.hpp" -#include "common/statistics/statistics_weighted_immutable.hpp" - -/** - * Defines an interface for all classes that inherit from `IImmutableWeightedStatistics`, but do also provide functions 
- * that allow to only use a sub-sample of the available statistics. - */ -class IWeightedStatistics : virtual public IImmutableWeightedStatistics { - public: - - virtual ~IWeightedStatistics() override {}; - - /** - * Creates and returns a copy of this object. - * - * @return An unique pointer to an object of type `IWeightedStatistics` that has been created - */ - virtual std::unique_ptr copy() const = 0; - - /** - * Resets the statistics which should be considered in the following for refining an existing rule. The indices - * of the respective statistics must be provided via subsequent calls to the function `addCoveredStatistic`. - * - * This function must be invoked each time an existing rule has been refined, i.e., when a new condition has - * been added to its body, because this results in a subset of the statistics being covered by the refined rule. - * - * This function is supposed to reset any non-global internal state that only holds for a certain subset of the - * available statistics and therefore becomes invalid when a different subset of the statistics should be used. - */ - virtual void resetCoveredStatistics() = 0; - - /** - * Adds a specific statistic to the subset that is covered by an existing rule and therefore should be - * considered in the following for refining an existing rule. - * - * This function must be called repeatedly for each statistic that is covered by the existing rule, immediately - * after the invocation of the function `resetCoveredStatistics`. - * - * This function is supposed to update any internal state that relates to the considered statistics, i.e., to - * compute and store local information that is required by the other functions that will be called later. Any - * information computed by this function is expected to be reset when invoking the function - * `resetCoveredStatistics` for the next time. 
- * - * @param statisticIndex The index of the statistic that should be added - */ - virtual void addCoveredStatistic(uint32 statisticIndex) = 0; - - /** - * Removes a specific statistic from the subset that is covered by an existing rule and therefore should not be - * considered in the following for refining an existing rule. - * - * This function must be called repeatedly for each statistic that is not covered anymore by the existing rule. - * - * This function is supposed to update any internal state that relates to the considered statistics, i.e., to - * compute and store local information that is required by the other functions that will be called later. Any - * information computed by this function is expected to be reset when invoking the function - * `resetCoveredStatistics` for the next time. - * - * @param statisticIndex The index of the statistic that should be removed - */ - virtual void removeCoveredStatistic(uint32 statisticIndex) = 0; - - /** - * Creates and returns a new histogram based on the statistics. - * - * @param binIndexVector A reference to an object of type `DenseBinIndexVector` that stores the indices of - * the bins, individual examples have been assigned to - * @param numBins The number of bins in the histogram - * @return An unique pointer to an object of type `IHistogram` that has been created - */ - virtual std::unique_ptr createHistogram(const DenseBinIndexVector& binIndexVector, - uint32 numBins) const = 0; - - /** - * Creates and returns a new histogram based on the statistics. 
- * - * @param binIndexVector A reference to an object of type `DokBinIndexVector` that stores the indices of the - * bins, individual examples have been assigned to - * @param numBins The number of bins in the histogram - * @return An unique pointer to an object of type `IHistogram` that has been created - */ - virtual std::unique_ptr createHistogram(const DokBinIndexVector& binIndexVector, - uint32 numBins) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/statistics/statistics_weighted_immutable.hpp b/cpp/subprojects/common/include/common/statistics/statistics_weighted_immutable.hpp deleted file mode 100644 index d18eca37..00000000 --- a/cpp/subprojects/common/include/common/statistics/statistics_weighted_immutable.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" -#include "common/statistics/statistics_subset_weighted.hpp" - -#include - -/** - * Defines an interface for all classes that provide access to weighted statistics about the labels of the training - * examples, which serve as the basis for learning a new rule or refining an existing one. - */ -class IImmutableWeightedStatistics { - public: - - virtual ~IImmutableWeightedStatistics() {}; - - /** - * Returns the number of available statistics. - * - * @return The number of statistics - */ - virtual uint32 getNumStatistics() const = 0; - - /** - * Returns the number of available labels. - * - * @return The number of labels - */ - virtual uint32 getNumLabels() const = 0; - - /** - * Creates and returns a new object of type `IWeightedStatisticsSubset` that includes only those labels, whose - * indices are provided by a specific `CompleteIndexVector`. 
- * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @return An unique pointer to an object of type `IWeightedStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const CompleteIndexVector& labelIndices) const = 0; - - /** - * Creates and returns a new object of type `IWeightedStatisticsSubset` that includes only those labels, whose - * indices are provided by a specific `PartialIndexVector`. - * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels that should be included in the subset - * @return An unique pointer to an object of type `IWeightedStatisticsSubset` that has been created - */ - virtual std::unique_ptr createSubset( - const PartialIndexVector& labelIndices) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/stopping/aggregation_function.hpp b/cpp/subprojects/common/include/common/stopping/aggregation_function.hpp deleted file mode 100644 index 51d087db..00000000 --- a/cpp/subprojects/common/include/common/stopping/aggregation_function.hpp +++ /dev/null @@ -1,27 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" -#include "common/macros.hpp" - -/** - * Specifies different types of aggregation functions that allow to aggregate the values that are stored in a buffer. - */ -enum MLRLCOMMON_API AggregationFunction : uint8 { - /** - * An aggregation function that finds the minimum value in a buffer. - */ - MIN = 0, - - /** - * An aggregation function that finds the maximum value in a buffer. - */ - MAX = 1, - - /** - * An aggregation function that calculates the arithmetic mean of the values in a buffer. 
- */ - ARITHMETIC_MEAN = 2 -}; diff --git a/cpp/subprojects/common/include/common/stopping/global_pruning.hpp b/cpp/subprojects/common/include/common/stopping/global_pruning.hpp deleted file mode 100644 index 581b45dd..00000000 --- a/cpp/subprojects/common/include/common/stopping/global_pruning.hpp +++ /dev/null @@ -1,30 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/stopping/stopping_criterion.hpp" - -/** - * Defines an interface for all classes that allow to configure a stopping criterion that allows to decide how many - * rules should be included in a model, such that its performance is optimized globally. - */ -class IGlobalPruningConfig : public IStoppingCriterionConfig { - public: - - virtual ~IGlobalPruningConfig() override {}; - - /** - * Returns whether a holdout set should be used, if available, or not. - * - * @return True, if a holdout set should be used, false otherwise - */ - virtual bool shouldUseHoldoutSet() const = 0; - - /** - * Returns whether unused rules should be removed from the final model or not. - * - * @return True, if unused rules should be removed, false otherwise - */ - virtual bool shouldRemoveUnusedRules() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/stopping/global_pruning_post.hpp b/cpp/subprojects/common/include/common/stopping/global_pruning_post.hpp deleted file mode 100644 index 0dee6c1c..00000000 --- a/cpp/subprojects/common/include/common/stopping/global_pruning_post.hpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/stopping/global_pruning.hpp" - -/** - * Defines an interface for all classes that allow to configure a stopping criterion that keeps track of the number of - * rules in a model that perform best with respect to the examples in the training or holdout set according to a certain - * measure. 
- * - * This stopping criterion assesses the performance of the current model after every `interval` rules and stores and - * checks whether the current model is the best one evaluated so far. - */ -class MLRLCOMMON_API IPostPruningConfig { - public: - - virtual ~IPostPruningConfig() {}; - - /** - * Returns whether the quality of the current model's predictions is measured on the holdout set, if available, - * or if the training set is used instead. - * - * @return True, if the quality of the current model's predictions is measured on the holdout set, if available, - * false, if the training set is used instead - */ - virtual bool isHoldoutSetUsed() const = 0; - - /** - * Sets whether the quality of the current model's predictions should be measured on the holdout set, if - * available, or if the training set should be used instead. - * - * @param useHoldoutSet True, if the quality of the current model's predictions should be measured on the - * holdout set, if available, false, if the training set should be used instead - * @return A reference to an object of type `IPostPruningConfig` that allows further configuration - * of the stopping criterion - */ - virtual IPostPruningConfig& setUseHoldoutSet(bool useHoldoutSet) = 0; - - /** - * Returns whether rules that have been induced, but are not used, should be removed from the final model or - * not. - * - * @return True, if unused rules should be removed from the model, false otherwise - */ - virtual bool isRemoveUnusedRules() const = 0; - - /** - * Sets whether rules that have been induced, but are not used, should be removed from the final model or not. 
- * - * @param removeUnusedRules True, if unused rules should be removed from the model, false otherwise - * @return A reference to an object of type `IPostPruningConfig` that allows further - * configuration of the stopping criterion - */ - virtual IPostPruningConfig& setRemoveUnusedRules(bool removeUnusedRules) = 0; - - /** - * Returns the minimum number of rules that must be included in a model. - * - * @return The minimum number of rules that must be included in a model - */ - virtual uint32 getMinRules() const = 0; - - /** - * Sets the minimum number of rules that must be included in a model. - * - * @param minRules The minimum number of rules that must be included in a model. Must be at least 1 - * @return A reference to an object of type `IPostPruningConfig` that allows further configuration of - * the stopping criterion - */ - virtual IPostPruningConfig& setMinRules(uint32 minRules) = 0; - - /** - * Returns the interval that is used to check whether the current model is the best one evaluated so far. - * - * @return The interval that is used to check whether the current model is the best one evaluated so far - */ - virtual uint32 getInterval() const = 0; - - /** - * Sets the interval that should be used to check whether the current model is the best one evaluated so far. - * - * @param interval The interval that should be used to check whether the current model is the best one - * evaluated so far, e.g., a value of 10 means that the best model may include 10, 20, ... - * rules - * @return A reference to an object of type `IPostPruningConfig` that allows further configuration of - * the stopping criterion - */ - virtual IPostPruningConfig& setInterval(uint32 interval) = 0; -}; - -/** - * Allows to configure a stopping criterion the keeps track of the number of rules in a model that perform best with - * respect to the examples in the training or holdout set according to a certain measure. 
- */ -class PostPruningConfig final : public IGlobalPruningConfig, - public IPostPruningConfig { - private: - - bool useHoldoutSet_; - - bool removeUnusedRules_; - - uint32 minRules_; - - uint32 interval_; - - public: - - PostPruningConfig(); - - bool isHoldoutSetUsed() const override; - - IPostPruningConfig& setUseHoldoutSet(bool useHoldoutSet) override; - - bool isRemoveUnusedRules() const override; - - IPostPruningConfig& setRemoveUnusedRules(bool removeUnusedRules) override; - - uint32 getMinRules() const override; - - IPostPruningConfig& setMinRules(uint32 minRules) override; - - uint32 getInterval() const override; - - IPostPruningConfig& setInterval(uint32 interval) override; - - std::unique_ptr createStoppingCriterionFactory() const override; - - /** - * @see `IGlobalPruningConfig::shouldUseHoldoutSet` - */ - bool shouldUseHoldoutSet() const override; - - /** - * @see `IGlobalPruningConfig::shouldRemoveUnusedRules` - */ - bool shouldRemoveUnusedRules() const override; -}; diff --git a/cpp/subprojects/common/include/common/stopping/global_pruning_pre.hpp b/cpp/subprojects/common/include/common/stopping/global_pruning_pre.hpp deleted file mode 100644 index 974130a6..00000000 --- a/cpp/subprojects/common/include/common/stopping/global_pruning_pre.hpp +++ /dev/null @@ -1,269 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/stopping/aggregation_function.hpp" -#include "common/stopping/global_pruning.hpp" - -/** - * Defines an interface for all classes that allow to configure a stopping criterion that stops the induction of rules - * as soon as the quality of a model's predictions for the examples in the training or holdout set do not improve - * according to a certain measure. - * - * This stopping criterion assesses the performance of the current model after every `updateInterval` rules and stores - * its quality in a buffer that keeps track of the last `numCurrent` iterations. 
If the capacity of this buffer is - * already reached, the oldest quality is passed to a buffer of size `numPast`. Every `stopInterval` rules, it is - * decided whether the rule induction should be stopped. For this reason, the `numCurrent` qualities in the first - * buffer, as well as the `numPast` qualities in the second buffer are aggregated according to a certain - * `aggregationFunction`. If the percentage improvement, which results from comparing the more recent qualities from the - * first buffer to the older qualities from the second buffer, is greater than a certain `minImprovement`, the rule - * induction is continued, otherwise it is stopped. - */ -class MLRLCOMMON_API IPrePruningConfig { - public: - - virtual ~IPrePruningConfig() {}; - - /** - * Returns the type of the aggregation function that is used to aggregate the values that are stored in a - * buffer. - * - * @return A value of the enum `AggregationFunction` that specifies the type of the aggregation function that is - * used to aggregate the values that are stored in a buffer - */ - virtual AggregationFunction getAggregationFunction() const = 0; - - /** - * Sets the type of the aggregation function that should be used to aggregate the values that are stored in a - * buffer. - * - * @param aggregationFunction A value of the enum `AggregationFunction` that specifies the type of the - * aggregation function that should be used to aggregate the values that are stored - * in a buffer - * @return A reference to an object of type `IPrePruningConfig` that allows further - * configuration of the stopping criterion - */ - virtual IPrePruningConfig& setAggregationFunction(AggregationFunction aggregationFunction) = 0; - - /** - * Returns whether the quality of the current model's predictions is measured on the holdout set, if available, - * or if the training set is used instead. 
- * - * @return True, if the quality of the current model's predictions is measured on the holdout set, if available, - * false, if the training set is used instead - */ - virtual bool isHoldoutSetUsed() const = 0; - - /** - * Sets whether the quality of the current model's predictions should be measured on the holdout set, if - * available, or if the training set should be used instead. - * - * @param useHoldoutSet True, if the quality of the current model's predictions should be measured on the - * holdout set, if available, false, if the training set should be used instead - * @return A reference to an object of type `IPrePruningConfig` that allows further configuration - * of the stopping criterion - */ - virtual IPrePruningConfig& setUseHoldoutSet(bool useHoldoutSet) = 0; - - /** - * Returns whether rules that have been induced, but are not used, should be removed from the final model or - * not. - * - * @return True, if unused rules should be removed from the model, false otherwise - */ - virtual bool isRemoveUnusedRules() const = 0; - - /** - * Sets whether rules that have been induced, but are not used, should be removed from the final model or not. - * - * @param removeUnusedRules True, if unused rules should be removed from the model, false otherwise - * @return A reference to an object of type `IPrePruningConfig` that allows further - * configuration of the stopping criterion - */ - virtual IPrePruningConfig& setRemoveUnusedRules(bool removeUnusedRules) = 0; - - /** - * Returns the minimum number of rules that must have been learned until the induction of rules might be - * stopped. - * - * @return The minimum number of rules that must have been learned until the induction of rules might be stopped - */ - virtual uint32 getMinRules() const = 0; - - /** - * Sets the minimum number of rules that must have been learned until the induction of rules might be stopped. 
- * - * @param minRules The minimum number of rules that must have been learned until the induction of rules might - * be stopped. Must be at least 1 - * @return A reference to an object of type `IPrePruningConfig` that allows further configuration of - * the stopping criterion - */ - virtual IPrePruningConfig& setMinRules(uint32 minRules) = 0; - - /** - * Returns the interval that is used to update the quality of the current model. - * - * @return The interval that is used to update the quality of the current model - */ - virtual uint32 getUpdateInterval() const = 0; - - /** - * Sets the interval that should be used to update the quality of the current model. - * - * @param updateInterval The interval that should be used to update the quality of the current model, e.g., a - * value of 5 means that the model quality is assessed every 5 rules. Must be at least - * 1 - * @return A reference to an object of type `IPrePruningConfig` that allows further - * configuration of the stopping criterion - */ - virtual IPrePruningConfig& setUpdateInterval(uint32 updateInterval) = 0; - - /** - * Returns the interval that is used to decide whether the induction of rules should be stopped. - * - * @return The interval that is used to decide whether the induction of rules should be stopped - */ - virtual uint32 getStopInterval() const = 0; - - /** - * Sets the interval that should be used to decide whether the induction of rules should be stopped. - * - * @param stopInterval The interval that should be used to decide whether the induction of rules should be - * stopped, e.g., a value of 10 means that the rule induction might be stopped after 10, - * 20, ... rules. 
Must be a multiple of the update interval - * @return A reference to an object of type `IPrePruningConfig` that allows further configuration - * of the stopping criterion - */ - virtual IPrePruningConfig& setStopInterval(uint32 stopInterval) = 0; - - /** - * Returns the number of quality stores of past iterations that are stored in a buffer. - * - * @return The number of quality stores of past iterations that are stored in a buffer - */ - virtual uint32 getNumPast() const = 0; - - /** - * Sets the number of past iterations that should be stored in a buffer. - * - * @param numPast The number of past iterations that should be be stored in a buffer. Must be at least 1 - * @return A reference to an object of type `IPrePruningConfig` that allows further configuration of - * the stopping criterion - */ - virtual IPrePruningConfig& setNumPast(uint32 numPast) = 0; - - /** - * Returns the number of the most recent iterations that are stored in a buffer. - * - * @return The number of the most recent iterations that are stored in a buffer - */ - virtual uint32 getNumCurrent() const = 0; - - /** - * Sets the number of the most recent iterations that should be stored in a buffer. - * - * @param numCurrent The number of the most recent iterations that should be stored in a buffer. Must be at - * least 1 - * @return A reference to an object of type `IPrePruningConfig` that allows further configuration - * of the stopping criterion - */ - virtual IPrePruningConfig& setNumCurrent(uint32 numCurrent) = 0; - - /** - * Returns the minimum improvement that must be reached for the rule induction to be continued. - * - * @return The minimum improvement that must be reached for the rule induction to be continued - */ - virtual float64 getMinImprovement() const = 0; - - /** - * Sets the minimum improvement that must be reached for the rule induction to be continued. 
- * - * @param minImprovement The minimum improvement in percent that must be reached for the rule induction to be - * continued. Must be in [0, 1] - * @return A reference to an object of type `IPrePruningConfig` that allows further - * configuration of the stopping criterion - */ - virtual IPrePruningConfig& setMinImprovement(float64 minImprovement) = 0; -}; - -/** - * Allows to configure a stopping criterion that stops the induction of rules as soon as the quality of a model's - * predictions for the examples in the training or holdout set do not improve according to a certain measure. - */ -class PrePruningConfig final : public IGlobalPruningConfig, - public IPrePruningConfig { - private: - - AggregationFunction aggregationFunction_; - - bool useHoldoutSet_; - - bool removeUnusedRules_; - - uint32 minRules_; - - uint32 updateInterval_; - - uint32 stopInterval_; - - uint32 numPast_; - - uint32 numCurrent_; - - float64 minImprovement_; - - public: - - PrePruningConfig(); - - AggregationFunction getAggregationFunction() const override; - - IPrePruningConfig& setAggregationFunction(AggregationFunction aggregationFunction) override; - - bool isHoldoutSetUsed() const override; - - IPrePruningConfig& setUseHoldoutSet(bool useHoldoutSet) override; - - bool isRemoveUnusedRules() const override; - - IPrePruningConfig& setRemoveUnusedRules(bool removeUnusedRules) override; - - uint32 getMinRules() const override; - - IPrePruningConfig& setMinRules(uint32 minRules) override; - - uint32 getUpdateInterval() const override; - - IPrePruningConfig& setUpdateInterval(uint32 updateInterval) override; - - uint32 getStopInterval() const override; - - IPrePruningConfig& setStopInterval(uint32 stopInterval) override; - - uint32 getNumPast() const override; - - IPrePruningConfig& setNumPast(uint32 numPast) override; - - uint32 getNumCurrent() const override; - - IPrePruningConfig& setNumCurrent(uint32 numCurrent) override; - - float64 getMinImprovement() const override; - - 
IPrePruningConfig& setMinImprovement(float64 minImprovement) override; - - std::unique_ptr createStoppingCriterionFactory() const override; - - /** - * @see `IGlobalPruningConfig::shouldUseHoldoutSet` - */ - bool shouldUseHoldoutSet() const override; - - /** - * @see `IGlobalPruningConfig::shouldRemoveUnusedRules` - */ - bool shouldRemoveUnusedRules() const override; -}; diff --git a/cpp/subprojects/common/include/common/stopping/stopping_criterion.hpp b/cpp/subprojects/common/include/common/stopping/stopping_criterion.hpp deleted file mode 100644 index ceebde0f..00000000 --- a/cpp/subprojects/common/include/common/stopping/stopping_criterion.hpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/statistics/statistics.hpp" - -/** - * Defines an interface for all stopping criteria that allow to decide whether additional rules should be induced or - * not. - */ -class IStoppingCriterion { - public: - - /** - * The result that is returned by a stopping criterion. It consists of the action to be executed, as well as the - * number of rules to be used, if the action is not `CONTINUE`. - */ - struct Result final { - public: - - Result() : stop(false), numUsedRules(0) {}; - - /** - True, if the induction of rules should be stopped, false otherwise. - */ - bool stop; - - /** - * The number of rules to be used. - */ - uint32 numUsedRules; - }; - - virtual ~IStoppingCriterion() {}; - - /** - * Checks whether additional rules should be induced or not. 
- * - * @param statistics A reference to an object of type `IStatistics` that will serve as the basis for learning - * the next rule - * @param numRules The number of rules induced so far - * @return A value of the enum `Result` that specifies whether the induction of rules should be - * continued (`CONTINUE`), whether the current number of rules should be stored as a - * potential point for stopping while continuing to induce rules (`STORE_STOP`), or if the - * induction of rules should be forced to be stopped (`FORCE_STOP`) - */ - virtual Result test(const IStatistics& statistics, uint32 numRules) = 0; -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IStoppingCriterion`. - */ -class IStoppingCriterionFactory { - public: - - virtual ~IStoppingCriterionFactory() {}; - - /** - * Creates and returns a new object of type `IStoppingCriterion`. - * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set and the holdout set, - * respectively - * @return An unique pointer to an object of type `IStoppingCriterion` that has been created - */ - virtual std::unique_ptr create(const SinglePartition& partition) const = 0; - - /** - * Creates and returns a new object of type `IStoppingCriterion`. - * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set and the holdout set, respectively - * @return An unique pointer to an object of type `IStoppingCriterion` that has been created - */ - virtual std::unique_ptr create(BiPartition& partition) const = 0; -}; - -/** - * Defines an interface for all classes that allow to configure a stopping criterion that allows to decide whether - * additional rules should be induced or not. 
- */ -class IStoppingCriterionConfig { - public: - - virtual ~IStoppingCriterionConfig() {}; - - /** - * Creates and returns a new object of type `IStoppingCriterionFactory` according to the specified - * configuration. - * - * @return An unique pointer to an object of type `IStoppingCriterionFactory` that has been created - */ - virtual std::unique_ptr createStoppingCriterionFactory() const = 0; -}; diff --git a/cpp/subprojects/common/include/common/stopping/stopping_criterion_list.hpp b/cpp/subprojects/common/include/common/stopping/stopping_criterion_list.hpp deleted file mode 100644 index 438eab69..00000000 --- a/cpp/subprojects/common/include/common/stopping/stopping_criterion_list.hpp +++ /dev/null @@ -1,32 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/stopping/stopping_criterion.hpp" - -#include - -/** - * A factory that allows to create instances of the type `IStoppingCriterion` that allow to test multiple stopping - * criteria. If at least one of these stopping criteria is met, the induction of additional rules is stopped. - */ -class StoppingCriterionListFactory final : public IStoppingCriterionFactory { - private: - - std::vector> stoppingCriterionFactories_; - - public: - - /** - * Adds a new factory that allows to create instances of a stopping criterion to be tested. 
- * - * @param stoppingCriterionFactoryPtr An unique pointer to an object of type `IStoppingCriterionFactory` that - * should be added - */ - void addStoppingCriterionFactory(std::unique_ptr stoppingCriterionFactoryPtr); - - std::unique_ptr create(const SinglePartition& partition) const override; - - std::unique_ptr create(BiPartition& partition) const override; -}; diff --git a/cpp/subprojects/common/include/common/stopping/stopping_criterion_size.hpp b/cpp/subprojects/common/include/common/stopping/stopping_criterion_size.hpp deleted file mode 100644 index a9cb3b08..00000000 --- a/cpp/subprojects/common/include/common/stopping/stopping_criterion_size.hpp +++ /dev/null @@ -1,54 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/stopping/stopping_criterion.hpp" - -/** - * Defines an interface for all classes that allow to configure a stopping criterion that ensures that the number of - * induced rules does not exceed a certain maximum. - */ -class MLRLCOMMON_API ISizeStoppingCriterionConfig { - public: - - virtual ~ISizeStoppingCriterionConfig() {}; - - /** - * Returns the maximum number of rules that are induced. - * - * @return The maximum number of rules that are induced - */ - virtual uint32 getMaxRules() const = 0; - - /** - * Sets the maximum number of rules that should be induced. - * - * @param maxRules The maximum number of rules that should be induced. Must be at least 1 - * @return A reference to an object of type `ISizeStoppingCriterionConfig` that allows further - * configuration of the stopping criterion - */ - virtual ISizeStoppingCriterionConfig& setMaxRules(uint32 maxRules) = 0; -}; - -/** - * Allows to configure a stopping criterion that ensures that the number of induced rules does not exceed a certain - * maximum. 
- */ -class SizeStoppingCriterionConfig final : public IStoppingCriterionConfig, - public ISizeStoppingCriterionConfig { - private: - - uint32 maxRules_; - - public: - - SizeStoppingCriterionConfig(); - - uint32 getMaxRules() const override; - - ISizeStoppingCriterionConfig& setMaxRules(uint32 maxRules) override; - - std::unique_ptr createStoppingCriterionFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/stopping/stopping_criterion_time.hpp b/cpp/subprojects/common/include/common/stopping/stopping_criterion_time.hpp deleted file mode 100644 index cde7f2a9..00000000 --- a/cpp/subprojects/common/include/common/stopping/stopping_criterion_time.hpp +++ /dev/null @@ -1,53 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/macros.hpp" -#include "common/stopping/stopping_criterion.hpp" - -/** - * Defines an interface for all classes that allow to configure a stopping criterion that ensures that a certain time - * limit is not exceeded. - */ -class MLRLCOMMON_API ITimeStoppingCriterionConfig { - public: - - virtual ~ITimeStoppingCriterionConfig() {}; - - /** - * Returns the time limit. - * - * @return The time limit in seconds - */ - virtual uint32 getTimeLimit() const = 0; - - /** - * Sets the time limit. - * - * @param timeLimit The time limit in seconds. Must be at least 1 - * @return A reference to an object of type `ITimeStoppingCriterionConfig` that allows further - * configuration of the stopping criterion - */ - virtual ITimeStoppingCriterionConfig& setTimeLimit(uint32 timeLimit) = 0; -}; - -/** - * Allows to configure a stopping criterion that ensures that a certain time limit is not exceeded. 
- */ -class TimeStoppingCriterionConfig final : public IStoppingCriterionConfig, - public ITimeStoppingCriterionConfig { - private: - - uint32 timeLimit_; - - public: - - TimeStoppingCriterionConfig(); - - uint32 getTimeLimit() const override; - - ITimeStoppingCriterionConfig& setTimeLimit(uint32 timeLimit) override; - - std::unique_ptr createStoppingCriterionFactory() const override; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/coverage_mask.hpp b/cpp/subprojects/common/include/common/thresholds/coverage_mask.hpp deleted file mode 100644 index 90319582..00000000 --- a/cpp/subprojects/common/include/common/thresholds/coverage_mask.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/thresholds/coverage_state.hpp" - -/** - * Allows to check whether individual examples are covered by a rule or not. For each example, an integer is stored in a - * C-contiguous array that may be updated when the rule is refined. If the value that corresponds to a certain example - * is equal to the "indicator value", it is considered to be covered. - */ -class CoverageMask final : public ICoverageState { - private: - - uint32* array_; - - const uint32 numElements_; - - uint32 indicatorValue_; - - public: - - /** - * @param numElements The total number of examples - */ - CoverageMask(uint32 numElements); - - /** - * @param coverageMask A reference to an object of type `CoverageMask` to be copied - */ - CoverageMask(const CoverageMask& coverageMask); - - ~CoverageMask() override; - - /** - * An iterator that provides access to the values in the mask and allows to modify them. - */ - typedef uint32* iterator; - - /** - * An iterator that provides read-only access to the values in the mask. - */ - typedef const uint32* const_iterator; - - /** - * Returns an `iterator` to the beginning of the mask. 
- * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the mask. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the mask. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the mask. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the total number of examples - * - * @return The total number of examples - */ - uint32 getNumElements() const; - - /** - * Returns the "indicator value". - * - * @return The "indicator value" - */ - uint32 getIndicatorValue() const; - - /** - * Sets the "indicator value". - * - * @param indicatorValue The "indicator value" to be set - */ - void setIndicatorValue(uint32 indicatorValue); - - /** - * Resets the mask and the "indicator value" such that all examples are marked as covered. - */ - void reset(); - - /** - * Returns whether the example at a specific index is covered or not. 
- * - * @param pos The index of the example - * @return True, if the example at the given index is covered, false otherwise - */ - bool isCovered(uint32 pos) const; - - std::unique_ptr copy() const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const AbstractPrediction& head) const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const AbstractPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - AbstractPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - AbstractPrediction& head) const override; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/coverage_set.hpp b/cpp/subprojects/common/include/common/thresholds/coverage_set.hpp deleted file mode 100644 index 2d0ffc37..00000000 --- a/cpp/subprojects/common/include/common/thresholds/coverage_set.hpp +++ /dev/null @@ -1,112 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/thresholds/coverage_state.hpp" - -/** - * Provides access to the indices of the examples that are covered by a rule. The indices of the covered examples are - * stored in a C-contiguous array that may be updated when the rule is refined. - */ -class CoverageSet final : public ICoverageState { - private: - - uint32* array_; - - const uint32 numElements_; - - uint32 numCovered_; - - public: - - /** - * @param numElements The total number of examples - */ - CoverageSet(uint32 numElements); - - /** - * @param coverageSet A reference to an object of type `CoverageSet` to be copied - */ - CoverageSet(const CoverageSet& coverageSet); - - ~CoverageSet() override; - - /** - * An iterator that provides access to the indices of the covered examples and allows to modify them. 
- */ - typedef uint32* iterator; - - /** - * An iterator that provides read-only access to the indices of the covered examples. - */ - typedef const uint32* const_iterator; - - /** - * Returns an `iterator` to the beginning of the indices of the covered examples. - * - * @return An `iterator` to the beginning - */ - iterator begin(); - - /** - * Returns an `iterator` to the end of the indices of the covered examples. - * - * @return An `iterator` to the end - */ - iterator end(); - - /** - * Returns a `const_iterator` to the beginning of the indices of the covered examples. - * - * @return A `const_iterator` to the beginning - */ - const_iterator cbegin() const; - - /** - * Returns a `const_iterator` to the end of the indices of the covered examples. - * - * @return A `const_iterator` to the end - */ - const_iterator cend() const; - - /** - * Returns the total number of examples. - * - * @return The total number of examples - */ - uint32 getNumElements() const; - - /** - * Returns the number of covered examples. - * - * @return The number of covered examples - */ - uint32 getNumCovered() const; - - /** - * Sets the number of covered examples. - * - * @param numCovered The number of covered examples to be set - */ - void setNumCovered(uint32 numCovered); - - /** - * Resets the number of covered examples and their indices such that all examples are marked as covered. 
- */ - void reset(); - - std::unique_ptr copy() const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const AbstractPrediction& head) const override; - - Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const AbstractPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - AbstractPrediction& head) const override; - - void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - AbstractPrediction& head) const override; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/coverage_state.hpp b/cpp/subprojects/common/include/common/thresholds/coverage_state.hpp deleted file mode 100644 index 11d21841..00000000 --- a/cpp/subprojects/common/include/common/thresholds/coverage_state.hpp +++ /dev/null @@ -1,86 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/util/quality.hpp" - -#include - -// Forward declarations -class IThresholdsSubset; -class SinglePartition; -class BiPartition; -class AbstractPrediction; - -/** - * Defines an interface for all classes that allow to keep track of the examples that are covered by a rule. - */ -class ICoverageState { - public: - - virtual ~ICoverageState() {}; - - /** - * Creates and returns a deep copy of the coverage state. - * - * @return An unique pointer to an object of type `ICoverageState` that has been created - */ - virtual std::unique_ptr copy() const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered. 
- * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * evaluate the prediction - * @param partition A reference to an object of type `SinglePartition` that provides access to the - * indices of the training examples that belong to the training set - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that - * are predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const AbstractPrediction& head) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered. - * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * evaluate the prediction - * @param partition A reference to an object of type `BiPartition` that provides access to the indices - * of the training examples that belong to the training set - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that - * are predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const AbstractPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered. 
- * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * recalculate the prediction - * @param partition A reference to an object of type `SinglePartition` that provides access to the - * indices of the training examples that belong to the training set - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - AbstractPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered. - * - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to - * recalculate the prediction - * @param partition A reference to an object of type `BiPartition` that provides access to the indices - * of the training examples that belong to the training set - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - AbstractPrediction& head) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/thresholds.hpp b/cpp/subprojects/common/include/common/thresholds/thresholds.hpp deleted file mode 100644 index 5ecf55fe..00000000 --- a/cpp/subprojects/common/include/common/thresholds/thresholds.hpp +++ /dev/null @@ -1,81 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_info.hpp" -#include "common/input/feature_matrix_column_wise.hpp" -#include "common/sampling/weight_vector_bit.hpp" -#include "common/sampling/weight_vector_dense.hpp" -#include "common/sampling/weight_vector_equal.hpp" -#include "common/statistics/statistics_provider.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -/** - * Defines an interface for all classes 
that provide access to thresholds that may be used by the first condition of a - * rule that currently has an empty body and therefore covers the entire instance space. - */ -class IThresholds { - public: - - virtual ~IThresholds() {}; - - /** - * Creates and returns a new subset of the thresholds, which initially contains all of the thresholds. - * - * @param weights A reference to an object of type `EqualWeightVector` that provides access to the weights of - * individual training examples - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const EqualWeightVector& weights) = 0; - - /** - * Creates and returns a new subset of the thresholds, which initially contains all of the thresholds. - * - * @param weights A reference to an object of type `BitWeightVector` that provides access to the weights of - * individual training examples - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const BitWeightVector& weights) = 0; - - /** - * Creates and returns a new subset of the thresholds, which initially contains all of the thresholds. - * - * @param weights A reference to an object of type `DenseWeightVector` that provides access to the - * weights of individual training examples - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created - */ - virtual std::unique_ptr createSubset(const DenseWeightVector& weights) = 0; - - /** - * Returns a reference to an object of type `IStatisticsProvider` that provides access to the statistics that - * correspond to individual training examples in the instance space. - * - * @return A reference to an object of type `IStatisticsProvider` - */ - virtual IStatisticsProvider& getStatisticsProvider() const = 0; -}; - -/** - * Defines an interface for all classes that allow to create instances of the type `IThresholds`. 
- */ -class IThresholdsFactory { - public: - - virtual ~IThresholdsFactory() {}; - - /** - * Creates and returns a new object of type `IThresholds`. - * - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that provides - * column-wise access to the feature values of individual training examples - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides information about - * the types of individual features - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to - * statistics about the labels of the training examples - * @return An unique pointer to an object of type `IThresholds` that has been created - */ - virtual std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const = 0; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/thresholds_approximate.hpp b/cpp/subprojects/common/include/common/thresholds/thresholds_approximate.hpp deleted file mode 100644 index f8020157..00000000 --- a/cpp/subprojects/common/include/common/thresholds/thresholds_approximate.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Lukas Johannes Eberle (lukasjohannes.eberle@stud.tu-darmstadt.de) - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/feature_binning.hpp" -#include "common/thresholds/thresholds.hpp" - -/** - * A factory that allows to create instances of the type `ApproximateThresholds`. 
- */ -class ApproximateThresholdsFactory final : public IThresholdsFactory { - private: - - const std::unique_ptr numericalFeatureBinningFactoryPtr_; - - const std::unique_ptr nominalFeatureBinningFactoryPtr_; - - const uint32 numThreads_; - - public: - - /** - * @param numericalFeatureBinningFactoryPtr An unique pointer to an object of type `IFeatureBinningFactory` that - * allows to create implementations of the binning method to be used - * for assigning numerical feature values to bins - * @param nominalFeatureBinningFactoryPtr An unique pointer to an object of type `IFeatureBinningFactory` that - * allows to create implementations of the binning method to be used - * for assigning nominal feature values to bins - * @param numThreads The number of CPU threads to be used to update statistics in - * parallel. Must be at least 1 - */ - ApproximateThresholdsFactory(std::unique_ptr numericalFeatureBinningFactoryPtr, - std::unique_ptr nominalFeatureBinningFactoryPtr, - uint32 numThreads); - - std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const override; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/thresholds_exact.hpp b/cpp/subprojects/common/include/common/thresholds/thresholds_exact.hpp deleted file mode 100644 index fb01cf34..00000000 --- a/cpp/subprojects/common/include/common/thresholds/thresholds_exact.hpp +++ /dev/null @@ -1,26 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/thresholds/thresholds.hpp" - -/** - * A factory that allows to create instances of the type `ExactThresholds`. - */ -class ExactThresholdsFactory final : public IThresholdsFactory { - private: - - const uint32 numThreads_; - - public: - - /** - * @param numThreads The number of CPU threads to be used to update statistics in parallel. 
Must be at least 1 - */ - ExactThresholdsFactory(uint32 numThreads); - - std::unique_ptr create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const override; -}; diff --git a/cpp/subprojects/common/include/common/thresholds/thresholds_subset.hpp b/cpp/subprojects/common/include/common/thresholds/thresholds_subset.hpp deleted file mode 100644 index 4e7044b2..00000000 --- a/cpp/subprojects/common/include/common/thresholds/thresholds_subset.hpp +++ /dev/null @@ -1,237 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" -#include "common/model/condition.hpp" -#include "common/rule_refinement/prediction.hpp" -#include "common/rule_refinement/rule_refinement.hpp" -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/thresholds/coverage_mask.hpp" -#include "common/thresholds/coverage_set.hpp" - -#include - -/** - * Defines an interface for all classes that provide access a subset of thresholds that may be used by the conditions of - * a rule with arbitrary body. The thresholds may include only those that correspond to the subspace of the instance - * space that is covered by the rule. - */ -class IThresholdsSubset { - public: - - virtual ~IThresholdsSubset() {}; - - /** - * Creates and returns a copy of this object. - * - * @return An unique pointer to an object of type `IThresholdsSubset` that has been created - */ - virtual std::unique_ptr copy() const = 0; - - /** - * Creates and returns a new instance of the type `IRuleRefinement` that allows to find the best refinement of - * an existing rule that predicts for all available labels. 
- * - * @param labelIndices A reference to an object of type `CompleteIndexVector` that provides access to the - * indices of the labels for which the existing rule predicts - * @param featureIndex The index of the feature that should be considered when searching for refinements - * @return An unique pointer to an object of type `IRuleRefinement` that has been created - */ - virtual std::unique_ptr createRuleRefinement(const CompleteIndexVector& labelIndices, - uint32 featureIndex) = 0; - - /** - * Creates and returns a new instance of the type `IRuleRefinement` that allows to find the best refinement of - * an existing rule that predicts for a subset of the available labels. - * - * @param labelIndices A reference to an object of type `PartialIndexVector` that provides access to the - * indices of the labels for which the existing rule predicts - * @param featureIndex The index of the feature that should be considered when searching for refinements - * @return An unique pointer to an object of type `IRuleRefinement` that has been created - */ - virtual std::unique_ptr createRuleRefinement(const PartialIndexVector& labelIndices, - uint32 featureIndex) = 0; - - /** - * Filters the thresholds such that only those thresholds, which correspond to the instance space that is - * covered by specific condition of a rule, are included. - * - * @param condition A reference to an object of type `Condition` that stores the properties of the condition - */ - virtual void filterThresholds(const Condition& condition) = 0; - - /** - * Resets the filtered thresholds. This reverts the effects of all previous calls to the function - * `filterThresholds`. - */ - virtual void resetThresholds() = 0; - - /** - * Returns an object of type `ICoverageState` that keeps track of the elements that are covered by the - * refinement that has been applied via the function `applyRefinement`. 
- * - * @return A reference to an object of type `ICoverageState` that keeps track of the elements that are covered - * by the refinement - */ - virtual const ICoverageState& getCoverageState() const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type - * `CoverageMask`. - * - * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. - * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that - * are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageState, - const AbstractPrediction& head) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type - * `CoverageMask`. - * - * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. 
- * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that - * are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, - const AbstractPrediction& head) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type - * `CoverageSet`. - * - * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. 
- * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageSet` that keeps track of the examples that are - * covered by the rule - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageSet& coverageState, - const AbstractPrediction& head) const = 0; - - /** - * Calculates and returns a numerical score that assesses the quality of a rule's prediction for all examples - * that do not belong to the current sub-sample and are marked as covered according to a given object of type - * `CoverageSet`. - * - * For calculating the quality, only examples that belong to the training set and are not included in the - * current sub-sample, i.e., only examples with zero weights, are considered. - * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageSet` that keeps track of the examples that are - * covered by the rule - * @param head A reference to an object of type `AbstractPrediction` that stores the scores that are - * predicted by the rule - * @return An object of type `Quality` that stores the calculated quality - */ - virtual Quality evaluateOutOfSample(BiPartition& partition, const CoverageSet& coverageState, - const AbstractPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageMask`. 
- * - * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. - * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that - * are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageState, - AbstractPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageMask`. - * - * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. - * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that - * are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, - AbstractPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageSet`. - * - * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. 
- * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices - * of the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageMask` that keeps track of the examples that - * are covered by the rule - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(const SinglePartition& partition, const CoverageSet& coverageState, - AbstractPrediction& head) const = 0; - - /** - * Recalculates and updates a rule's prediction based on all examples in the training set that are marked as - * covered according to a given object of type `CoverageSet`. - * - * When calculating the updated prediction, the weights of the individual training examples are ignored and - * equally distributed weights are assumed instead. - * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of - * the training examples that belong to the training set - * @param coverageState A reference to an object of type `CoverageSet` that keeps track of the examples that are - * covered by the rule - * @param head A reference to an object of type `AbstractPrediction` to be updated - */ - virtual void recalculatePrediction(BiPartition& partition, const CoverageSet& coverageState, - AbstractPrediction& head) const = 0; - - /** - * Updates the statistics that correspond to the current subset based on the prediction of a rule. - * - * @param prediction A reference to an object of type `AbstractPrediction` that stores the prediction of the - * rule - */ - virtual void applyPrediction(const AbstractPrediction& prediction) = 0; - - /** - * Reverts the statistics that correspond to the current subset based on the predictions of a rule. 
- * - * @param prediction A reference to an object of type `AbstractPrediction` that stores the prediction of the - * rule - */ - virtual void revertPrediction(const AbstractPrediction& prediction) = 0; -}; diff --git a/cpp/subprojects/common/include/common/util/quality.hpp b/cpp/subprojects/common/include/common/util/quality.hpp deleted file mode 100644 index ce3ef208..00000000 --- a/cpp/subprojects/common/include/common/util/quality.hpp +++ /dev/null @@ -1,41 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -/** - * A struct that stores a numerical score that represents a quality. - */ -struct Quality { - public: - - Quality() {}; - - /** - * @param q A numerical score that represents the quality - */ - Quality(float64 q) : quality(q) {}; - - /** - * @param q A reference to an object of type `Quality` to be copied - */ - Quality(const Quality& q) : quality(q.quality) {}; - - /** - * Assigns the numerical score of an existing object to this object. - * - * @param q A reference to the existing object - * @return A reference to the modified object - */ - Quality& operator=(const Quality& q) { - quality = q.quality; - return *this; - } - - /** - * A numerical score that represents the quality. - */ - float64 quality; -}; diff --git a/cpp/subprojects/common/include/common/util/threads.hpp b/cpp/subprojects/common/include/common/util/threads.hpp deleted file mode 100644 index 75510f75..00000000 --- a/cpp/subprojects/common/include/common/util/threads.hpp +++ /dev/null @@ -1,20 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/types.hpp" - -#include -#include - -/** - * Returns the number of threads that are available for parallelized algorithms. 
- * - * @param numPreferredThreads The preferred number of threads or 0, if all available CPU cores should be used - * @return The number of available threads - */ -static inline uint32 getNumAvailableThreads(uint32 numPreferredThreads) { - uint32 numAvailableThreads = std::max(std::thread::hardware_concurrency(), 1); - return numPreferredThreads > 0 ? std::min(numAvailableThreads, numPreferredThreads) : numAvailableThreads; -} diff --git a/cpp/subprojects/common/include/common/util/validation.hpp b/cpp/subprojects/common/include/common/util/validation.hpp deleted file mode 100644 index 25130335..00000000 --- a/cpp/subprojects/common/include/common/util/validation.hpp +++ /dev/null @@ -1,87 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include -#include - -/** - * Throws a `std::invalid_argument` exception if a given value is not greater than a specific threshold. - * - * @tparam T The type of the value and threshold - * @param name The name of the parameter, the value corresponds to - * @param value The value - * @param threshold The threshold - */ -template -static inline constexpr void assertGreater(const std::string& name, const T value, const T threshold) { - if (value <= threshold) { - throw std::invalid_argument("Invalid value given for parameter \"" + name + "\": Must be greater than " - + std::to_string(threshold) + ", but is " + std::to_string(value)); - } -} - -/** - * Throws a `std::invalid_argument` exception if a given value not greater or equal to a specific threshold. 
- * - * @tparam T The type of the value and threshold - * @param name The name of the parameter, the value corresponds to - * @param value The value - * @param threshold The threshold - */ -template -static inline constexpr void assertGreaterOrEqual(const std::string& name, const T value, const T threshold) { - if (value < threshold) { - throw std::invalid_argument("Invalid value given for parameter \"" + name + "\": Must be greater or equal to " - + std::to_string(threshold) + ", but is " + std::to_string(value)); - } -} - -/** - * Throws a `std::invalid_argument` exception if a given value is not less than a specific threshold. - * - * @tparam T The type of the value and threshold - * @param name The name of the parameter, the value corresponds to - * @param value The value - * @param threshold The threshold - */ -template -static inline constexpr void assertLess(const std::string& name, const T value, const T threshold) { - if (value >= threshold) { - throw std::invalid_argument("Invalid value given for parameter \"" + name + "\": Must be less than " - + std::to_string(threshold) + ", but is " + std::to_string(value)); - } -} - -/** - * Throws a `std::invalid_argument` exception if a given value is not less or equal to a specific threshold. - * - * @tparam T The type of the value and threshold - * @param name The name of the parameter, the value corresponds to - * @param value The value - * @param threshold The threshold - */ -template -static inline constexpr void assertLessOrEqual(const std::string& name, const T value, const T threshold) { - if (value > threshold) { - throw std::invalid_argument("Invalid value given for parameter \"" + name + "\": Must be less or equal to " - + std::to_string(threshold) + ", but is " + std::to_string(value)); - } -} - -/** - * Throws a `std::invalid_argument` exception if a given value is not a multiple of another value. 
- * - * @tparam T The type of the values - * @param name The name of the parameter, the value corresponds to - * @param value The value that should be a multiple of `other` - * @param other The other value - */ -template -static inline constexpr void assertMultiple(const std::string& name, const T value, const T other) { - if (value % other != 0) { - throw std::invalid_argument("Invalid value given for parameter \"" + name + "\": Must be a multiple of " - + std::to_string(other) + ", but is " + std::to_string(value)); - } -} diff --git a/cpp/subprojects/common/meson.build b/cpp/subprojects/common/meson.build deleted file mode 100644 index 09d8710a..00000000 --- a/cpp/subprojects/common/meson.build +++ /dev/null @@ -1,146 +0,0 @@ -project('common', 'cpp') - -# Source files -source_files = [ - 'src/common/binning/bin_index_vector_dense.cpp', - 'src/common/binning/bin_index_vector_dok.cpp', - 'src/common/binning/bin_weight_vector.cpp', - 'src/common/binning/feature_binning_equal_frequency.cpp', - 'src/common/binning/feature_binning_equal_width.cpp', - 'src/common/binning/feature_binning_no.cpp', - 'src/common/binning/threshold_vector.cpp', - 'src/common/data/list_of_lists.cpp', - 'src/common/data/matrix_c_contiguous.cpp', - 'src/common/data/matrix_sparse_set.cpp', - 'src/common/data/ring_buffer.cpp', - 'src/common/data/vector_binned_dense.cpp', - 'src/common/data/vector_bit.cpp', - 'src/common/data/vector_dense.cpp', - 'src/common/data/vector_dok.cpp', - 'src/common/data/vector_dok_binary.cpp', - 'src/common/data/vector_sparse_array.cpp', - 'src/common/data/view_c_contiguous.cpp', - 'src/common/data/view_csc.cpp', - 'src/common/data/view_csc_binary.cpp', - 'src/common/data/view_csr.cpp', - 'src/common/data/view_csr_binary.cpp', - 'src/common/data/view_fortran_contiguous.cpp', - 'src/common/data/view_vector.cpp', - 'src/common/indices/index_iterator.cpp', - 'src/common/indices/index_vector_complete.cpp', - 'src/common/indices/index_vector_partial.cpp', - 
'src/common/input/feature_info_equal.cpp', - 'src/common/input/feature_info_mixed.cpp', - 'src/common/input/feature_matrix_c_contiguous.cpp', - 'src/common/input/feature_matrix_csc.cpp', - 'src/common/input/feature_matrix_csr.cpp', - 'src/common/input/feature_matrix_fortran_contiguous.cpp', - 'src/common/input/feature_type_nominal.cpp', - 'src/common/input/feature_type_numerical.cpp', - 'src/common/input/feature_type_ordinal.cpp', - 'src/common/input/feature_vector.cpp', - 'src/common/input/label_matrix_c_contiguous.cpp', - 'src/common/input/label_matrix_csc.cpp', - 'src/common/input/label_matrix_csr.cpp', - 'src/common/input/missing_feature_vector.cpp', - 'src/common/model/body_conjunctive.cpp', - 'src/common/model/body_empty.cpp', - 'src/common/model/condition_list.cpp', - 'src/common/model/head_complete.cpp', - 'src/common/model/head_partial.cpp', - 'src/common/model/rule_list.cpp', - 'src/common/multi_threading/multi_threading_manual.cpp', - 'src/common/multi_threading/multi_threading_no.cpp', - 'src/common/post_optimization/model_builder_intermediate.cpp', - 'src/common/post_optimization/post_optimization_phase_list.cpp', - 'src/common/post_optimization/post_optimization_sequential.cpp', - 'src/common/post_optimization/post_optimization_unused_rule_removal.cpp', - 'src/common/post_processing/post_processor_no.cpp', - 'src/common/prediction/label_space_info_no.cpp', - 'src/common/prediction/label_vector_set.cpp', - 'src/common/prediction/prediction_matrix_dense.cpp', - 'src/common/prediction/prediction_matrix_sparse_binary.cpp', - 'src/common/prediction/probability_calibration_isotonic.cpp', - 'src/common/prediction/probability_calibration_no.cpp', - 'src/common/rule_evaluation/score_vector_binned_dense.cpp', - 'src/common/rule_evaluation/score_vector_dense.cpp', - 'src/common/rule_induction/rule_induction_top_down_beam_search.cpp', - 'src/common/rule_induction/rule_induction_top_down_greedy.cpp', - 'src/common/rule_model_assemblage/default_rule.cpp', - 
'src/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp', - 'src/common/rule_pruning/rule_pruning_irep.cpp', - 'src/common/rule_pruning/rule_pruning_no.cpp', - 'src/common/rule_refinement/prediction_complete.cpp', - 'src/common/rule_refinement/prediction_evaluated.cpp', - 'src/common/rule_refinement/prediction_partial.cpp', - 'src/common/rule_refinement/prediction.cpp', - 'src/common/rule_refinement/refinement_comparator_fixed.cpp', - 'src/common/rule_refinement/refinement_comparator_single.cpp', - 'src/common/rule_refinement/rule_refinement_approximate.cpp', - 'src/common/rule_refinement/rule_refinement_exact.cpp', - 'src/common/rule_refinement/score_processor.cpp', - 'src/common/sampling/feature_sampling_no.cpp', - 'src/common/sampling/feature_sampling_predefined.cpp', - 'src/common/sampling/feature_sampling_without_replacement.cpp', - 'src/common/sampling/instance_sampling_no.cpp', - 'src/common/sampling/instance_sampling_stratified_example_wise.cpp', - 'src/common/sampling/instance_sampling_stratified_label_wise.cpp', - 'src/common/sampling/instance_sampling_with_replacement.cpp', - 'src/common/sampling/instance_sampling_without_replacement.cpp', - 'src/common/sampling/label_sampling_no.cpp', - 'src/common/sampling/label_sampling_round_robin.cpp', - 'src/common/sampling/label_sampling_without_replacement.cpp', - 'src/common/sampling/partition_bi.cpp', - 'src/common/sampling/partition_sampling_bi_random.cpp', - 'src/common/sampling/partition_sampling_bi_stratified_example_wise.cpp', - 'src/common/sampling/partition_sampling_bi_stratified_label_wise.cpp', - 'src/common/sampling/partition_sampling_no.cpp', - 'src/common/sampling/partition_single.cpp', - 'src/common/sampling/random.cpp', - 'src/common/sampling/stratified_sampling_example_wise.cpp', - 'src/common/sampling/stratified_sampling_label_wise.cpp', - 'src/common/sampling/weight_vector_bit.cpp', - 'src/common/sampling/weight_vector_dense.cpp', - 'src/common/sampling/weight_vector_equal.cpp', 
- 'src/common/sampling/weight_vector_out_of_sample.cpp', - 'src/common/stopping/global_pruning_post.cpp', - 'src/common/stopping/global_pruning_pre.cpp', - 'src/common/stopping/stopping_criterion_list.cpp', - 'src/common/stopping/stopping_criterion_size.cpp', - 'src/common/stopping/stopping_criterion_time.cpp', - 'src/common/thresholds/coverage_mask.cpp', - 'src/common/thresholds/coverage_set.cpp', - 'src/common/thresholds/thresholds_approximate.cpp', - 'src/common/thresholds/thresholds_exact.cpp', - 'src/common/learner.cpp' -] - -# Directory containing public headers -include_directories = include_directories('include') - -# Directory into which the library should be installed -install_root = meson.current_source_dir() / '../../../python/subprojects' -install_dir = install_root / meson.project_name() / 'mlrl' / meson.project_name() / 'cython/' - -# Library version -fs = import('fs') -version = fs.read('../../../VERSION') - -# Library declaration -lib_name = 'mlrl' + meson.project_name() -cpp_args = ['-fopenmp'] -link_args = ['-fopenmp'] - -if host_machine.system() == 'windows' - cpp_args = ['/openmp', '-DMLRLCOMMON_EXPORTS'] - link_args = [] -elif host_machine.system() == 'darwin' - cpp_args = ['-Xclang', '-fopenmp'] - link_args = ['-lomp'] -endif - -library(lib_name, source_files, include_directories : include_directories, cpp_args : cpp_args, link_args : link_args, - version : version, install : true, install_dir : install_dir) -common_lib = static_library(lib_name, source_files, include_directories : include_directories, cpp_args : cpp_args, - link_args : link_args) -common_dep = declare_dependency(include_directories : include_directories, link_with : common_lib) diff --git a/cpp/subprojects/common/src/common/binning/bin_index_vector_dense.cpp b/cpp/subprojects/common/src/common/binning/bin_index_vector_dense.cpp deleted file mode 100644 index 4a4b0f8d..00000000 --- a/cpp/subprojects/common/src/common/binning/bin_index_vector_dense.cpp +++ /dev/null @@ -1,18 
+0,0 @@ -#include "common/binning/bin_index_vector_dense.hpp" - -#include "common/statistics/statistics_weighted.hpp" - -DenseBinIndexVector::DenseBinIndexVector(uint32 numElements) : vector_(DenseVector(numElements)) {} - -uint32 DenseBinIndexVector::getBinIndex(uint32 exampleIndex) const { - return vector_[exampleIndex]; -} - -void DenseBinIndexVector::setBinIndex(uint32 exampleIndex, uint32 binIndex) { - vector_[exampleIndex] = binIndex; -} - -std::unique_ptr DenseBinIndexVector::createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const { - return statistics.createHistogram(*this, numBins); -} diff --git a/cpp/subprojects/common/src/common/binning/bin_index_vector_dok.cpp b/cpp/subprojects/common/src/common/binning/bin_index_vector_dok.cpp deleted file mode 100644 index 2c38173b..00000000 --- a/cpp/subprojects/common/src/common/binning/bin_index_vector_dok.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "common/binning/bin_index_vector_dok.hpp" - -#include "common/statistics/statistics_weighted.hpp" - -DokBinIndexVector::DokBinIndexVector() : vector_(DokVector(BIN_INDEX_SPARSE)) {} - -DokBinIndexVector::iterator DokBinIndexVector::begin() { - return vector_.begin(); -} - -DokBinIndexVector::iterator DokBinIndexVector::end() { - return vector_.end(); -} - -uint32 DokBinIndexVector::getBinIndex(uint32 exampleIndex) const { - return vector_[exampleIndex]; -} - -void DokBinIndexVector::setBinIndex(uint32 exampleIndex, uint32 binIndex) { - vector_.set(exampleIndex, binIndex); -} - -std::unique_ptr DokBinIndexVector::createHistogram(const IWeightedStatistics& statistics, - uint32 numBins) const { - return statistics.createHistogram(*this, numBins); -} diff --git a/cpp/subprojects/common/src/common/binning/bin_weight_vector.cpp b/cpp/subprojects/common/src/common/binning/bin_weight_vector.cpp deleted file mode 100644 index 0a1374b7..00000000 --- a/cpp/subprojects/common/src/common/binning/bin_weight_vector.cpp +++ /dev/null @@ -1,21 +0,0 @@ -#include 
"common/binning/bin_weight_vector.hpp" - -#include "common/data/arrays.hpp" - -BinWeightVector::BinWeightVector(uint32 numElements) : vector_(DenseVector(numElements)) {} - -void BinWeightVector::clear() { - setArrayToZeros(vector_.begin(), vector_.getNumElements()); -} - -void BinWeightVector::increaseWeight(uint32 pos) { - vector_[pos] += 1; -} - -bool BinWeightVector::operator[](uint32 pos) const { - return vector_[pos] != 0; -} - -uint32 BinWeightVector::getNumElements() const { - return vector_.getNumElements(); -} diff --git a/cpp/subprojects/common/src/common/binning/feature_binning_equal_frequency.cpp b/cpp/subprojects/common/src/common/binning/feature_binning_equal_frequency.cpp deleted file mode 100644 index f2cb7959..00000000 --- a/cpp/subprojects/common/src/common/binning/feature_binning_equal_frequency.cpp +++ /dev/null @@ -1,231 +0,0 @@ -#include "common/binning/feature_binning_equal_frequency.hpp" - -#include "common/binning/bin_index_vector_dense.hpp" -#include "common/binning/bin_index_vector_dok.hpp" -#include "common/math/math.hpp" -#include "common/thresholds/thresholds_approximate.hpp" -#include "common/util/validation.hpp" -#include "feature_binning_nominal.hpp" - -static inline uint32 getNumBins(FeatureVector& featureVector, bool sparse, float32 binRatio, uint32 minBins, - uint32 maxBins) { - uint32 numElements = featureVector.getNumElements(); - - if (numElements > 0) { - featureVector.sortByValues(); - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - uint32 numDistinctValues = 1; - float32 previousValue; - uint32 i; - - if (sparse) { - previousValue = 0; - i = 0; - } else { - previousValue = featureIterator[0].value; - i = 1; - } - - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if ((!sparse || currentValue != 0) && currentValue != previousValue) { - numDistinctValues++; - previousValue = currentValue; - } - } - - return numDistinctValues > 1 ? 
calculateBoundedFraction(numDistinctValues, binRatio, minBins, maxBins) : 0; - } - - return 0; -} - -/** - * An implementation of the type `IFeatureBinning` that assigns numerical feature values to bins, such that each bin - * contains approximately the same number of values. - */ -class EqualFrequencyFeatureBinning final : public IFeatureBinning { - private: - - const float32 binRatio_; - - const uint32 minBins_; - - const uint32 maxBins_; - - public: - - /** - * @param binRatio A percentage that specifies how many bins should be used, e.g., if 100 values are available, - * 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. Must be in (0, 1) - * @param minBins The minimum number of bins to be used. Must be at least 2 - * @param maxBins The maximum number of bins to be used. Must be at least `minBins` or 0, if the maximum - * number of bins should not be restricted - */ - EqualFrequencyFeatureBinning(float32 binRatio, uint32 minBins, uint32 maxBins) - : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - - Result createBins(FeatureVector& featureVector, uint32 numExamples) const override { - Result result; - uint32 numElements = featureVector.getNumElements(); - uint32 numSparse = numExamples - numElements; - bool sparse = numSparse > 0; - uint32 numBins = getNumBins(featureVector, sparse, binRatio_, minBins_, maxBins_); - result.thresholdVectorPtr = std::make_unique(featureVector, numBins); - - if (sparse) { - result.binIndicesPtr = std::make_unique(); - } else { - result.binIndicesPtr = std::make_unique(numElements); - } - - if (numBins > 0) { - IBinIndexVector& binIndices = *result.binIndicesPtr; - ThresholdVector& thresholdVector = *result.thresholdVectorPtr; - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - ThresholdVector::iterator thresholdIterator = thresholdVector.begin(); - uint32 numElementsPerBin = (uint32) std::ceil((float) numElements / (float) numBins); - uint32 numElementsInCurrentBin = 0; - uint32 
binIndex = 0; - float32 previousValue = 0; - uint32 i = 0; - - // Iterate feature values < 0... - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (currentValue >= 0) { - break; - } - - if (currentValue != previousValue) { - if (numElementsInCurrentBin >= numElementsPerBin) { - thresholdIterator[binIndex] = arithmeticMean(previousValue, currentValue); - binIndex++; - numElementsInCurrentBin = 0; - } - - previousValue = currentValue; - } - - binIndices.setBinIndex(featureIterator[i].index, binIndex); - numElementsInCurrentBin++; - } - - // If there are any sparse values, check if they belong to the current one or the next one... - if (sparse) { - previousValue = 0; - - if (numElementsInCurrentBin >= numElementsPerBin) { - thresholdIterator[binIndex] = arithmeticMean(previousValue, 0); - binIndex++; - numElementsInCurrentBin = 0; - } - - thresholdVector.setSparseBinIndex(binIndex); - numElementsInCurrentBin += numSparse; - } - - // Iterate feature values >= 0... - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (!sparse || currentValue != 0) { - if (currentValue != previousValue) { - if (numElementsInCurrentBin >= numElementsPerBin) { - thresholdIterator[binIndex] = arithmeticMean(previousValue, currentValue); - binIndex++; - numElementsInCurrentBin = 0; - } - - previousValue = currentValue; - } - - binIndices.setBinIndex(featureIterator[i].index, binIndex); - numElementsInCurrentBin++; - } - } - - thresholdVector.setNumElements(binIndex + 1, true); - } - - return result; - } -}; - -/** - * Allows to create instances of the type `IFeatureBinning` that assign numerical feature values to bins, such that each - * bin contains approximately the same number of values. 
- */ -class EqualFrequencyFeatureBinningFactory final : public IFeatureBinningFactory { - private: - - const float32 binRatio_; - - const uint32 minBins_; - - const uint32 maxBins_; - - public: - - /** - * @param binRatio A percentage that specifies how many bins should be used, e.g., if 100 values are available, - * a percentage of 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. Must be in (0, 1) - * @param minBins The minimum number of bins to be used. Must be at least 2 - * @param maxBins The maximum number of bins to be used. Must be at least `minBins` or 0, if the maximum - * number of bins should not be restricted - */ - EqualFrequencyFeatureBinningFactory(float32 binRatio, uint32 minBins, uint32 maxBins) - : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - - std::unique_ptr create() const override { - return std::make_unique(binRatio_, minBins_, maxBins_); - } -}; - -EqualFrequencyFeatureBinningConfig::EqualFrequencyFeatureBinningConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : binRatio_(0.33f), minBins_(2), maxBins_(0), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -float32 EqualFrequencyFeatureBinningConfig::getBinRatio() const { - return binRatio_; -} - -IEqualFrequencyFeatureBinningConfig& EqualFrequencyFeatureBinningConfig::setBinRatio(float32 binRatio) { - assertGreater("binRatio", binRatio, 0); - assertLess("binRatio", binRatio, 1); - binRatio_ = binRatio; - return *this; -} - -uint32 EqualFrequencyFeatureBinningConfig::getMinBins() const { - return minBins_; -} - -IEqualFrequencyFeatureBinningConfig& EqualFrequencyFeatureBinningConfig::setMinBins(uint32 minBins) { - assertGreaterOrEqual("minBins", minBins, 2); - minBins_ = minBins; - return *this; -} - -uint32 EqualFrequencyFeatureBinningConfig::getMaxBins() const { - return maxBins_; -} - -IEqualFrequencyFeatureBinningConfig& EqualFrequencyFeatureBinningConfig::setMaxBins(uint32 maxBins) { - if (maxBins != 0) assertGreaterOrEqual("maxBins", maxBins, 
minBins_); - maxBins_ = maxBins; - return *this; -} - -std::unique_ptr EqualFrequencyFeatureBinningConfig::createThresholdsFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - std::unique_ptr numericalFeatureBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - std::unique_ptr nominalFeatureBinningFactoryPtr = - std::make_unique(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - return std::make_unique(std::move(numericalFeatureBinningFactoryPtr), - std::move(nominalFeatureBinningFactoryPtr), numThreads); -} diff --git a/cpp/subprojects/common/src/common/binning/feature_binning_equal_width.cpp b/cpp/subprojects/common/src/common/binning/feature_binning_equal_width.cpp deleted file mode 100644 index 49b034d9..00000000 --- a/cpp/subprojects/common/src/common/binning/feature_binning_equal_width.cpp +++ /dev/null @@ -1,251 +0,0 @@ -#include "common/binning/feature_binning_equal_width.hpp" - -#include "common/binning/bin_index_vector_dense.hpp" -#include "common/binning/bin_index_vector_dok.hpp" -#include "common/math/math.hpp" -#include "common/thresholds/thresholds_approximate.hpp" -#include "common/util/validation.hpp" -#include "feature_binning_nominal.hpp" - -#include -#include - -static inline std::tuple preprocess(const FeatureVector& featureVector, bool sparse, - float32 binRatio, uint32 minBins, uint32 maxBins) { - std::tuple result; - uint32 numElements = featureVector.getNumElements(); - - if (numElements > 0) { - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - float32 minValue; - uint32 i; - - if (sparse) { - minValue = 0; - i = 0; - } else { - minValue = featureIterator[0].value; - i = 1; - } - - float32 maxValue = minValue; - uint32 numDistinctValues = 1; - std::unordered_set distinctValues; - - for (; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if ((!sparse || currentValue != 0) && 
distinctValues.insert(currentValue).second) { - numDistinctValues++; - - if (currentValue < minValue) { - minValue = currentValue; - } - - if (currentValue > maxValue) { - maxValue = currentValue; - } - } - } - - std::get<0>(result) = - numDistinctValues > 1 ? calculateBoundedFraction(numDistinctValues, binRatio, minBins, maxBins) : 0; - std::get<1>(result) = minValue; - std::get<2>(result) = maxValue; - } else { - std::get<0>(result) = 0; - } - - return result; -} - -static inline uint32 getBinIndex(float32 value, float32 min, float32 width, uint32 numBins) { - uint32 binIndex = (uint32) std::floor((value - min) / width); - return binIndex >= numBins ? numBins - 1 : binIndex; -} - -/** - * An implementation of the type `IFeatureBinning` that assigns numerical feature values to bins, such that each bin - * contains values from equally sized value ranges. - */ -class EqualWidthFeatureBinning final : public IFeatureBinning { - private: - - const float32 binRatio_; - - const uint32 minBins_; - - const uint32 maxBins_; - - public: - - /** - * @param binRatio A percentage that specifies how many bins should be used, e.g., if 100 values are available, - * 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. Must be in (0, 1) - * @param minBins The minimum number of bins to be used. Must be at least 2 - * @param maxBins The maximum number of bins to be used. 
Must be at least `minBins` or 0, if the maximum - * number of bins should not be restricted - */ - EqualWidthFeatureBinning(float32 binRatio, uint32 minBins, uint32 maxBins) - : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - - Result createBins(FeatureVector& featureVector, uint32 numExamples) const override { - Result result; - uint32 numElements = featureVector.getNumElements(); - bool sparse = numElements < numExamples; - std::tuple tuple = - preprocess(featureVector, sparse, binRatio_, minBins_, maxBins_); - uint32 numBins = std::get<0>(tuple); - result.thresholdVectorPtr = std::make_unique(featureVector, numBins, true); - - if (sparse) { - result.binIndicesPtr = std::make_unique(); - } else { - result.binIndicesPtr = std::make_unique(numElements); - } - - if (numBins > 0) { - IBinIndexVector& binIndices = *result.binIndicesPtr; - ThresholdVector& thresholdVector = *result.thresholdVectorPtr; - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - ThresholdVector::iterator thresholdIterator = thresholdVector.begin(); - float32 min = std::get<1>(tuple); - float32 max = std::get<2>(tuple); - float32 width = (max - min) / numBins; - uint32 sparseBinIndex; - - // If there are any sparse values, identify the bin they belong to... - if (sparse) { - sparseBinIndex = getBinIndex(0, min, width, numBins); - thresholdIterator[sparseBinIndex] = 1; - thresholdVector.setSparseBinIndex(sparseBinIndex); - } else { - sparseBinIndex = numBins; - } - - // Iterate all non-sparse feature values and identify the bins they belong to... - for (uint32 i = 0; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (!sparse || currentValue != 0) { - uint32 binIndex = getBinIndex(currentValue, min, width, numBins); - - if (binIndex != sparseBinIndex) { - thresholdIterator[binIndex] = 1; - binIndices.setBinIndex(featureIterator[i].index, binIndex); - } - } - } - - // Remove empty bins and calculate thresholds... 
- uint32* mapping = new uint32[numBins]; - uint32 n = 0; - - for (uint32 i = 0; i < numBins; i++) { - mapping[i] = n; - - if (thresholdIterator[i] > 0) { - thresholdIterator[n] = min + ((i + 1) * width); - n++; - } - } - - thresholdVector.setNumElements(n, true); - - // Adjust bin indices... - DokBinIndexVector* dokBinIndices = dynamic_cast(&binIndices); - - if (dokBinIndices) { - for (auto it = dokBinIndices->begin(); it != dokBinIndices->end(); it++) { - uint32 binIndex = it->second; - it->second = mapping[binIndex]; - } - } else { - for (uint32 i = 0; i < numElements; i++) { - uint32 binIndex = binIndices.getBinIndex(i); - binIndices.setBinIndex(i, mapping[binIndex]); - } - } - - delete[] mapping; - } - - return result; - } -}; - -/** - * Allows to create instances of the type `IFeatureBinning` that assign numerical feature values to bins, such that each - * bin contains values from equally sized value ranges. - */ -class EqualWidthFeatureBinningFactory final : public IFeatureBinningFactory { - private: - - const float32 binRatio_; - - const uint32 minBins_; - - const uint32 maxBins_; - - public: - - /** - * @param binRatio A percentage that specifies how many bins should be used, e.g., if 100 values are available, - * 0.5 means that `ceil(0.5 * 100) = 50` bins should be used. Must be in (0, 1) - * @param minBins The minimum number of bins to be used. Must be at least 2 - * @param maxBins The maximum number of bins to be used. 
Must be at least `minBins` or 0, if the maximum - * number of bins should not be restricted - */ - EqualWidthFeatureBinningFactory(float32 binRatio, uint32 minBins, uint32 maxBins) - : binRatio_(binRatio), minBins_(minBins), maxBins_(maxBins) {} - - std::unique_ptr create() const override { - return std::make_unique(binRatio_, minBins_, maxBins_); - } -}; - -EqualWidthFeatureBinningConfig::EqualWidthFeatureBinningConfig( - const std::unique_ptr& multiThreadingConfigPtr) - : binRatio_(0.33f), minBins_(2), maxBins_(0), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -float32 EqualWidthFeatureBinningConfig::getBinRatio() const { - return binRatio_; -} - -IEqualWidthFeatureBinningConfig& EqualWidthFeatureBinningConfig::setBinRatio(float32 binRatio) { - assertGreater("binRatio", binRatio, 0); - assertLess("binRatio", binRatio, 1); - binRatio_ = binRatio; - return *this; -} - -uint32 EqualWidthFeatureBinningConfig::getMinBins() const { - return minBins_; -} - -IEqualWidthFeatureBinningConfig& EqualWidthFeatureBinningConfig::setMinBins(uint32 minBins) { - assertGreaterOrEqual("minBins", minBins, 2); - minBins_ = minBins; - return *this; -} - -uint32 EqualWidthFeatureBinningConfig::getMaxBins() const { - return maxBins_; -} - -IEqualWidthFeatureBinningConfig& EqualWidthFeatureBinningConfig::setMaxBins(uint32 maxBins) { - if (maxBins != 0) assertGreaterOrEqual("maxBins", maxBins, minBins_); - maxBins_ = maxBins; - return *this; -} - -std::unique_ptr EqualWidthFeatureBinningConfig::createThresholdsFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - std::unique_ptr numericalFeatureBinningFactoryPtr = - std::make_unique(binRatio_, minBins_, maxBins_); - std::unique_ptr nominalFeatureBinningFactoryPtr = - std::make_unique(); - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - return std::make_unique(std::move(numericalFeatureBinningFactoryPtr), - 
std::move(nominalFeatureBinningFactoryPtr), numThreads); -} diff --git a/cpp/subprojects/common/src/common/binning/feature_binning_no.cpp b/cpp/subprojects/common/src/common/binning/feature_binning_no.cpp deleted file mode 100644 index 94b608c1..00000000 --- a/cpp/subprojects/common/src/common/binning/feature_binning_no.cpp +++ /dev/null @@ -1,12 +0,0 @@ -#include "common/binning/feature_binning_no.hpp" - -#include "common/thresholds/thresholds_exact.hpp" - -NoFeatureBinningConfig::NoFeatureBinningConfig(const std::unique_ptr& multiThreadingConfigPtr) - : multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -std::unique_ptr NoFeatureBinningConfig::createThresholdsFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - return std::make_unique(numThreads); -} diff --git a/cpp/subprojects/common/src/common/binning/feature_binning_nominal.hpp b/cpp/subprojects/common/src/common/binning/feature_binning_nominal.hpp deleted file mode 100644 index a73a702d..00000000 --- a/cpp/subprojects/common/src/common/binning/feature_binning_nominal.hpp +++ /dev/null @@ -1,80 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/binning/bin_index_vector_dense.hpp" -#include "common/binning/bin_index_vector_dok.hpp" -#include "common/binning/feature_binning.hpp" - -#include - -/** - * An implementation of the type `IFeatureBinning` that assigns nominal feature values to bins, such that each bin - * contains one of the available values. - */ -class NominalFeatureBinning final : public IFeatureBinning { - public: - - Result createBins(FeatureVector& featureVector, uint32 numExamples) const override { - Result result; - uint32 numElements = featureVector.getNumElements(); - bool sparse = numElements < numExamples; - uint32 maxBins = sparse ? 
numElements + 1 : numElements; - result.thresholdVectorPtr = std::make_unique(featureVector, maxBins); - - if (sparse) { - result.binIndicesPtr = std::make_unique(); - } else { - result.binIndicesPtr = std::make_unique(numElements); - } - - if (numElements > 0) { - IBinIndexVector& binIndices = *result.binIndicesPtr; - ThresholdVector& thresholdVector = *result.thresholdVectorPtr; - FeatureVector::const_iterator featureIterator = featureVector.cbegin(); - ThresholdVector::iterator thresholdIterator = thresholdVector.begin(); - std::unordered_map mapping; - uint32 nextBinIndex = 0; - - if (sparse) { - thresholdVector.setSparseBinIndex(0); - thresholdIterator[0] = 0; - nextBinIndex++; - } - - for (uint32 i = 0; i < numElements; i++) { - float32 currentValue = featureIterator[i].value; - - if (!sparse || currentValue != 0) { - uint32 index = featureIterator[i].index; - auto mapIterator = mapping.emplace(currentValue, nextBinIndex); - - if (mapIterator.second) { - thresholdIterator[nextBinIndex] = currentValue; - binIndices.setBinIndex(index, nextBinIndex); - nextBinIndex++; - } else { - binIndices.setBinIndex(index, mapIterator.first->second); - } - } - } - - thresholdVector.setNumElements(nextBinIndex, true); - } - - return result; - } -}; - -/** - * Allows to create instances of the type `IFeatureBinning` that assign nominal feature values to bins, such that each - * bin contains one of the available values. 
- */ -class NominalFeatureBinningFactory final : public IFeatureBinningFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; diff --git a/cpp/subprojects/common/src/common/binning/threshold_vector.cpp b/cpp/subprojects/common/src/common/binning/threshold_vector.cpp deleted file mode 100644 index c75252cc..00000000 --- a/cpp/subprojects/common/src/common/binning/threshold_vector.cpp +++ /dev/null @@ -1,50 +0,0 @@ -#include "common/binning/threshold_vector.hpp" - -ThresholdVector::ThresholdVector(MissingFeatureVector& missingFeatureVector, uint32 numElements) - : ThresholdVector(missingFeatureVector, numElements, false) {} - -ThresholdVector::ThresholdVector(MissingFeatureVector& missingFeatureVector, uint32 numElements, bool init) - : MissingFeatureVector(missingFeatureVector), vector_(DenseVector(numElements, init)), - sparseBinIndex_(numElements) {} - -ThresholdVector::iterator ThresholdVector::begin() { - return vector_.begin(); -} - -ThresholdVector::iterator ThresholdVector::end() { - return vector_.end(); -} - -ThresholdVector::const_iterator ThresholdVector::cbegin() const { - return vector_.cbegin(); -} - -ThresholdVector::const_iterator ThresholdVector::cend() const { - return vector_.cend(); -} - -uint32 ThresholdVector::getNumElements() const { - return vector_.getNumElements(); -} - -void ThresholdVector::setNumElements(uint32 numElements, bool freeMemory) { - vector_.setNumElements(numElements, freeMemory); - - if (sparseBinIndex_ > numElements) { - sparseBinIndex_ = numElements; - } -} - -uint32 ThresholdVector::getSparseBinIndex() const { - return sparseBinIndex_; -} - -void ThresholdVector::setSparseBinIndex(uint32 sparseBinIndex) { - uint32 numElements = this->getNumElements(); - - if (sparseBinIndex > numElements) { - sparseBinIndex_ = numElements; - } else { - sparseBinIndex_ = sparseBinIndex; - } -} diff --git a/cpp/subprojects/common/src/common/data/list_of_lists.cpp 
b/cpp/subprojects/common/src/common/data/list_of_lists.cpp deleted file mode 100644 index a903d180..00000000 --- a/cpp/subprojects/common/src/common/data/list_of_lists.cpp +++ /dev/null @@ -1,82 +0,0 @@ -#include "common/data/list_of_lists.hpp" - -#include "common/data/indexed_value.hpp" -#include "common/data/triple.hpp" -#include "common/data/tuple.hpp" - -template -ListOfLists::ListOfLists(uint32 numRows) - : numRows_(numRows), array_(new std::vector[numRows] { - }) {} - -template -ListOfLists::~ListOfLists() { - delete[] array_; -} - -template -typename ListOfLists::iterator ListOfLists::begin(uint32 row) { - return array_[row].begin(); -} - -template -typename ListOfLists::iterator ListOfLists::end(uint32 row) { - return array_[row].end(); -} - -template -typename ListOfLists::const_iterator ListOfLists::cbegin(uint32 row) const { - return array_[row].cbegin(); -} - -template -typename ListOfLists::const_iterator ListOfLists::cend(uint32 row) const { - return array_[row].cend(); -} - -template -typename ListOfLists::row ListOfLists::operator[](uint32 row) { - return array_[row]; -} - -template -typename ListOfLists::const_row ListOfLists::operator[](uint32 row) const { - return array_[row]; -} - -template -uint32 ListOfLists::getNumRows() const { - return numRows_; -} - -template -void ListOfLists::clear() { - for (uint32 i = 0; i < numRows_; i++) { - array_[i].clear(); - } -} - -template class ListOfLists; -template class ListOfLists; -template class ListOfLists; -template class ListOfLists; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>>; -template class ListOfLists>>; -template class ListOfLists>>; -template class ListOfLists>>; -template class ListOfLists>; -template class ListOfLists>; -template class ListOfLists>; -template class 
ListOfLists>; -template class ListOfLists>>; -template class ListOfLists>>; -template class ListOfLists>>; -template class ListOfLists>>; diff --git a/cpp/subprojects/common/src/common/data/matrix_c_contiguous.cpp b/cpp/subprojects/common/src/common/data/matrix_c_contiguous.cpp deleted file mode 100644 index e4766683..00000000 --- a/cpp/subprojects/common/src/common/data/matrix_c_contiguous.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "common/data/matrix_c_contiguous.hpp" - -#include - -template -CContiguousMatrix::CContiguousMatrix(uint32 numRows, uint32 numCols) - : CContiguousMatrix(numRows, numCols, false) {} - -template -CContiguousMatrix::CContiguousMatrix(uint32 numRows, uint32 numCols, bool init) - : CContiguousView(numRows, numCols, - (T*) (init ? calloc(numRows * numCols, sizeof(T)) : malloc(numRows * numCols * sizeof(T)))) {} - -template -CContiguousMatrix::~CContiguousMatrix() { - free(this->array_); -} - -template class CContiguousMatrix; -template class CContiguousMatrix; -template class CContiguousMatrix; -template class CContiguousMatrix; diff --git a/cpp/subprojects/common/src/common/data/matrix_sparse_set.cpp b/cpp/subprojects/common/src/common/data/matrix_sparse_set.cpp deleted file mode 100644 index 067a5fce..00000000 --- a/cpp/subprojects/common/src/common/data/matrix_sparse_set.cpp +++ /dev/null @@ -1,196 +0,0 @@ -#include "common/data/matrix_sparse_set.hpp" - -#include "common/data/arrays.hpp" -#include "common/data/triple.hpp" -#include "common/data/tuple.hpp" - -static const uint32 MAX_INDEX = std::numeric_limits::max(); - -template -static inline void clearRow(typename LilMatrix::row row, - typename CContiguousView::value_iterator indexIterator) { - while (!row.empty()) { - const IndexedValue& lastEntry = row.back(); - indexIterator[lastEntry.index] = MAX_INDEX; - row.pop_back(); - } -} - -template -SparseSetMatrix::ConstRow::ConstRow(typename LilMatrix::const_row row, - typename CContiguousView::value_const_iterator indexIterator) - : 
row_(row), indexIterator_(indexIterator) {} - -template -typename LilMatrix::const_iterator SparseSetMatrix::ConstRow::cbegin() const { - return row_.cbegin(); -} - -template -typename LilMatrix::const_iterator SparseSetMatrix::ConstRow::cend() const { - return row_.cend(); -} - -template -uint32 SparseSetMatrix::ConstRow::getNumElements() const { - return (uint32) row_.size(); -} - -template -const IndexedValue* SparseSetMatrix::ConstRow::operator[](uint32 index) const { - uint32 i = indexIterator_[index]; - return i == MAX_INDEX ? nullptr : &row_[i]; -} - -template -SparseSetMatrix::Row::Row(typename LilMatrix::row row, - typename CContiguousView::value_iterator indexIterator) - : row_(row), indexIterator_(indexIterator) {} - -template -typename LilMatrix::iterator SparseSetMatrix::Row::begin() { - return row_.begin(); -} - -template -typename LilMatrix::iterator SparseSetMatrix::Row::end() { - return row_.end(); -} - -template -typename LilMatrix::const_iterator SparseSetMatrix::Row::cbegin() const { - return row_.cbegin(); -} - -template -typename LilMatrix::const_iterator SparseSetMatrix::Row::cend() const { - return row_.cend(); -} - -template -uint32 SparseSetMatrix::Row::getNumElements() const { - return (uint32) row_.size(); -} - -template -const IndexedValue* SparseSetMatrix::Row::operator[](uint32 index) const { - uint32 i = indexIterator_[index]; - return i == MAX_INDEX ? 
nullptr : &row_[i]; -} - -template -IndexedValue& SparseSetMatrix::Row::emplace(uint32 index) { - uint32 i = indexIterator_[index]; - - if (i == MAX_INDEX) { - indexIterator_[index] = (uint32) row_.size(); - row_.emplace_back(index); - return row_.back(); - } - - return row_[i]; -} - -template -IndexedValue& SparseSetMatrix::Row::emplace(uint32 index, const T& defaultValue) { - uint32 i = indexIterator_[index]; - - if (i == MAX_INDEX) { - indexIterator_[index] = (uint32) row_.size(); - row_.emplace_back(index, defaultValue); - return row_.back(); - } - - return row_[i]; -} - -template -void SparseSetMatrix::Row::erase(uint32 index) { - uint32 i = indexIterator_[index]; - - if (i != MAX_INDEX) { - const IndexedValue& lastEntry = row_.back(); - uint32 lastIndex = lastEntry.index; - - if (lastIndex != index) { - row_[i] = lastEntry; - indexIterator_[lastIndex] = i; - } - - indexIterator_[index] = MAX_INDEX; - row_.pop_back(); - } -} - -template -void SparseSetMatrix::Row::clear() { - clearRow(row_, indexIterator_); -} - -template -SparseSetMatrix::SparseSetMatrix(uint32 numRows, uint32 numCols) - : lilMatrix_(LilMatrix(numRows)), indexMatrix_(CContiguousMatrix(numRows, numCols)) { - setArrayToValue(indexMatrix_.values_begin(0), numRows * numCols, MAX_INDEX); -} - -template -typename SparseSetMatrix::iterator SparseSetMatrix::begin(uint32 row) { - return lilMatrix_.begin(row); -} - -template -typename SparseSetMatrix::iterator SparseSetMatrix::end(uint32 row) { - return lilMatrix_.end(row); -} - -template -typename SparseSetMatrix::const_iterator SparseSetMatrix::cbegin(uint32 row) const { - return lilMatrix_.cbegin(row); -} - -template -typename SparseSetMatrix::const_iterator SparseSetMatrix::cend(uint32 row) const { - return lilMatrix_.cend(row); -} - -template -typename SparseSetMatrix::row SparseSetMatrix::operator[](uint32 row) { - return Row(lilMatrix_[row], indexMatrix_.values_begin(row)); -} - -template -typename SparseSetMatrix::const_row 
SparseSetMatrix::operator[](uint32 row) const { - return ConstRow(lilMatrix_[row], indexMatrix_.values_cbegin(row)); -} - -template -uint32 SparseSetMatrix::getNumRows() const { - return lilMatrix_.getNumRows(); -} - -template -uint32 SparseSetMatrix::getNumCols() const { - return indexMatrix_.getNumCols(); -} - -template -void SparseSetMatrix::clear() { - uint32 numRows = lilMatrix_.getNumRows(); - - for (uint32 i = 0; i < numRows; i++) { - clearRow(lilMatrix_[i], indexMatrix_.values_begin(i)); - } -} - -template class SparseSetMatrix; -template class SparseSetMatrix; -template class SparseSetMatrix; -template class SparseSetMatrix; -template class SparseSetMatrix>; -template class SparseSetMatrix>; -template class SparseSetMatrix>; -template class SparseSetMatrix>; -template class SparseSetMatrix>; -template class SparseSetMatrix>; -template class SparseSetMatrix>; -template class SparseSetMatrix>; diff --git a/cpp/subprojects/common/src/common/data/ring_buffer.cpp b/cpp/subprojects/common/src/common/data/ring_buffer.cpp deleted file mode 100644 index d371e25c..00000000 --- a/cpp/subprojects/common/src/common/data/ring_buffer.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "common/data/ring_buffer.hpp" - -template -RingBuffer::RingBuffer(uint32 capacity) : array_(new T[capacity]), capacity_(capacity), pos_(0), full_(false) {} - -template -RingBuffer::~RingBuffer() { - delete[] array_; -} - -template -typename RingBuffer::const_iterator RingBuffer::cbegin() const { - return array_; -} - -template -typename RingBuffer::const_iterator RingBuffer::cend() const { - return &array_[full_ ? capacity_ : pos_]; -} - -template -uint32 RingBuffer::getCapacity() const { - return capacity_; -} - -template -uint32 RingBuffer::getNumElements() const { - return full_ ? 
capacity_ : pos_; -} - -template -bool RingBuffer::isFull() const { - return full_; -} - -template -std::pair RingBuffer::push(T value) { - std::pair result; - result.first = full_; - result.second = array_[pos_]; - array_[pos_] = value; - pos_++; - - if (pos_ >= capacity_) { - pos_ = 0; - full_ = true; - } - - return result; -} - -template class RingBuffer; -template class RingBuffer; -template class RingBuffer; -template class RingBuffer; diff --git a/cpp/subprojects/common/src/common/data/vector_binned_dense.cpp b/cpp/subprojects/common/src/common/data/vector_binned_dense.cpp deleted file mode 100644 index 1a409114..00000000 --- a/cpp/subprojects/common/src/common/data/vector_binned_dense.cpp +++ /dev/null @@ -1,134 +0,0 @@ -#include "common/data/vector_binned_dense.hpp" - -template -DenseBinnedVector::ValueConstIterator::ValueConstIterator(DenseVector::const_iterator binIndexIterator, - typename DenseVector::const_iterator valueIterator) - : binIndexIterator_(binIndexIterator), valueIterator_(valueIterator) {} - -template -typename DenseBinnedVector::ValueConstIterator::reference DenseBinnedVector::ValueConstIterator::operator[]( - uint32 index) const { - uint32 binIndex = binIndexIterator_[index]; - return valueIterator_[binIndex]; -} - -template -typename DenseBinnedVector::ValueConstIterator::reference DenseBinnedVector::ValueConstIterator::operator*() - const { - uint32 binIndex = *binIndexIterator_; - return valueIterator_[binIndex]; -} - -template -typename DenseBinnedVector::ValueConstIterator& DenseBinnedVector::ValueConstIterator::operator++() { - ++binIndexIterator_; - return *this; -} - -template -typename DenseBinnedVector::ValueConstIterator& DenseBinnedVector::ValueConstIterator::operator++(int n) { - binIndexIterator_++; - return *this; -} - -template -typename DenseBinnedVector::ValueConstIterator& DenseBinnedVector::ValueConstIterator::operator--() { - --binIndexIterator_; - return *this; -} - -template -typename 
DenseBinnedVector::ValueConstIterator& DenseBinnedVector::ValueConstIterator::operator--(int n) { - binIndexIterator_--; - return *this; -} - -template -bool DenseBinnedVector::ValueConstIterator::operator!=(const ValueConstIterator& rhs) const { - return binIndexIterator_ != rhs.binIndexIterator_; -} - -template -bool DenseBinnedVector::ValueConstIterator::operator==(const ValueConstIterator& rhs) const { - return binIndexIterator_ == rhs.binIndexIterator_; -} - -template -typename DenseBinnedVector::ValueConstIterator::difference_type DenseBinnedVector::ValueConstIterator::operator-( - const ValueConstIterator& rhs) const { - return (difference_type) (binIndexIterator_ - rhs.binIndexIterator_); -} - -template -DenseBinnedVector::DenseBinnedVector(uint32 numElements, uint32 numBins) - : binIndices_(DenseVector(numElements)), values_(DenseVector(numBins)) {} - -template -typename DenseBinnedVector::const_iterator DenseBinnedVector::cbegin() const { - return ValueConstIterator(binIndices_.cbegin(), values_.cbegin()); -} - -template -typename DenseBinnedVector::const_iterator DenseBinnedVector::cend() const { - return ValueConstIterator(binIndices_.cend(), values_.cbegin()); -} - -template -typename DenseBinnedVector::index_iterator DenseBinnedVector::indices_begin() { - return binIndices_.begin(); -} - -template -typename DenseBinnedVector::index_iterator DenseBinnedVector::indices_end() { - return binIndices_.end(); -} - -template -typename DenseBinnedVector::index_const_iterator DenseBinnedVector::indices_cbegin() const { - return binIndices_.cbegin(); -} - -template -typename DenseBinnedVector::index_const_iterator DenseBinnedVector::indices_cend() const { - return binIndices_.cend(); -} - -template -typename DenseBinnedVector::value_iterator DenseBinnedVector::values_begin() { - return values_.begin(); -} - -template -typename DenseBinnedVector::value_iterator DenseBinnedVector::values_end() { - return values_.end(); -} - -template -typename 
DenseBinnedVector::value_const_iterator DenseBinnedVector::values_cbegin() const { - return values_.cbegin(); -} - -template -typename DenseBinnedVector::value_const_iterator DenseBinnedVector::values_cend() const { - return values_.cend(); -} - -template -uint32 DenseBinnedVector::getNumElements() const { - return binIndices_.getNumElements(); -} - -template -uint32 DenseBinnedVector::getNumBins() const { - return values_.getNumElements(); -} - -template -void DenseBinnedVector::setNumBins(uint32 numBins, bool freeMemory) { - values_.setNumElements(numBins, freeMemory); -} - -template class DenseBinnedVector; -template class DenseBinnedVector; -template class DenseBinnedVector; -template class DenseBinnedVector; diff --git a/cpp/subprojects/common/src/common/data/vector_bit.cpp b/cpp/subprojects/common/src/common/data/vector_bit.cpp deleted file mode 100644 index 3c371bac..00000000 --- a/cpp/subprojects/common/src/common/data/vector_bit.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "common/data/vector_bit.hpp" - -#include "common/data/arrays.hpp" - -#include - -constexpr std::size_t UINT32_SIZE = CHAR_BIT * sizeof(uint32); - -static inline constexpr std::size_t size(uint32 numElements) { - return (numElements + UINT32_SIZE - 1) / UINT32_SIZE; -} - -static inline constexpr uint32 index(uint32 pos) { - return pos / UINT32_SIZE; -} - -static inline constexpr uint32 mask(uint32 pos) { - return 1U << (pos % UINT32_SIZE); -} - -BitVector::BitVector(uint32 numElements) : BitVector(numElements, false) {} - -BitVector::BitVector(uint32 numElements, bool init) - : numElements_(numElements), array_(init ? 
new uint32[size(numElements)] {} : new uint32[size(numElements)]) {} - -BitVector::~BitVector() { - delete[] array_; -} - -bool BitVector::operator[](uint32 pos) const { - return array_[index(pos)] & mask(pos); -} - -void BitVector::set(uint32 pos, bool value) { - if (value) { - array_[index(pos)] |= mask(pos); - } else { - array_[index(pos)] &= ~mask(pos); - } -} - -uint32 BitVector::getNumElements() const { - return numElements_; -} - -void BitVector::clear() { - setArrayToZeros(array_, size(numElements_)); -} diff --git a/cpp/subprojects/common/src/common/data/vector_dense.cpp b/cpp/subprojects/common/src/common/data/vector_dense.cpp deleted file mode 100644 index 00ec6c66..00000000 --- a/cpp/subprojects/common/src/common/data/vector_dense.cpp +++ /dev/null @@ -1,47 +0,0 @@ -#include "common/data/vector_dense.hpp" - -#include "common/data/indexed_value.hpp" -#include "common/data/tuple.hpp" - -#include - -template -DenseVector::DenseVector(uint32 numElements) : DenseVector(numElements, false) {} - -template -DenseVector::DenseVector(uint32 numElements, bool init) - : VectorView(numElements, (T*) (init ? 
calloc(numElements, sizeof(T)) : malloc(numElements * sizeof(T)))), - maxCapacity_(numElements) {} - -template -DenseVector::~DenseVector() { - free(this->array_); -} - -template -void DenseVector::setNumElements(uint32 numElements, bool freeMemory) { - if (numElements < maxCapacity_) { - if (freeMemory) { - this->array_ = (T*) realloc(this->array_, numElements * sizeof(T)); - maxCapacity_ = numElements; - } - } else if (numElements > maxCapacity_) { - this->array_ = (T*) realloc(this->array_, numElements * sizeof(T)); - maxCapacity_ = numElements; - } - - this->numElements_ = numElements; -} - -template class DenseVector; -template class DenseVector; -template class DenseVector; -template class DenseVector; -template class DenseVector>; -template class DenseVector>; -template class DenseVector>; -template class DenseVector>; -template class DenseVector>>; -template class DenseVector>>; -template class DenseVector>>; -template class DenseVector>>; diff --git a/cpp/subprojects/common/src/common/data/vector_dok.cpp b/cpp/subprojects/common/src/common/data/vector_dok.cpp deleted file mode 100644 index 8bf68340..00000000 --- a/cpp/subprojects/common/src/common/data/vector_dok.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "common/data/vector_dok.hpp" - -template -DokVector::DokVector(T sparseValue) : sparseValue_(sparseValue) {} - -template -typename DokVector::iterator DokVector::begin() { - return data_.begin(); -} - -template -typename DokVector::iterator DokVector::end() { - return data_.end(); -} - -template -typename DokVector::const_iterator DokVector::cbegin() const { - return data_.cbegin(); -} - -template -typename DokVector::const_iterator DokVector::cend() const { - return data_.cend(); -} - -template -const T& DokVector::operator[](uint32 pos) const { - auto it = data_.find(pos); - return it != data_.cend() ? 
it->second : sparseValue_; -} - -template -void DokVector::set(uint32 pos, T value) { - auto result = data_.emplace(pos, value); - - if (!result.second) { - result.first->second = value; - } -} - -template -void DokVector::clear() { - data_.clear(); -} - -template class DokVector; -template class DokVector; -template class DokVector; -template class DokVector; diff --git a/cpp/subprojects/common/src/common/data/vector_dok_binary.cpp b/cpp/subprojects/common/src/common/data/vector_dok_binary.cpp deleted file mode 100644 index 86fe9e76..00000000 --- a/cpp/subprojects/common/src/common/data/vector_dok_binary.cpp +++ /dev/null @@ -1,25 +0,0 @@ -#include "common/data/vector_dok_binary.hpp" - -BinaryDokVector::index_const_iterator BinaryDokVector::indices_cbegin() const { - return data_.cbegin(); -} - -BinaryDokVector::index_const_iterator BinaryDokVector::indices_cend() const { - return data_.cend(); -} - -bool BinaryDokVector::operator[](uint32 pos) const { - return data_.find(pos) != data_.end(); -} - -void BinaryDokVector::set(uint32 pos, bool value) { - if (value) { - data_.insert(pos); - } else { - data_.erase(pos); - } -} - -void BinaryDokVector::clear() { - data_.clear(); -} diff --git a/cpp/subprojects/common/src/common/data/vector_sparse_array.cpp b/cpp/subprojects/common/src/common/data/vector_sparse_array.cpp deleted file mode 100644 index badf2c0f..00000000 --- a/cpp/subprojects/common/src/common/data/vector_sparse_array.cpp +++ /dev/null @@ -1,279 +0,0 @@ -#include "common/data/vector_sparse_array.hpp" - -#include "common/data/tuple.hpp" - -template -SparseArrayVector::IndexConstIterator::IndexConstIterator( - typename VectorConstView>::const_iterator iterator) - : iterator_(iterator) {} - -template -typename SparseArrayVector::IndexConstIterator::reference SparseArrayVector::IndexConstIterator::operator[]( - uint32 index) const { - return iterator_[index].index; -} - -template -typename SparseArrayVector::IndexConstIterator::reference 
SparseArrayVector::IndexConstIterator::operator*() - const { - return (*iterator_).index; -} - -template -typename SparseArrayVector::IndexConstIterator& SparseArrayVector::IndexConstIterator::operator++() { - ++iterator_; - return *this; -} - -template -typename SparseArrayVector::IndexConstIterator& SparseArrayVector::IndexConstIterator::operator++(int n) { - iterator_++; - return *this; -} - -template -typename SparseArrayVector::IndexConstIterator& SparseArrayVector::IndexConstIterator::operator--() { - --iterator_; - return *this; -} - -template -typename SparseArrayVector::IndexConstIterator& SparseArrayVector::IndexConstIterator::operator--(int n) { - iterator_--; - return *this; -} - -template -bool SparseArrayVector::IndexConstIterator::operator!=(const IndexConstIterator& rhs) const { - return iterator_ != rhs.iterator_; -} - -template -bool SparseArrayVector::IndexConstIterator::operator==(const IndexConstIterator& rhs) const { - return iterator_ == rhs.iterator_; -} - -template -typename SparseArrayVector::IndexConstIterator::difference_type SparseArrayVector::IndexConstIterator::operator-( - const IndexConstIterator& rhs) const { - return iterator_ - rhs.iterator_; -} - -template -SparseArrayVector::IndexIterator::IndexIterator(typename VectorView>::iterator iterator) - : iterator_(iterator) {} - -template -typename SparseArrayVector::IndexIterator::reference SparseArrayVector::IndexIterator::operator[]( - uint32 index) const { - return iterator_[index].index; -} - -template -typename SparseArrayVector::IndexIterator::reference SparseArrayVector::IndexIterator::operator*() const { - return (*iterator_).index; -} - -template -typename SparseArrayVector::IndexIterator& SparseArrayVector::IndexIterator::operator++() { - ++iterator_; - return *this; -} - -template -typename SparseArrayVector::IndexIterator& SparseArrayVector::IndexIterator::operator++(int n) { - iterator_++; - return *this; -} - -template -typename SparseArrayVector::IndexIterator& 
SparseArrayVector::IndexIterator::operator--() { - --iterator_; - return *this; -} - -template -typename SparseArrayVector::IndexIterator& SparseArrayVector::IndexIterator::operator--(int n) { - iterator_--; - return *this; -} - -template -bool SparseArrayVector::IndexIterator::operator!=(const IndexIterator& rhs) const { - return iterator_ != rhs.iterator_; -} - -template -bool SparseArrayVector::IndexIterator::operator==(const IndexIterator& rhs) const { - return iterator_ == rhs.iterator_; -} - -template -typename SparseArrayVector::IndexIterator::difference_type SparseArrayVector::IndexIterator::operator-( - const IndexIterator& rhs) const { - return iterator_ - rhs.iterator_; -} - -template -SparseArrayVector::ValueConstIterator::ValueConstIterator( - typename VectorConstView>::const_iterator iterator) - : iterator_(iterator) {} - -template -typename SparseArrayVector::ValueConstIterator::reference SparseArrayVector::ValueConstIterator::operator[]( - uint32 index) const { - return iterator_[index].value; -} - -template -typename SparseArrayVector::ValueConstIterator::reference SparseArrayVector::ValueConstIterator::operator*() - const { - return (*iterator_).value; -} - -template -typename SparseArrayVector::ValueConstIterator& SparseArrayVector::ValueConstIterator::operator++() { - ++iterator_; - return *this; -} - -template -typename SparseArrayVector::ValueConstIterator& SparseArrayVector::ValueConstIterator::operator++(int n) { - iterator_++; - return *this; -} - -template -typename SparseArrayVector::ValueConstIterator& SparseArrayVector::ValueConstIterator::operator--() { - --iterator_; - return *this; -} - -template -typename SparseArrayVector::ValueConstIterator& SparseArrayVector::ValueConstIterator::operator--(int n) { - iterator_--; - return *this; -} - -template -bool SparseArrayVector::ValueConstIterator::operator!=(const ValueConstIterator& rhs) const { - return iterator_ != rhs.iterator_; -} - -template -bool 
SparseArrayVector::ValueConstIterator::operator==(const ValueConstIterator& rhs) const { - return iterator_ == rhs.iterator_; -} - -template -typename SparseArrayVector::ValueConstIterator::difference_type SparseArrayVector::ValueConstIterator::operator-( - const ValueConstIterator& rhs) const { - return iterator_ - rhs.iterator_; -} - -template -SparseArrayVector::ValueIterator::ValueIterator(typename VectorView>::iterator iterator) - : iterator_(iterator) {} - -template -typename SparseArrayVector::ValueIterator::reference SparseArrayVector::ValueIterator::operator[]( - uint32 index) const { - return iterator_[index].value; -} - -template -typename SparseArrayVector::ValueIterator::reference SparseArrayVector::ValueIterator::operator*() const { - return (*iterator_).value; -} - -template -typename SparseArrayVector::ValueIterator& SparseArrayVector::ValueIterator::operator++() { - ++iterator_; - return *this; -} - -template -typename SparseArrayVector::ValueIterator& SparseArrayVector::ValueIterator::operator++(int n) { - iterator_++; - return *this; -} - -template -typename SparseArrayVector::ValueIterator& SparseArrayVector::ValueIterator::operator--() { - --iterator_; - return *this; -} - -template -typename SparseArrayVector::ValueIterator& SparseArrayVector::ValueIterator::operator--(int n) { - iterator_--; - return *this; -} - -template -bool SparseArrayVector::ValueIterator::operator!=(const ValueIterator& rhs) const { - return iterator_ != rhs.iterator_; -} - -template -bool SparseArrayVector::ValueIterator::operator==(const ValueIterator& rhs) const { - return iterator_ == rhs.iterator_; -} - -template -typename SparseArrayVector::ValueIterator::difference_type SparseArrayVector::ValueIterator::operator-( - const ValueIterator& rhs) const { - return iterator_ - rhs.iterator_; -} - -template -SparseArrayVector::SparseArrayVector(uint32 numElements) : DenseVector>(numElements) {} - -template -typename SparseArrayVector::index_iterator 
SparseArrayVector::indices_begin() { - return IndexIterator(this->begin()); -} - -template -typename SparseArrayVector::index_iterator SparseArrayVector::indices_end() { - return IndexIterator(this->end()); -} - -template -typename SparseArrayVector::index_const_iterator SparseArrayVector::indices_cbegin() const { - return IndexConstIterator(this->cbegin()); -} - -template -typename SparseArrayVector::index_const_iterator SparseArrayVector::indices_cend() const { - return IndexConstIterator(this->cend()); -} - -template -typename SparseArrayVector::value_iterator SparseArrayVector::values_begin() { - return ValueIterator(this->begin()); -} - -template -typename SparseArrayVector::value_iterator SparseArrayVector::values_end() { - return ValueIterator(this->end()); -} - -template -typename SparseArrayVector::value_const_iterator SparseArrayVector::values_cbegin() const { - return ValueConstIterator(this->cbegin()); -} - -template -typename SparseArrayVector::value_const_iterator SparseArrayVector::values_cend() const { - return ValueConstIterator(this->cend()); -} - -template class SparseArrayVector; -template class SparseArrayVector; -template class SparseArrayVector; -template class SparseArrayVector; -template class SparseArrayVector>; -template class SparseArrayVector>; -template class SparseArrayVector>; -template class SparseArrayVector>; diff --git a/cpp/subprojects/common/src/common/data/view_c_contiguous.cpp b/cpp/subprojects/common/src/common/data/view_c_contiguous.cpp deleted file mode 100644 index 309f2a22..00000000 --- a/cpp/subprojects/common/src/common/data/view_c_contiguous.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "common/data/view_c_contiguous.hpp" - -template -CContiguousConstView::CContiguousConstView(uint32 numRows, uint32 numCols, T* array) - : numRows_(numRows), numCols_(numCols), array_(array) {} - -template -typename CContiguousConstView::value_const_iterator CContiguousConstView::values_cbegin(uint32 row) const { - return &array_[row * 
numCols_]; -} - -template -typename CContiguousConstView::value_const_iterator CContiguousConstView::values_cend(uint32 row) const { - return &array_[(row + 1) * numCols_]; -} - -template -uint32 CContiguousConstView::getNumRows() const { - return numRows_; -} - -template -uint32 CContiguousConstView::getNumCols() const { - return numCols_; -} - -template class CContiguousConstView; -template class CContiguousConstView; -template class CContiguousConstView; -template class CContiguousConstView; -template class CContiguousConstView; -template class CContiguousConstView; -template class CContiguousConstView; -template class CContiguousConstView; - -template -CContiguousView::CContiguousView(uint32 numRows, uint32 numCols, T* array) - : CContiguousConstView(numRows, numCols, array) {} - -template -typename CContiguousView::value_iterator CContiguousView::values_begin(uint32 row) { - return &CContiguousConstView::array_[row * CContiguousConstView::numCols_]; -} - -template -typename CContiguousView::value_iterator CContiguousView::values_end(uint32 row) { - return &CContiguousConstView::array_[(row + 1) * CContiguousConstView::numCols_]; -} - -template class CContiguousView; -template class CContiguousView; -template class CContiguousView; -template class CContiguousView; diff --git a/cpp/subprojects/common/src/common/data/view_csc.cpp b/cpp/subprojects/common/src/common/data/view_csc.cpp deleted file mode 100644 index f7e821a3..00000000 --- a/cpp/subprojects/common/src/common/data/view_csc.cpp +++ /dev/null @@ -1,78 +0,0 @@ -#include "common/data/view_csc.hpp" - -template -CscConstView::CscConstView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices) - : numRows_(numRows), numCols_(numCols), data_(data), rowIndices_(rowIndices), colIndices_(colIndices) {} - -template -typename CscConstView::value_const_iterator CscConstView::values_cbegin(uint32 col) const { - return &data_[colIndices_[col]]; -} - -template -typename 
CscConstView::value_const_iterator CscConstView::values_cend(uint32 col) const { - return &data_[colIndices_[col + 1]]; -} - -template -typename CscConstView::index_const_iterator CscConstView::indices_cbegin(uint32 col) const { - return &rowIndices_[colIndices_[col]]; -} - -template -typename CscConstView::index_const_iterator CscConstView::indices_cend(uint32 col) const { - return &rowIndices_[colIndices_[col + 1]]; -} - -template -uint32 CscConstView::getNumNonZeroElements() const { - return colIndices_[numCols_]; -} - -template -uint32 CscConstView::getNumRows() const { - return numRows_; -} - -template -uint32 CscConstView::getNumCols() const { - return numCols_; -} - -template class CscConstView; -template class CscConstView; -template class CscConstView; -template class CscConstView; -template class CscConstView; -template class CscConstView; -template class CscConstView; -template class CscConstView; - -template -CscView::CscView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices) - : CscConstView(numRows, numCols, data, rowIndices, colIndices) {} - -template -typename CscView::value_iterator CscView::values_begin(uint32 col) { - return &CscConstView::data_[CscConstView::colIndices_[col]]; -} - -template -typename CscView::value_iterator CscView::values_end(uint32 col) { - return &CscConstView::data_[CscConstView::colIndices_[col + 1]]; -} - -template -typename CscView::index_iterator CscView::indices_begin(uint32 col) { - return &CscConstView::rowIndices_[CscConstView::colIndices_[col]]; -} - -template -typename CscView::index_iterator CscView::indices_end(uint32 col) { - return &CscConstView::rowIndices_[CscConstView::colIndices_[col + 1]]; -} - -template class CscView; -template class CscView; -template class CscView; -template class CscView; diff --git a/cpp/subprojects/common/src/common/data/view_csc_binary.cpp b/cpp/subprojects/common/src/common/data/view_csc_binary.cpp deleted file mode 100644 index 6c22a850..00000000 --- 
a/cpp/subprojects/common/src/common/data/view_csc_binary.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "common/data/view_csc_binary.hpp" - -BinaryCscConstView::BinaryCscConstView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices) - : numRows_(numRows), numCols_(numCols), rowIndices_(rowIndices), colIndices_(colIndices) {} - -BinaryCscConstView::index_const_iterator BinaryCscConstView::indices_cbegin(uint32 col) const { - return &rowIndices_[colIndices_[col]]; -} - -BinaryCscConstView::index_const_iterator BinaryCscConstView::indices_cend(uint32 col) const { - return &rowIndices_[colIndices_[col + 1]]; -} - -uint32 BinaryCscConstView::getNumRows() const { - return numRows_; -} - -uint32 BinaryCscConstView::getNumCols() const { - return numCols_; -} - -uint32 BinaryCscConstView::getNumNonZeroElements() const { - return colIndices_[numCols_]; -} - -BinaryCscView::BinaryCscView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices) - : BinaryCscConstView(numRows, numCols, rowIndices, colIndices) {} - -BinaryCscView::index_iterator BinaryCscView::indices_begin(uint32 col) { - return &BinaryCscConstView::rowIndices_[BinaryCscConstView::colIndices_[col]]; -} - -BinaryCscView::index_iterator BinaryCscView::indices_end(uint32 col) { - return &BinaryCscConstView::rowIndices_[BinaryCscConstView::colIndices_[col + 1]]; -} diff --git a/cpp/subprojects/common/src/common/data/view_csr.cpp b/cpp/subprojects/common/src/common/data/view_csr.cpp deleted file mode 100644 index a59ef682..00000000 --- a/cpp/subprojects/common/src/common/data/view_csr.cpp +++ /dev/null @@ -1,78 +0,0 @@ -#include "common/data/view_csr.hpp" - -template -CsrConstView::CsrConstView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices) - : numRows_(numRows), numCols_(numCols), data_(data), rowIndices_(rowIndices), colIndices_(colIndices) {} - -template -typename CsrConstView::value_const_iterator CsrConstView::values_cbegin(uint32 row) const { - 
return &data_[rowIndices_[row]]; -} - -template -typename CsrConstView::value_const_iterator CsrConstView::values_cend(uint32 row) const { - return &data_[rowIndices_[row + 1]]; -} - -template -typename CsrConstView::index_const_iterator CsrConstView::indices_cbegin(uint32 row) const { - return &colIndices_[rowIndices_[row]]; -} - -template -typename CsrConstView::index_const_iterator CsrConstView::indices_cend(uint32 row) const { - return &colIndices_[rowIndices_[row + 1]]; -} - -template -uint32 CsrConstView::getNumNonZeroElements() const { - return rowIndices_[numCols_]; -} - -template -uint32 CsrConstView::getNumRows() const { - return numRows_; -} - -template -uint32 CsrConstView::getNumCols() const { - return numCols_; -} - -template class CsrConstView; -template class CsrConstView; -template class CsrConstView; -template class CsrConstView; -template class CsrConstView; -template class CsrConstView; -template class CsrConstView; -template class CsrConstView; - -template -CsrView::CsrView(uint32 numRows, uint32 numCols, T* data, uint32* rowIndices, uint32* colIndices) - : CsrConstView(numRows, numCols, data, rowIndices, colIndices) {} - -template -typename CsrView::value_iterator CsrView::values_begin(uint32 row) { - return &CsrConstView::data_[CsrConstView::rowIndices_[row]]; -} - -template -typename CsrView::value_iterator CsrView::values_end(uint32 row) { - return &CsrConstView::data_[CsrConstView::rowIndices_[row + 1]]; -} - -template -typename CsrView::index_iterator CsrView::indices_begin(uint32 row) { - return &CsrConstView::colIndices_[CsrConstView::rowIndices_[row]]; -} - -template -typename CsrView::index_iterator CsrView::indices_end(uint32 row) { - return &CsrConstView::colIndices_[CsrConstView::rowIndices_[row + 1]]; -} - -template class CsrView; -template class CsrView; -template class CsrView; -template class CsrView; diff --git a/cpp/subprojects/common/src/common/data/view_csr_binary.cpp 
b/cpp/subprojects/common/src/common/data/view_csr_binary.cpp deleted file mode 100644 index f2f81f6e..00000000 --- a/cpp/subprojects/common/src/common/data/view_csr_binary.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "common/data/view_csr_binary.hpp" - -BinaryCsrConstView::BinaryCsrConstView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices) - : numRows_(numRows), numCols_(numCols), rowIndices_(rowIndices), colIndices_(colIndices) {} - -BinaryCsrConstView::index_const_iterator BinaryCsrConstView::indices_cbegin(uint32 row) const { - return &colIndices_[rowIndices_[row]]; -} - -BinaryCsrConstView::index_const_iterator BinaryCsrConstView::indices_cend(uint32 row) const { - return &colIndices_[rowIndices_[row + 1]]; -} - -uint32 BinaryCsrConstView::getNumNonZeroElements() const { - return rowIndices_[numRows_]; -} - -uint32 BinaryCsrConstView::getNumRows() const { - return numRows_; -} - -uint32 BinaryCsrConstView::getNumCols() const { - return numCols_; -} - -BinaryCsrView::BinaryCsrView(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices) - : BinaryCsrConstView(numRows, numCols, rowIndices, colIndices) {} - -BinaryCsrView::index_iterator BinaryCsrView::indices_begin(uint32 row) { - return &BinaryCsrConstView::colIndices_[BinaryCsrConstView::rowIndices_[row]]; -} - -BinaryCsrView::index_iterator BinaryCsrView::indices_end(uint32 row) { - return &BinaryCsrConstView::colIndices_[BinaryCsrConstView::rowIndices_[row + 1]]; -} diff --git a/cpp/subprojects/common/src/common/data/view_fortran_contiguous.cpp b/cpp/subprojects/common/src/common/data/view_fortran_contiguous.cpp deleted file mode 100644 index 228bbb39..00000000 --- a/cpp/subprojects/common/src/common/data/view_fortran_contiguous.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "common/data/view_fortran_contiguous.hpp" - -template -FortranContiguousConstView::FortranContiguousConstView(uint32 numRows, uint32 numCols, T* array) - : numRows_(numRows), numCols_(numCols), 
array_(array) {} - -template -typename FortranContiguousConstView::value_const_iterator FortranContiguousConstView::values_cbegin( - uint32 col) const { - return &array_[col * numRows_]; -} - -template -typename FortranContiguousConstView::value_const_iterator FortranContiguousConstView::values_cend( - uint32 col) const { - return &array_[(col + 1) * numRows_]; -} - -template -uint32 FortranContiguousConstView::getNumRows() const { - return numRows_; -} - -template -uint32 FortranContiguousConstView::getNumCols() const { - return numCols_; -} - -template class FortranContiguousConstView; -template class FortranContiguousConstView; -template class FortranContiguousConstView; -template class FortranContiguousConstView; -template class FortranContiguousConstView; -template class FortranContiguousConstView; -template class FortranContiguousConstView; -template class FortranContiguousConstView; - -template -FortranContiguousView::FortranContiguousView(uint32 numRows, uint32 numCols, T* array) - : FortranContiguousConstView(numRows, numCols, array) {} - -template -typename FortranContiguousView::value_iterator FortranContiguousView::values_begin(uint32 col) { - return &FortranContiguousConstView::array_[col * FortranContiguousConstView::numRows_]; -} - -template -typename FortranContiguousView::value_iterator FortranContiguousView::values_end(uint32 col) { - return &FortranContiguousConstView::array_[(col + 1) * FortranContiguousConstView::numRows_]; -} - -template class FortranContiguousView; -template class FortranContiguousView; -template class FortranContiguousView; -template class FortranContiguousView; diff --git a/cpp/subprojects/common/src/common/data/view_vector.cpp b/cpp/subprojects/common/src/common/data/view_vector.cpp deleted file mode 100644 index b732a490..00000000 --- a/cpp/subprojects/common/src/common/data/view_vector.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "common/data/view_vector.hpp" - -#include "common/data/indexed_value.hpp" -#include 
"common/data/tuple.hpp" - -template -VectorConstView::VectorConstView(uint32 numElements, T* array) : numElements_(numElements), array_(array) {} - -template -typename VectorConstView::const_iterator VectorConstView::cbegin() const { - return array_; -} - -template -typename VectorConstView::const_iterator VectorConstView::cend() const { - return &array_[numElements_]; -} - -template -const T& VectorConstView::operator[](uint32 pos) const { - return array_[pos]; -} - -template -uint32 VectorConstView::getNumElements() const { - return numElements_; -} - -template class VectorConstView; -template class VectorConstView; -template class VectorConstView; -template class VectorConstView; -template class VectorConstView; -template class VectorConstView; -template class VectorConstView; -template class VectorConstView; -template class VectorConstView>; -template class VectorConstView>; -template class VectorConstView>; -template class VectorConstView>; -template class VectorConstView>>; -template class VectorConstView>>; -template class VectorConstView>>; -template class VectorConstView>>; - -template -VectorView::VectorView(uint32 numElements, T* array) : VectorConstView(numElements, array) {} - -template -typename VectorView::iterator VectorView::begin() { - return VectorConstView::array_; -} - -template -typename VectorView::iterator VectorView::end() { - return &VectorConstView::array_[VectorConstView::numElements_]; -} - -template -const T& VectorView::operator[](uint32 pos) const { - return VectorConstView::array_[pos]; -} - -template -T& VectorView::operator[](uint32 pos) { - return VectorConstView::array_[pos]; -} - -template class VectorView; -template class VectorView; -template class VectorView; -template class VectorView; -template class VectorView>; -template class VectorView>; -template class VectorView>; -template class VectorView>; -template class VectorView>>; -template class VectorView>>; -template class VectorView>>; -template class VectorView>>; diff 
--git a/cpp/subprojects/common/src/common/indices/index_iterator.cpp b/cpp/subprojects/common/src/common/indices/index_iterator.cpp deleted file mode 100644 index c9f1318b..00000000 --- a/cpp/subprojects/common/src/common/indices/index_iterator.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "common/iterator/index_iterator.hpp" - -IndexIterator::IndexIterator() : IndexIterator(0) {} - -IndexIterator::IndexIterator(uint32 index) : index_(index) {} - -IndexIterator::reference IndexIterator::operator[](uint32 index) const { - return index; -} - -IndexIterator::reference IndexIterator::operator*() const { - return index_; -} - -IndexIterator& IndexIterator::operator++() { - ++index_; - return *this; -} - -IndexIterator& IndexIterator::operator++(int n) { - index_++; - return *this; -} - -IndexIterator& IndexIterator::operator--() { - --index_; - return *this; -} - -IndexIterator& IndexIterator::operator--(int n) { - index_++; - return *this; -} - -bool IndexIterator::operator!=(const IndexIterator& rhs) const { - return index_ != rhs.index_; -} - -bool IndexIterator::operator==(const IndexIterator& rhs) const { - return index_ == rhs.index_; -} - -IndexIterator::difference_type IndexIterator::operator-(const IndexIterator& rhs) const { - return (difference_type) index_ - (difference_type) rhs.index_; -} diff --git a/cpp/subprojects/common/src/common/indices/index_vector_complete.cpp b/cpp/subprojects/common/src/common/indices/index_vector_complete.cpp deleted file mode 100644 index c00f2665..00000000 --- a/cpp/subprojects/common/src/common/indices/index_vector_complete.cpp +++ /dev/null @@ -1,36 +0,0 @@ -#include "common/indices/index_vector_complete.hpp" - -#include "common/thresholds/thresholds_subset.hpp" - -CompleteIndexVector::CompleteIndexVector(uint32 numElements) { - numElements_ = numElements; -} - -bool CompleteIndexVector::isPartial() const { - return false; -} - -uint32 CompleteIndexVector::getNumElements() const { - return numElements_; -} - -void 
CompleteIndexVector::setNumElements(uint32 numElements, bool freeMemory) { - numElements_ = numElements; -} - -uint32 CompleteIndexVector::getIndex(uint32 pos) const { - return pos; -} - -CompleteIndexVector::const_iterator CompleteIndexVector::cbegin() const { - return IndexIterator(); -} - -CompleteIndexVector::const_iterator CompleteIndexVector::cend() const { - return IndexIterator(numElements_); -} - -std::unique_ptr CompleteIndexVector::createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const { - return thresholdsSubset.createRuleRefinement(*this, featureIndex); -} diff --git a/cpp/subprojects/common/src/common/indices/index_vector_partial.cpp b/cpp/subprojects/common/src/common/indices/index_vector_partial.cpp deleted file mode 100644 index a77fe8c1..00000000 --- a/cpp/subprojects/common/src/common/indices/index_vector_partial.cpp +++ /dev/null @@ -1,45 +0,0 @@ -#include "common/indices/index_vector_partial.hpp" - -#include "common/thresholds/thresholds_subset.hpp" - -PartialIndexVector::PartialIndexVector(uint32 numElements) : PartialIndexVector(numElements, false) {} - -PartialIndexVector::PartialIndexVector(uint32 numElements, bool init) - : vector_(DenseVector(numElements, init)) {} - -bool PartialIndexVector::isPartial() const { - return true; -} - -uint32 PartialIndexVector::getNumElements() const { - return vector_.getNumElements(); -} - -void PartialIndexVector::setNumElements(uint32 numElements, bool freeMemory) { - vector_.setNumElements(numElements, freeMemory); -} - -uint32 PartialIndexVector::getIndex(uint32 pos) const { - return vector_[pos]; -} - -PartialIndexVector::iterator PartialIndexVector::begin() { - return vector_.begin(); -} - -PartialIndexVector::iterator PartialIndexVector::end() { - return vector_.end(); -} - -PartialIndexVector::const_iterator PartialIndexVector::cbegin() const { - return vector_.cbegin(); -} - -PartialIndexVector::const_iterator PartialIndexVector::cend() const { - return 
vector_.cend(); -} - -std::unique_ptr PartialIndexVector::createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const { - return thresholdsSubset.createRuleRefinement(*this, featureIndex); -} diff --git a/cpp/subprojects/common/src/common/input/feature_info_equal.cpp b/cpp/subprojects/common/src/common/input/feature_info_equal.cpp deleted file mode 100644 index 59f277cc..00000000 --- a/cpp/subprojects/common/src/common/input/feature_info_equal.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "common/input/feature_info_equal.hpp" - -#include "common/input/feature_type_nominal.hpp" -#include "common/input/feature_type_numerical.hpp" -#include "common/input/feature_type_ordinal.hpp" - -/** - * An implementation of the type `IEqualFeatureInfo` that stores the type of all features. - * - * @tparam FeatureType The type of all features - */ -template -class EqualFeatureInfo final : public IEqualFeatureInfo { - public: - - std::unique_ptr createFeatureType(uint32 featureIndex) const override { - return std::make_unique(); - } -}; - -std::unique_ptr createOrdinalFeatureInfo() { - return std::make_unique>(); -} - -std::unique_ptr createNominalFeatureInfo() { - return std::make_unique>(); -} - -std::unique_ptr createNumericalFeatureInfo() { - return std::make_unique>(); -} diff --git a/cpp/subprojects/common/src/common/input/feature_info_mixed.cpp b/cpp/subprojects/common/src/common/input/feature_info_mixed.cpp deleted file mode 100644 index 20e506f9..00000000 --- a/cpp/subprojects/common/src/common/input/feature_info_mixed.cpp +++ /dev/null @@ -1,55 +0,0 @@ -#include "common/input/feature_info_mixed.hpp" - -#include "common/data/vector_bit.hpp" -#include "common/input/feature_type_nominal.hpp" -#include "common/input/feature_type_numerical.hpp" -#include "common/input/feature_type_ordinal.hpp" - -/** - * An implementation of the type `IMixedFeatureInfo` that uses `BitVector`s to store whether individual features are - * ordinal, nominal or numerical. 
- */ -class BitFeatureInfo final : public IMixedFeatureInfo { - private: - - BitVector ordinalBitVector_; - - BitVector nominalBitVector_; - - public: - - /** - * @param numFeatures The total number of available features - */ - BitFeatureInfo(uint32 numFeatures) - : ordinalBitVector_(BitVector(numFeatures, true)), nominalBitVector_(BitVector(numFeatures, true)) {} - - std::unique_ptr createFeatureType(uint32 featureIndex) const override { - if (ordinalBitVector_[featureIndex]) { - return std::make_unique(); - } else if (nominalBitVector_[featureIndex]) { - return std::make_unique(); - } else { - return std::make_unique(); - } - } - - void setNumerical(uint32 featureIndex) override { - ordinalBitVector_.set(featureIndex, false); - nominalBitVector_.set(featureIndex, false); - } - - void setOrdinal(uint32 featureIndex) override { - ordinalBitVector_.set(featureIndex, true); - nominalBitVector_.set(featureIndex, false); - } - - void setNominal(uint32 featureIndex) override { - ordinalBitVector_.set(featureIndex, false); - nominalBitVector_.set(featureIndex, true); - } -}; - -std::unique_ptr createMixedFeatureInfo(uint32 numFeatures) { - return std::make_unique(numFeatures); -} diff --git a/cpp/subprojects/common/src/common/input/feature_matrix_c_contiguous.cpp b/cpp/subprojects/common/src/common/input/feature_matrix_c_contiguous.cpp deleted file mode 100644 index 553e8158..00000000 --- a/cpp/subprojects/common/src/common/input/feature_matrix_c_contiguous.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "common/input/feature_matrix_c_contiguous.hpp" - -#include "common/prediction/predictor_binary.hpp" -#include "common/prediction/predictor_probability.hpp" -#include "common/prediction/predictor_score.hpp" - -CContiguousFeatureMatrix::CContiguousFeatureMatrix(uint32 numRows, uint32 numCols, const float32* array) - : CContiguousConstView(numRows, numCols, array) {} - -bool CContiguousFeatureMatrix::isSparse() const { - return false; -} - -std::unique_ptr 
CContiguousFeatureMatrix::createBinaryPredictor( - const IBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return ruleModel.createBinaryPredictor(factory, *this, labelSpaceInfo, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr CContiguousFeatureMatrix::createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return ruleModel.createSparseBinaryPredictor(factory, *this, labelSpaceInfo, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr CContiguousFeatureMatrix::createScorePredictor(const IScorePredictorFactory& factory, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const { - return ruleModel.createScorePredictor(factory, *this, labelSpaceInfo, numLabels); -} - -std::unique_ptr CContiguousFeatureMatrix::createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return ruleModel.createProbabilityPredictor(factory, *this, labelSpaceInfo, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr createCContiguousFeatureMatrix(uint32 numRows, uint32 numCols, - const float32* array) { - return 
std::make_unique(numRows, numCols, array); -} diff --git a/cpp/subprojects/common/src/common/input/feature_matrix_csc.cpp b/cpp/subprojects/common/src/common/input/feature_matrix_csc.cpp deleted file mode 100644 index 6c0a362d..00000000 --- a/cpp/subprojects/common/src/common/input/feature_matrix_csc.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "common/input/feature_matrix_csc.hpp" - -#include "common/data/view_csc.hpp" - -/** - * An implementation of the type `ICscFeatureMatrix` that provides column-wise read-only access to the feature values of - * examples that are stored in a pre-allocated sparse matrix in the compressed sparse column (CSC) format. - */ -class CscFeatureMatrix final : public CscConstView, - virtual public ICscFeatureMatrix { - public: - - /** - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param data A pointer to an array of type `float32`, shape `(num_non_zero_values)`, that stores all - * non-zero feature values - * @param rowIndices A pointer to an array of type `uint32`, shape `(num_non_zero_values)`, that stores the - * row-indices, the values in `data` correspond to - * @param colIndices A pointer to an array of type `uint32`, shape `(numCols + 1)`, that stores the indices - * of the first element in `data` and `rowIndices` that corresponds to a certain column. 
- * The index at the last position is equal to `num_non_zero_values` - */ - CscFeatureMatrix(uint32 numRows, uint32 numCols, const float32* data, uint32* rowIndices, uint32* colIndices) - : CscConstView(numRows, numCols, data, rowIndices, colIndices) {} - - bool isSparse() const override { - return true; - } - - void fetchFeatureVector(uint32 featureIndex, std::unique_ptr& featureVectorPtr) const override { - CscConstView::index_const_iterator indexIterator = this->indices_cbegin(featureIndex); - CscConstView::index_const_iterator indicesEnd = this->indices_cend(featureIndex); - CscConstView::value_const_iterator valueIterator = this->values_cbegin(featureIndex); - uint32 numElements = indicesEnd - indexIterator; - featureVectorPtr = std::make_unique(numElements); - FeatureVector::iterator vectorIterator = featureVectorPtr->begin(); - uint32 i = 0; - - for (uint32 j = 0; j < numElements; j++) { - uint32 index = indexIterator[j]; - float32 value = valueIterator[j]; - - if (std::isnan(value)) { - featureVectorPtr->addMissingIndex(index); - } else { - vectorIterator[i].index = index; - vectorIterator[i].value = value; - i++; - } - } - - featureVectorPtr->setNumElements(i, true); - } -}; - -std::unique_ptr createCscFeatureMatrix(uint32 numRows, uint32 numCols, const float32* data, - uint32* rowIndices, uint32* colIndices) { - return std::make_unique(numRows, numCols, data, rowIndices, colIndices); -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/src/common/input/feature_matrix_csr.cpp b/cpp/subprojects/common/src/common/input/feature_matrix_csr.cpp deleted file mode 100644 index 34657119..00000000 --- a/cpp/subprojects/common/src/common/input/feature_matrix_csr.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include "common/input/feature_matrix_csr.hpp" - -#include "common/prediction/predictor_binary.hpp" -#include "common/prediction/predictor_probability.hpp" -#include "common/prediction/predictor_score.hpp" - 
-CsrFeatureMatrix::CsrFeatureMatrix(uint32 numRows, uint32 numCols, const float32* data, uint32* rowIndices, - uint32* colIndices) - : CsrConstView(numRows, numCols, data, rowIndices, colIndices) {} - -bool CsrFeatureMatrix::isSparse() const { - return true; -} - -std::unique_ptr CsrFeatureMatrix::createBinaryPredictor( - const IBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return ruleModel.createBinaryPredictor(factory, *this, labelSpaceInfo, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr CsrFeatureMatrix::createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return ruleModel.createSparseBinaryPredictor(factory, *this, labelSpaceInfo, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr CsrFeatureMatrix::createScorePredictor(const IScorePredictorFactory& factory, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const { - return ruleModel.createScorePredictor(factory, *this, labelSpaceInfo, numLabels); -} - -std::unique_ptr CsrFeatureMatrix::createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return 
ruleModel.createProbabilityPredictor(factory, *this, labelSpaceInfo, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr createCsrFeatureMatrix(uint32 numRows, uint32 numCols, const float32* data, - uint32* rowIndices, uint32* colIndices) { - return std::make_unique(numRows, numCols, data, rowIndices, colIndices); -} diff --git a/cpp/subprojects/common/src/common/input/feature_matrix_fortran_contiguous.cpp b/cpp/subprojects/common/src/common/input/feature_matrix_fortran_contiguous.cpp deleted file mode 100644 index 323e5662..00000000 --- a/cpp/subprojects/common/src/common/input/feature_matrix_fortran_contiguous.cpp +++ /dev/null @@ -1,61 +0,0 @@ -#ifdef _WIN32 - #pragma warning(push) - #pragma warning(disable : 4250) -#endif - -#include "common/input/feature_matrix_fortran_contiguous.hpp" - -#include "common/data/view_fortran_contiguous.hpp" - -/** - * An implementation of the type `IFortranContiguousFeatureMatrix` that provides column-wise read-only access to the - * feature values of examples that are stored in a pre-allocated Fortran-contiguous array. 
- */ -class FortranContiguousFeatureMatrix final : public FortranContiguousConstView, - virtual public IFortranContiguousFeatureMatrix { - public: - - /** - * @param numRows The number of rows in the feature matrix - * @param numCols The number of columns in the feature matrix - * @param array A pointer to a Fortran-contiguous array of type `float32` that stores the feature values - */ - FortranContiguousFeatureMatrix(uint32 numRows, uint32 numCols, const float32* array) - : FortranContiguousConstView(numRows, numCols, array) {} - - bool isSparse() const override { - return false; - } - - void fetchFeatureVector(uint32 featureIndex, std::unique_ptr& featureVectorPtr) const override { - FortranContiguousConstView::value_const_iterator columnIterator = - this->values_cbegin(featureIndex); - uint32 numElements = this->getNumRows(); - featureVectorPtr = std::make_unique(numElements); - FeatureVector::iterator vectorIterator = featureVectorPtr->begin(); - uint32 i = 0; - - for (uint32 j = 0; j < numElements; j++) { - float32 value = columnIterator[j]; - - if (std::isnan(value)) { - featureVectorPtr->addMissingIndex(j); - } else { - vectorIterator[i].index = j; - vectorIterator[i].value = value; - i++; - } - } - - featureVectorPtr->setNumElements(i, true); - } -}; - -std::unique_ptr createFortranContiguousFeatureMatrix(uint32 numRows, uint32 numCols, - const float32* array) { - return std::make_unique(numRows, numCols, array); -} - -#ifdef _WIN32 - #pragma warning(pop) -#endif diff --git a/cpp/subprojects/common/src/common/input/feature_type_nominal.cpp b/cpp/subprojects/common/src/common/input/feature_type_nominal.cpp deleted file mode 100644 index 784fa722..00000000 --- a/cpp/subprojects/common/src/common/input/feature_type_nominal.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "common/input/feature_type_nominal.hpp" - -bool NominalFeatureType::isNominal() const { - return true; -} diff --git a/cpp/subprojects/common/src/common/input/feature_type_numerical.cpp 
b/cpp/subprojects/common/src/common/input/feature_type_numerical.cpp deleted file mode 100644 index ecd263d4..00000000 --- a/cpp/subprojects/common/src/common/input/feature_type_numerical.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "common/input/feature_type_numerical.hpp" - -bool NumericalFeatureType::isNominal() const { - return false; -} diff --git a/cpp/subprojects/common/src/common/input/feature_type_ordinal.cpp b/cpp/subprojects/common/src/common/input/feature_type_ordinal.cpp deleted file mode 100644 index 6b82be5c..00000000 --- a/cpp/subprojects/common/src/common/input/feature_type_ordinal.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "common/input/feature_type_ordinal.hpp" - -bool OrdinalFeatureType::isNominal() const { - return false; -} diff --git a/cpp/subprojects/common/src/common/input/feature_vector.cpp b/cpp/subprojects/common/src/common/input/feature_vector.cpp deleted file mode 100644 index 7783668f..00000000 --- a/cpp/subprojects/common/src/common/input/feature_vector.cpp +++ /dev/null @@ -1,33 +0,0 @@ -#include "common/input/feature_vector.hpp" - -#include - -FeatureVector::FeatureVector(uint32 numElements) : vector_(SparseArrayVector(numElements)) {} - -FeatureVector::iterator FeatureVector::begin() { - return vector_.begin(); -} - -FeatureVector::iterator FeatureVector::end() { - return vector_.end(); -} - -FeatureVector::const_iterator FeatureVector::cbegin() const { - return vector_.cbegin(); -} - -FeatureVector::const_iterator FeatureVector::cend() const { - return vector_.cend(); -} - -uint32 FeatureVector::getNumElements() const { - return vector_.getNumElements(); -} - -void FeatureVector::setNumElements(uint32 numElements, bool freeMemory) { - return vector_.setNumElements(numElements, freeMemory); -} - -void FeatureVector::sortByValues() { - std::sort(vector_.begin(), vector_.end(), IndexedValue::CompareValue()); -} diff --git a/cpp/subprojects/common/src/common/input/label_matrix_c_contiguous.cpp 
b/cpp/subprojects/common/src/common/input/label_matrix_c_contiguous.cpp deleted file mode 100644 index 044c4728..00000000 --- a/cpp/subprojects/common/src/common/input/label_matrix_c_contiguous.cpp +++ /dev/null @@ -1,109 +0,0 @@ -#include "common/input/label_matrix_c_contiguous.hpp" - -#include "common/math/math.hpp" -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/sampling/instance_sampling.hpp" -#include "common/sampling/partition_sampling.hpp" -#include "common/statistics/statistics_provider.hpp" - -CContiguousLabelMatrix::View::View(const CContiguousLabelMatrix& labelMatrix, uint32 row) - : VectorConstView(labelMatrix.getNumCols(), labelMatrix.values_cbegin(row)) {} - -CContiguousLabelMatrix::CContiguousLabelMatrix(uint32 numRows, uint32 numCols, const uint8* array) - : CContiguousConstView(numRows, numCols, array) {} - -bool CContiguousLabelMatrix::isSparse() const { - return false; -} - -float32 CContiguousLabelMatrix::calculateLabelCardinality() const { - uint32 numRows = this->getNumRows(); - uint32 numCols = this->getNumCols(); - float32 labelCardinality = 0; - - for (uint32 i = 0; i < numRows; i++) { - value_const_iterator labelIterator = this->values_cbegin(i); - uint32 numRelevantLabels = 0; - - for (uint32 j = 0; j < numCols; j++) { - if (labelIterator[j]) { - numRelevantLabels++; - } - } - - labelCardinality = iterativeArithmeticMean(i + 1, (float32) numRelevantLabels, labelCardinality); - } - - return labelCardinality; -} - -CContiguousLabelMatrix::view_type CContiguousLabelMatrix::createView(uint32 row) const { - return CContiguousLabelMatrix::view_type(*this, row); -} - -std::unique_ptr CContiguousLabelMatrix::createLabelVector(uint32 row) const { - uint32 numCols = this->getNumCols(); - std::unique_ptr labelVectorPtr = std::make_unique(numCols); - LabelVector::iterator iterator = labelVectorPtr->begin(); - value_const_iterator labelIterator = this->values_cbegin(row); - uint32 n = 0; - - for (uint32 i = 0; i < 
numCols; i++) { - if (labelIterator[i]) { - iterator[n] = i; - n++; - } - } - - labelVectorPtr->setNumElements(n, true); - return labelVectorPtr; -} - -std::unique_ptr CContiguousLabelMatrix::createStatisticsProvider( - const IStatisticsProviderFactory& factory) const { - return factory.create(*this); -} - -std::unique_ptr CContiguousLabelMatrix::createPartitionSampling( - const IPartitionSamplingFactory& factory) const { - return factory.create(*this); -} - -std::unique_ptr CContiguousLabelMatrix::createInstanceSampling( - const IInstanceSamplingFactory& factory, const SinglePartition& partition, IStatistics& statistics) const { - return factory.create(*this, partition, statistics); -} - -std::unique_ptr CContiguousLabelMatrix::createInstanceSampling( - const IInstanceSamplingFactory& factory, BiPartition& partition, IStatistics& statistics) const { - return factory.create(*this, partition, statistics); -} - -std::unique_ptr CContiguousLabelMatrix::fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr CContiguousLabelMatrix::fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr CContiguousLabelMatrix::fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr CContiguousLabelMatrix::fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, 
BiPartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr createCContiguousLabelMatrix(uint32 numRows, uint32 numCols, - const uint8* array) { - return std::make_unique(numRows, numCols, array); -} diff --git a/cpp/subprojects/common/src/common/input/label_matrix_csc.cpp b/cpp/subprojects/common/src/common/input/label_matrix_csc.cpp deleted file mode 100644 index 7d9149d3..00000000 --- a/cpp/subprojects/common/src/common/input/label_matrix_csc.cpp +++ /dev/null @@ -1,131 +0,0 @@ -#include "common/input/label_matrix_csc.hpp" - -#include "common/data/arrays.hpp" - -#include - -template -static inline uint32* copyLabelMatrix(uint32* rowIndices, uint32* colIndices, - const CContiguousConstView& labelMatrix, IndexIterator indicesBegin, - IndexIterator indicesEnd) { - uint32 numExamples = indicesEnd - indicesBegin; - uint32 numLabels = labelMatrix.getNumCols(); - uint32 n = 0; - - for (uint32 i = 0; i < numLabels; i++) { - colIndices[i] = n; - - for (uint32 j = 0; j < numExamples; j++) { - uint32 exampleIndex = indicesBegin[j]; - - if (labelMatrix.values_cbegin(exampleIndex)[i]) { - rowIndices[n] = exampleIndex; - n++; - } - } - } - - colIndices[numLabels] = n; - return (uint32*) realloc(rowIndices, n * sizeof(uint32)); -} - -template -static inline uint32* copyLabelMatrix(uint32* rowIndices, uint32* colIndices, const BinaryCsrConstView& labelMatrix, - IndexIterator indicesBegin, IndexIterator indicesEnd) { - uint32 numExamples = indicesEnd - indicesBegin; - uint32 numLabels = labelMatrix.getNumCols(); - - // Set column indices of the CSC matrix to zero... - setArrayToZeros(colIndices, numLabels); - - // Determine the number of non-zero elements per column... 
- for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indicesBegin[i]; - BinaryCsrConstView::index_const_iterator labelIndexIterator = labelMatrix.indices_cbegin(exampleIndex); - uint32 numRelevantLabels = labelMatrix.indices_cend(exampleIndex) - labelIndexIterator; - - for (uint32 j = 0; j < numRelevantLabels; j++) { - uint32 labelIndex = labelIndexIterator[j]; - colIndices[labelIndex]++; - } - } - - // Update the column indices of the CSC matrix with respect to the number of non-zero elements that correspond to - // previous columns... - uint32 tmp = 0; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 labelIndex = colIndices[i]; - colIndices[i] = tmp; - tmp += labelIndex; - } - - // Set the row indices of the CSC matrix. This will modify the column indices... - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indicesBegin[i]; - BinaryCsrConstView::index_const_iterator labelIndexIterator = labelMatrix.indices_cbegin(exampleIndex); - uint32 numRelevantLabels = labelMatrix.indices_cend(exampleIndex) - labelIndexIterator; - - for (uint32 j = 0; j < numRelevantLabels; j++) { - uint32 originalLabelIndex = labelIndexIterator[j]; - uint32 labelIndex = colIndices[originalLabelIndex]; - rowIndices[labelIndex] = exampleIndex; - colIndices[originalLabelIndex]++; - } - } - - // Reset the column indices to the previous values... 
- tmp = 0; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 labelIndex = colIndices[i]; - colIndices[i] = tmp; - tmp = labelIndex; - } - - colIndices[numLabels] = tmp; - return (uint32*) realloc(rowIndices, tmp * sizeof(uint32)); -} - -CscLabelMatrix::CscLabelMatrix(const CContiguousConstView& labelMatrix, - CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd) - : BinaryCscConstView(labelMatrix.getNumRows(), labelMatrix.getNumCols(), - (uint32*) malloc(labelMatrix.getNumRows() * labelMatrix.getNumCols() * sizeof(uint32)), - (uint32*) malloc((labelMatrix.getNumCols() + 1) * sizeof(uint32))) { - this->rowIndices_ = copyLabelMatrix(this->rowIndices_, this->colIndices_, - labelMatrix, indicesBegin, indicesEnd); -} - -CscLabelMatrix::CscLabelMatrix(const CContiguousConstView& labelMatrix, - PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd) - : BinaryCscConstView(labelMatrix.getNumRows(), labelMatrix.getNumCols(), - (uint32*) malloc(labelMatrix.getNumRows() * labelMatrix.getNumCols() * sizeof(uint32)), - (uint32*) malloc((labelMatrix.getNumCols() + 1) * sizeof(uint32))) { - this->rowIndices_ = copyLabelMatrix(this->rowIndices_, this->colIndices_, - labelMatrix, indicesBegin, indicesEnd); -} - -CscLabelMatrix::CscLabelMatrix(const BinaryCsrConstView& labelMatrix, CompleteIndexVector::const_iterator indicesBegin, - CompleteIndexVector::const_iterator indicesEnd) - : BinaryCscConstView(labelMatrix.getNumRows(), labelMatrix.getNumCols(), - (uint32*) malloc(labelMatrix.getNumNonZeroElements() * sizeof(uint32)), - (uint32*) malloc((labelMatrix.getNumCols() + 1) * sizeof(uint32))) { - this->rowIndices_ = copyLabelMatrix(this->rowIndices_, this->colIndices_, - labelMatrix, indicesBegin, indicesEnd); -} - -CscLabelMatrix::CscLabelMatrix(const BinaryCsrConstView& labelMatrix, PartialIndexVector::const_iterator indicesBegin, - PartialIndexVector::const_iterator indicesEnd) - : 
BinaryCscConstView(labelMatrix.getNumRows(), labelMatrix.getNumCols(), - (uint32*) malloc(labelMatrix.getNumNonZeroElements() * sizeof(uint32)), - (uint32*) malloc((labelMatrix.getNumCols() + 1) * sizeof(uint32))) { - this->rowIndices_ = copyLabelMatrix(this->rowIndices_, this->colIndices_, - labelMatrix, indicesBegin, indicesEnd); -} - -CscLabelMatrix::~CscLabelMatrix() { - free(this->rowIndices_); - free(this->colIndices_); -} diff --git a/cpp/subprojects/common/src/common/input/label_matrix_csr.cpp b/cpp/subprojects/common/src/common/input/label_matrix_csr.cpp deleted file mode 100644 index c0f52363..00000000 --- a/cpp/subprojects/common/src/common/input/label_matrix_csr.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "common/input/label_matrix_csr.hpp" - -#include "common/data/arrays.hpp" -#include "common/math/math.hpp" -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/sampling/instance_sampling.hpp" -#include "common/sampling/partition_sampling.hpp" -#include "common/statistics/statistics_provider.hpp" - -CsrLabelMatrix::View::View(const CsrLabelMatrix& labelMatrix, uint32 row) - : VectorConstView(labelMatrix.indices_cend(row) - labelMatrix.indices_cbegin(row), - labelMatrix.indices_cbegin(row)) {} - -CsrLabelMatrix::CsrLabelMatrix(uint32 numRows, uint32 numCols, uint32* rowIndices, uint32* colIndices) - : BinaryCsrConstView(numRows, numCols, rowIndices, colIndices) {} - -bool CsrLabelMatrix::isSparse() const { - return true; -} - -float32 CsrLabelMatrix::calculateLabelCardinality() const { - uint32 numRows = this->getNumRows(); - float32 labelCardinality = 0; - - for (uint32 i = 0; i < numRows; i++) { - index_const_iterator indicesBegin = this->indices_cbegin(i); - index_const_iterator indicesEnd = this->indices_cend(i); - uint32 numRelevantLabels = indicesEnd - indicesBegin; - labelCardinality = iterativeArithmeticMean(i + 1, (float32) numRelevantLabels, labelCardinality); - } - - return labelCardinality; -} - 
-CsrLabelMatrix::view_type CsrLabelMatrix::createView(uint32 row) const { - return CsrLabelMatrix::view_type(*this, row); -} - -std::unique_ptr CsrLabelMatrix::createLabelVector(uint32 row) const { - index_const_iterator indexIterator = this->indices_cbegin(row); - index_const_iterator indicesEnd = this->indices_cend(row); - uint32 numElements = indicesEnd - indexIterator; - std::unique_ptr labelVectorPtr = std::make_unique(numElements); - LabelVector::iterator iterator = labelVectorPtr->begin(); - copyArray(indexIterator, iterator, numElements); - return labelVectorPtr; -} - -std::unique_ptr CsrLabelMatrix::createStatisticsProvider( - const IStatisticsProviderFactory& factory) const { - return factory.create(*this); -} - -std::unique_ptr CsrLabelMatrix::createPartitionSampling( - const IPartitionSamplingFactory& factory) const { - return factory.create(*this); -} - -std::unique_ptr CsrLabelMatrix::createInstanceSampling(const IInstanceSamplingFactory& factory, - const SinglePartition& partition, - IStatistics& statistics) const { - return factory.create(*this, partition, statistics); -} - -std::unique_ptr CsrLabelMatrix::createInstanceSampling(const IInstanceSamplingFactory& factory, - BiPartition& partition, - IStatistics& statistics) const { - return factory.create(*this, partition, statistics); -} - -std::unique_ptr CsrLabelMatrix::fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr CsrLabelMatrix::fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr 
CsrLabelMatrix::fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const SinglePartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr CsrLabelMatrix::fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, BiPartition& partition, - const IStatistics& statistics) const { - return probabilityCalibrator.fitProbabilityCalibrationModel(partition, *this, statistics); -} - -std::unique_ptr createCsrLabelMatrix(uint32 numRows, uint32 numCols, uint32* rowIndices, - uint32* colIndices) { - return std::make_unique(numRows, numCols, rowIndices, colIndices); -} diff --git a/cpp/subprojects/common/src/common/input/missing_feature_vector.cpp b/cpp/subprojects/common/src/common/input/missing_feature_vector.cpp deleted file mode 100644 index dc2c5ca2..00000000 --- a/cpp/subprojects/common/src/common/input/missing_feature_vector.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "common/input/missing_feature_vector.hpp" - -MissingFeatureVector::MissingFeatureVector() : missingIndicesPtr_(std::make_unique()) {} - -MissingFeatureVector::MissingFeatureVector(MissingFeatureVector& missingFeatureVector) - : missingIndicesPtr_(std::move(missingFeatureVector.missingIndicesPtr_)) {} - -MissingFeatureVector::missing_index_const_iterator MissingFeatureVector::missing_indices_cbegin() const { - return missingIndicesPtr_->indices_cbegin(); -} - -MissingFeatureVector::missing_index_const_iterator MissingFeatureVector::missing_indices_cend() const { - return missingIndicesPtr_->indices_cend(); -} - -void MissingFeatureVector::addMissingIndex(uint32 index) { - missingIndicesPtr_->set(index, true); -} - -bool MissingFeatureVector::isMissing(uint32 index) const { - return (*missingIndicesPtr_)[index]; -} - -void MissingFeatureVector::clearMissingIndices() { - missingIndicesPtr_->clear(); -} diff 
--git a/cpp/subprojects/common/src/common/learner.cpp b/cpp/subprojects/common/src/common/learner.cpp deleted file mode 100644 index 5090859b..00000000 --- a/cpp/subprojects/common/src/common/learner.cpp +++ /dev/null @@ -1,591 +0,0 @@ -#include "common/learner.hpp" - -#include "common/prediction/label_space_info_no.hpp" -#include "common/stopping/stopping_criterion_size.hpp" -#include "common/util/validation.hpp" - -/** - * An implementation of the type `ITrainingResult` that provides access to the result of training an - * `AbstractRuleLearner`. - */ -class TrainingResult final : public ITrainingResult { - private: - - const uint32 numLabels_; - - std::unique_ptr ruleModelPtr_; - - std::unique_ptr labelSpaceInfoPtr_; - - std::unique_ptr marginalProbabilityCalibrationModelPtr_; - - std::unique_ptr jointProbabilityCalibrationModelPtr_; - - public: - - /** - * @param numLabels The number of labels for which a model has been trained - * @param ruleModelPtr An unique pointer to an object of type `IRuleModel` that has - * been trained - * @param labelSpaceInfoPtr An unique pointer to an object of type `ILabelSpaceInfo` - * that may be used as a basis for making predictions - * @param marginalProbabilityCalibrationModelPtr An unique pointer to an object of type - * `IMarginalProbabilityCalibrationModel` that may be used for - * the calibration of marginal probabilities - * @param jointProbabilityCalibrationModelPtr An unique pointer to an object of type - * `IJointProbabilityCalibrationModel` that may be used for the - * calibration of joint probabilities - */ - TrainingResult(uint32 numLabels, std::unique_ptr ruleModelPtr, - std::unique_ptr labelSpaceInfoPtr, - std::unique_ptr marginalProbabilityCalibrationModelPtr, - std::unique_ptr jointProbabilityCalibrationModelPtr) - : numLabels_(numLabels), ruleModelPtr_(std::move(ruleModelPtr)), - labelSpaceInfoPtr_(std::move(labelSpaceInfoPtr)), - 
marginalProbabilityCalibrationModelPtr_(std::move(marginalProbabilityCalibrationModelPtr)), - jointProbabilityCalibrationModelPtr_(std::move(jointProbabilityCalibrationModelPtr)) {} - - uint32 getNumLabels() const override { - return numLabels_; - } - - std::unique_ptr& getRuleModel() override { - return ruleModelPtr_; - } - - const std::unique_ptr& getRuleModel() const override { - return ruleModelPtr_; - } - - std::unique_ptr& getLabelSpaceInfo() override { - return labelSpaceInfoPtr_; - } - - const std::unique_ptr& getLabelSpaceInfo() const override { - return labelSpaceInfoPtr_; - } - - std::unique_ptr& getMarginalProbabilityCalibrationModel() override { - return marginalProbabilityCalibrationModelPtr_; - } - - const std::unique_ptr& getMarginalProbabilityCalibrationModel() - const override { - return marginalProbabilityCalibrationModelPtr_; - } - - std::unique_ptr& getJointProbabilityCalibrationModel() override { - return jointProbabilityCalibrationModelPtr_; - } - - const std::unique_ptr& getJointProbabilityCalibrationModel() const override { - return jointProbabilityCalibrationModelPtr_; - } -}; - -AbstractRuleLearner::Config::Config(RuleCompareFunction ruleCompareFunction) - : ruleCompareFunction_(ruleCompareFunction), defaultRuleConfigPtr_(std::make_unique(true)), - ruleModelAssemblageConfigPtr_(std::make_unique(defaultRuleConfigPtr_)), - ruleInductionConfigPtr_( - std::make_unique(ruleCompareFunction_, parallelRuleRefinementConfigPtr_)), - featureBinningConfigPtr_(std::make_unique(parallelStatisticUpdateConfigPtr_)), - labelSamplingConfigPtr_(std::make_unique()), - instanceSamplingConfigPtr_(std::make_unique()), - featureSamplingConfigPtr_(std::make_unique()), - partitionSamplingConfigPtr_(std::make_unique()), - rulePruningConfigPtr_(std::make_unique()), - postProcessorConfigPtr_(std::make_unique()), - parallelRuleRefinementConfigPtr_(std::make_unique()), - parallelStatisticUpdateConfigPtr_(std::make_unique()), - 
parallelPredictionConfigPtr_(std::make_unique()), - unusedRuleRemovalConfigPtr_(std::make_unique()), - marginalProbabilityCalibratorConfigPtr_(std::make_unique()), - jointProbabilityCalibratorConfigPtr_(std::make_unique()) {} - -RuleCompareFunction AbstractRuleLearner::Config::getRuleCompareFunction() const { - return ruleCompareFunction_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getDefaultRuleConfigPtr() { - return defaultRuleConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getRuleModelAssemblageConfigPtr() { - return ruleModelAssemblageConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getRuleInductionConfigPtr() { - return ruleInductionConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getFeatureBinningConfigPtr() { - return featureBinningConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getLabelSamplingConfigPtr() { - return labelSamplingConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getInstanceSamplingConfigPtr() { - return instanceSamplingConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getFeatureSamplingConfigPtr() { - return featureSamplingConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getPartitionSamplingConfigPtr() { - return partitionSamplingConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getRulePruningConfigPtr() { - return rulePruningConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getPostProcessorConfigPtr() { - return postProcessorConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getParallelRuleRefinementConfigPtr() { - return parallelRuleRefinementConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getParallelStatisticUpdateConfigPtr() { - return parallelStatisticUpdateConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getParallelPredictionConfigPtr() { - return parallelPredictionConfigPtr_; -} - -std::unique_ptr& 
AbstractRuleLearner::Config::getSizeStoppingCriterionConfigPtr() { - return sizeStoppingCriterionConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getTimeStoppingCriterionConfigPtr() { - return timeStoppingCriterionConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getGlobalPruningConfigPtr() { - return globalPruningConfigPtr_; -} - -std::unique_ptr& - AbstractRuleLearner::Config::getSequentialPostOptimizationConfigPtr() { - return sequentialPostOptimizationConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getUnusedRuleRemovalConfigPtr() { - return unusedRuleRemovalConfigPtr_; -} - -std::unique_ptr& - AbstractRuleLearner::Config::getMarginalProbabilityCalibratorConfigPtr() { - return marginalProbabilityCalibratorConfigPtr_; -} - -std::unique_ptr& - AbstractRuleLearner::Config::getJointProbabilityCalibratorConfigPtr() { - return jointProbabilityCalibratorConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getBinaryPredictorConfigPtr() { - return binaryPredictorConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getScorePredictorConfigPtr() { - return scorePredictorConfigPtr_; -} - -std::unique_ptr& AbstractRuleLearner::Config::getProbabilityPredictorConfigPtr() { - return probabilityPredictorConfigPtr_; -} - -AbstractRuleLearner::AbstractRuleLearner(IRuleLearner::IConfig& config) : config_(config) {} - -std::unique_ptr AbstractRuleLearner::createRuleModelAssemblageFactory( - const IRowWiseLabelMatrix& labelMatrix) const { - return config_.getRuleModelAssemblageConfigPtr()->createRuleModelAssemblageFactory(labelMatrix); -} - -std::unique_ptr AbstractRuleLearner::createThresholdsFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - return config_.getFeatureBinningConfigPtr()->createThresholdsFactory(featureMatrix, labelMatrix); -} - -std::unique_ptr AbstractRuleLearner::createRuleInductionFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) 
const { - return config_.getRuleInductionConfigPtr()->createRuleInductionFactory(featureMatrix, labelMatrix); -} - -std::unique_ptr AbstractRuleLearner::createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const { - return config_.getLabelSamplingConfigPtr()->createLabelSamplingFactory(labelMatrix); -} - -std::unique_ptr AbstractRuleLearner::createInstanceSamplingFactory() const { - return config_.getInstanceSamplingConfigPtr()->createInstanceSamplingFactory(); -} - -std::unique_ptr AbstractRuleLearner::createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const { - return config_.getFeatureSamplingConfigPtr()->createFeatureSamplingFactory(featureMatrix); -} - -std::unique_ptr AbstractRuleLearner::createPartitionSamplingFactory() const { - return config_.getPartitionSamplingConfigPtr()->createPartitionSamplingFactory(); -} - -std::unique_ptr AbstractRuleLearner::createRulePruningFactory() const { - return config_.getRulePruningConfigPtr()->createRulePruningFactory(); -} - -std::unique_ptr AbstractRuleLearner::createPostProcessorFactory() const { - return config_.getPostProcessorConfigPtr()->createPostProcessorFactory(); -} - -std::unique_ptr AbstractRuleLearner::createSizeStoppingCriterionFactory() const { - std::unique_ptr& configPtr = config_.getSizeStoppingCriterionConfigPtr(); - return configPtr.get() != nullptr ? configPtr->createStoppingCriterionFactory() : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createTimeStoppingCriterionFactory() const { - std::unique_ptr& configPtr = config_.getTimeStoppingCriterionConfigPtr(); - return configPtr.get() != nullptr ? configPtr->createStoppingCriterionFactory() : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createGlobalPruningFactory() const { - std::unique_ptr& configPtr = config_.getGlobalPruningConfigPtr(); - return configPtr.get() != nullptr ? 
configPtr->createStoppingCriterionFactory() : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createSequentialPostOptimizationFactory() const { - std::unique_ptr& configPtr = config_.getSequentialPostOptimizationConfigPtr(); - return configPtr.get() != nullptr ? configPtr->createPostOptimizationPhaseFactory() : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createUnusedRuleRemovalFactory() const { - std::unique_ptr& globalPruningConfigPtr = config_.getGlobalPruningConfigPtr(); - - if (globalPruningConfigPtr && globalPruningConfigPtr->shouldRemoveUnusedRules()) { - std::unique_ptr& configPtr = config_.getUnusedRuleRemovalConfigPtr(); - return configPtr->createPostOptimizationPhaseFactory(); - } - - return nullptr; -} - -std::unique_ptr AbstractRuleLearner::createMarginalProbabilityCalibratorFactory() - const { - return config_.getMarginalProbabilityCalibratorConfigPtr()->createMarginalProbabilityCalibratorFactory(); -} - -std::unique_ptr AbstractRuleLearner::createJointProbabilityCalibratorFactory() - const { - return config_.getJointProbabilityCalibratorConfigPtr()->createJointProbabilityCalibratorFactory(); -} - -void AbstractRuleLearner::createStoppingCriterionFactories(StoppingCriterionListFactory& factory) const { - std::unique_ptr stoppingCriterionFactory = this->createSizeStoppingCriterionFactory(); - - if (stoppingCriterionFactory) { - factory.addStoppingCriterionFactory(std::move(stoppingCriterionFactory)); - } - - stoppingCriterionFactory = this->createTimeStoppingCriterionFactory(); - - if (stoppingCriterionFactory) { - factory.addStoppingCriterionFactory(std::move(stoppingCriterionFactory)); - } - - stoppingCriterionFactory = this->createGlobalPruningFactory(); - - if (stoppingCriterionFactory) { - factory.addStoppingCriterionFactory(std::move(stoppingCriterionFactory)); - } -} - -void AbstractRuleLearner::createPostOptimizationPhaseFactories(PostOptimizationPhaseListFactory& factory) const { - std::unique_ptr postOptimizationPhaseFactory = - 
this->createUnusedRuleRemovalFactory(); - - if (postOptimizationPhaseFactory) { - factory.addPostOptimizationPhaseFactory(std::move(postOptimizationPhaseFactory)); - } - - postOptimizationPhaseFactory = this->createSequentialPostOptimizationFactory(); - - if (postOptimizationPhaseFactory) { - factory.addPostOptimizationPhaseFactory(std::move(postOptimizationPhaseFactory)); - } -} - -std::unique_ptr AbstractRuleLearner::createLabelSpaceInfo( - const IRowWiseLabelMatrix& labelMatrix) const { - const IBinaryPredictorConfig* binaryPredictorConfig = config_.getBinaryPredictorConfigPtr().get(); - const IScorePredictorConfig* scorePredictorConfig = config_.getScorePredictorConfigPtr().get(); - const IProbabilityPredictorConfig* probabilityPredictorConfig = config_.getProbabilityPredictorConfigPtr().get(); - const IJointProbabilityCalibratorConfig& jointProbabilityCalibratorConfig = - *config_.getJointProbabilityCalibratorConfigPtr(); - - if ((binaryPredictorConfig && binaryPredictorConfig->isLabelVectorSetNeeded()) - || (scorePredictorConfig && scorePredictorConfig->isLabelVectorSetNeeded()) - || (probabilityPredictorConfig && probabilityPredictorConfig->isLabelVectorSetNeeded()) - || (jointProbabilityCalibratorConfig.isLabelVectorSetNeeded())) { - return std::make_unique(labelMatrix); - } else { - return createNoLabelSpaceInfo(); - } -} - -std::unique_ptr AbstractRuleLearner::createBinaryPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - const IBinaryPredictorConfig* config = config_.getBinaryPredictorConfigPtr().get(); - return config ? config->createPredictorFactory(featureMatrix, numLabels) : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createSparseBinaryPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - const IBinaryPredictorConfig* config = config_.getBinaryPredictorConfigPtr().get(); - return config ? 
config->createSparsePredictorFactory(featureMatrix, numLabels) : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createScorePredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - const IScorePredictorConfig* config = config_.getScorePredictorConfigPtr().get(); - return config ? config->createPredictorFactory(featureMatrix, numLabels) : nullptr; -} - -std::unique_ptr AbstractRuleLearner::createProbabilityPredictorFactory( - const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - const IProbabilityPredictorConfig* config = config_.getProbabilityPredictorConfigPtr().get(); - return config ? config->createPredictorFactory(featureMatrix, numLabels) : nullptr; -} - -std::unique_ptr AbstractRuleLearner::fit(const IFeatureInfo& featureInfo, - const IColumnWiseFeatureMatrix& featureMatrix, - const IRowWiseLabelMatrix& labelMatrix, - uint32 randomState) const { - assertGreaterOrEqual("randomState", randomState, 1); - RNG rng(randomState); - - // Create stopping criteria... - std::unique_ptr stoppingCriterionFactoryPtr = - std::make_unique(); - this->createStoppingCriterionFactories(*stoppingCriterionFactoryPtr); - - // Create post-optimization phases... - std::unique_ptr postOptimizationFactoryPtr = - std::make_unique(); - this->createPostOptimizationPhaseFactories(*postOptimizationFactoryPtr); - - // Create label space info... - std::unique_ptr labelSpaceInfoPtr = this->createLabelSpaceInfo(labelMatrix); - - // Partition training data... - std::unique_ptr partitionSamplingFactoryPtr = this->createPartitionSamplingFactory(); - std::unique_ptr partitionSamplingPtr = - labelMatrix.createPartitionSampling(*partitionSamplingFactoryPtr); - IPartition& partition = partitionSamplingPtr->partition(rng); - - // Create post-optimization and model builder... 
- std::unique_ptr modelBuilderFactoryPtr = this->createModelBuilderFactory(); - std::unique_ptr postOptimizationPtr = - postOptimizationFactoryPtr->create(*modelBuilderFactoryPtr); - IModelBuilder& modelBuilder = postOptimizationPtr->getModelBuilder(); - - // Create statistics provider... - std::unique_ptr statisticsProviderFactoryPtr = - this->createStatisticsProviderFactory(featureMatrix, labelMatrix); - std::unique_ptr statisticsProviderPtr = - labelMatrix.createStatisticsProvider(*statisticsProviderFactoryPtr); - - // Create thresholds... - std::unique_ptr thresholdsFactoryPtr = - this->createThresholdsFactory(featureMatrix, labelMatrix); - std::unique_ptr thresholdsPtr = - thresholdsFactoryPtr->create(featureMatrix, featureInfo, *statisticsProviderPtr); - - // Create rule induction... - std::unique_ptr ruleInductionFactoryPtr = - this->createRuleInductionFactory(featureMatrix, labelMatrix); - std::unique_ptr ruleInductionPtr = ruleInductionFactoryPtr->create(); - - // Create label sampling... - std::unique_ptr labelSamplingFactoryPtr = this->createLabelSamplingFactory(labelMatrix); - std::unique_ptr labelSamplingPtr = labelSamplingFactoryPtr->create(); - - // Create instance sampling... - std::unique_ptr instanceSamplingFactoryPtr = this->createInstanceSamplingFactory(); - std::unique_ptr instanceSamplingPtr = - partition.createInstanceSampling(*instanceSamplingFactoryPtr, labelMatrix, statisticsProviderPtr->get()); - - // Create feature sampling... - std::unique_ptr featureSamplingFactoryPtr = - this->createFeatureSamplingFactory(featureMatrix); - std::unique_ptr featureSamplingPtr = featureSamplingFactoryPtr->create(); - - // Create rule pruning... - std::unique_ptr rulePruningFactoryPtr = this->createRulePruningFactory(); - std::unique_ptr rulePruningPtr = rulePruningFactoryPtr->create(); - - // Create post-processor... 
- std::unique_ptr postProcessorFactoryPtr = this->createPostProcessorFactory(); - std::unique_ptr postProcessorPtr = postProcessorFactoryPtr->create(); - - // Assemble rule model... - std::unique_ptr ruleModelAssemblageFactoryPtr = - this->createRuleModelAssemblageFactory(labelMatrix); - std::unique_ptr ruleModelAssemblagePtr = - ruleModelAssemblageFactoryPtr->create(std::move(stoppingCriterionFactoryPtr)); - ruleModelAssemblagePtr->induceRules(*ruleInductionPtr, *rulePruningPtr, *postProcessorPtr, partition, - *labelSamplingPtr, *instanceSamplingPtr, *featureSamplingPtr, - *statisticsProviderPtr, *thresholdsPtr, modelBuilder, rng); - - // Post-optimize the model... - postOptimizationPtr->optimizeModel(*thresholdsPtr, *ruleInductionPtr, partition, *labelSamplingPtr, - *instanceSamplingPtr, *featureSamplingPtr, *rulePruningPtr, *postProcessorPtr, - rng); - - // Fit model for the calibration of marginal probabilities... - std::unique_ptr marginalProbabilityCalibratorFactoryPtr = - this->createMarginalProbabilityCalibratorFactory(); - std::unique_ptr marginalProbabilityCalibratorPtr = - marginalProbabilityCalibratorFactoryPtr->create(); - std::unique_ptr marginalProbabilityCalibrationModelPtr = - partition.fitMarginalProbabilityCalibrationModel(*marginalProbabilityCalibratorPtr, labelMatrix, - statisticsProviderPtr->get()); - - // Fit model for the calibration of joint probabilities... 
- std::unique_ptr jointProbabilityCalibratorFactoryPtr = - this->createJointProbabilityCalibratorFactory(); - std::unique_ptr jointProbabilityCalibratorPtr = - labelSpaceInfoPtr->createJointProbabilityCalibrator(*jointProbabilityCalibratorFactoryPtr, - *marginalProbabilityCalibrationModelPtr); - std::unique_ptr jointProbabilityCalibrationModelPtr = - partition.fitJointProbabilityCalibrationModel(*jointProbabilityCalibratorPtr, labelMatrix, - statisticsProviderPtr->get()); - - return std::make_unique( - labelMatrix.getNumCols(), modelBuilder.buildModel(), std::move(labelSpaceInfoPtr), - std::move(marginalProbabilityCalibrationModelPtr), std::move(jointProbabilityCalibrationModelPtr)); -} - -bool AbstractRuleLearner::canPredictBinary(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const { - return this->canPredictBinary(featureMatrix, trainingResult.getNumLabels()); -} - -bool AbstractRuleLearner::canPredictBinary(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - return this->createBinaryPredictorFactory(featureMatrix, numLabels) != nullptr; -} - -std::unique_ptr AbstractRuleLearner::createBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const { - return this->createBinaryPredictor( - featureMatrix, *trainingResult.getRuleModel(), *trainingResult.getLabelSpaceInfo(), - *trainingResult.getMarginalProbabilityCalibrationModel(), *trainingResult.getJointProbabilityCalibrationModel(), - trainingResult.getNumLabels()); -} - -std::unique_ptr AbstractRuleLearner::createBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - std::unique_ptr predictorFactoryPtr = - 
this->createBinaryPredictorFactory(featureMatrix, numLabels); - - if (predictorFactoryPtr) { - return featureMatrix.createBinaryPredictor(*predictorFactoryPtr, ruleModel, labelSpaceInfo, - marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - throw std::runtime_error("The rule learner does not support to predict binary labels"); -} - -std::unique_ptr AbstractRuleLearner::createSparseBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const { - return this->createSparseBinaryPredictor( - featureMatrix, *trainingResult.getRuleModel(), *trainingResult.getLabelSpaceInfo(), - *trainingResult.getMarginalProbabilityCalibrationModel(), *trainingResult.getJointProbabilityCalibrationModel(), - trainingResult.getNumLabels()); -} - -std::unique_ptr AbstractRuleLearner::createSparseBinaryPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - std::unique_ptr predictorFactoryPtr = - this->createSparseBinaryPredictorFactory(featureMatrix, numLabels); - - if (predictorFactoryPtr) { - return featureMatrix.createSparseBinaryPredictor(*predictorFactoryPtr, ruleModel, labelSpaceInfo, - marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - throw std::runtime_error("The rule learner does not support to predict sparse binary labels"); -} - -bool AbstractRuleLearner::canPredictScores(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const { - return this->canPredictScores(featureMatrix, trainingResult.getNumLabels()); -} - -bool AbstractRuleLearner::canPredictScores(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - return 
this->createScorePredictorFactory(featureMatrix, numLabels) != nullptr; -} - -std::unique_ptr AbstractRuleLearner::createScorePredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const { - return this->createScorePredictor(featureMatrix, *trainingResult.getRuleModel(), - *trainingResult.getLabelSpaceInfo(), trainingResult.getNumLabels()); -} - -std::unique_ptr AbstractRuleLearner::createScorePredictor(const IRowWiseFeatureMatrix& featureMatrix, - const IRuleModel& ruleModel, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const { - std::unique_ptr predictorFactoryPtr = - this->createScorePredictorFactory(featureMatrix, numLabels); - - if (predictorFactoryPtr) { - return featureMatrix.createScorePredictor(*predictorFactoryPtr, ruleModel, labelSpaceInfo, numLabels); - } - - throw std::runtime_error("The rule learner does not support to predict regression scores"); -} - -bool AbstractRuleLearner::canPredictProbabilities(const IRowWiseFeatureMatrix& featureMatrix, - const ITrainingResult& trainingResult) const { - return this->canPredictProbabilities(featureMatrix, trainingResult.getNumLabels()); -} - -bool AbstractRuleLearner::canPredictProbabilities(const IRowWiseFeatureMatrix& featureMatrix, uint32 numLabels) const { - return this->createProbabilityPredictorFactory(featureMatrix, numLabels) != nullptr; -} - -std::unique_ptr AbstractRuleLearner::createProbabilityPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const ITrainingResult& trainingResult) const { - return this->createProbabilityPredictor( - featureMatrix, *trainingResult.getRuleModel(), *trainingResult.getLabelSpaceInfo(), - *trainingResult.getMarginalProbabilityCalibrationModel(), *trainingResult.getJointProbabilityCalibrationModel(), - trainingResult.getNumLabels()); -} - -std::unique_ptr AbstractRuleLearner::createProbabilityPredictor( - const IRowWiseFeatureMatrix& featureMatrix, const IRuleModel& ruleModel, const ILabelSpaceInfo& 
labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - std::unique_ptr predictorFactoryPtr = - this->createProbabilityPredictorFactory(featureMatrix, numLabels); - - if (predictorFactoryPtr) { - return featureMatrix.createProbabilityPredictor(*predictorFactoryPtr, ruleModel, labelSpaceInfo, - marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - throw std::runtime_error("The rule learner does not support to predict probability estimates"); -} diff --git a/cpp/subprojects/common/src/common/model/body_conjunctive.cpp b/cpp/subprojects/common/src/common/model/body_conjunctive.cpp deleted file mode 100644 index 55f5c117..00000000 --- a/cpp/subprojects/common/src/common/model/body_conjunctive.cpp +++ /dev/null @@ -1,274 +0,0 @@ -#include "common/model/body_conjunctive.hpp" - -ConjunctiveBody::ConjunctiveBody(uint32 numLeq, uint32 numGr, uint32 numEq, uint32 numNeq) - : numLeq_(numLeq), leqFeatureIndices_(new uint32[numLeq_]), leqThresholds_(new float32[numLeq_]), numGr_(numGr), - grFeatureIndices_(new uint32[numGr_]), grThresholds_(new float32[numGr_]), numEq_(numEq), - eqFeatureIndices_(new uint32[numEq_]), eqThresholds_(new float32[numEq_]), numNeq_(numNeq), - neqFeatureIndices_(new uint32[numNeq_]), neqThresholds_(new float32[numNeq_]) {} - -ConjunctiveBody::~ConjunctiveBody() { - delete[] leqFeatureIndices_; - delete[] leqThresholds_; - delete[] grFeatureIndices_; - delete[] grThresholds_; - delete[] eqFeatureIndices_; - delete[] eqThresholds_; - delete[] neqFeatureIndices_; - delete[] neqThresholds_; -} - -uint32 ConjunctiveBody::getNumLeq() const { - return numLeq_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::leq_thresholds_begin() { - return leqThresholds_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::leq_thresholds_end() { - return 
&leqThresholds_[numLeq_]; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::leq_thresholds_cbegin() const { - return leqThresholds_; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::leq_thresholds_cend() const { - return &leqThresholds_[numLeq_]; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::leq_indices_begin() { - return leqFeatureIndices_; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::leq_indices_end() { - return &leqFeatureIndices_[numLeq_]; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::leq_indices_cbegin() const { - return leqFeatureIndices_; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::leq_indices_cend() const { - return &leqFeatureIndices_[numLeq_]; -} - -uint32 ConjunctiveBody::getNumGr() const { - return numGr_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::gr_thresholds_begin() { - return grThresholds_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::gr_thresholds_end() { - return &grThresholds_[numLeq_]; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::gr_thresholds_cbegin() const { - return grThresholds_; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::gr_thresholds_cend() const { - return &grThresholds_[numLeq_]; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::gr_indices_begin() { - return grFeatureIndices_; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::gr_indices_end() { - return &grFeatureIndices_[numLeq_]; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::gr_indices_cbegin() const { - return grFeatureIndices_; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::gr_indices_cend() const { - return &grFeatureIndices_[numLeq_]; -} - -uint32 ConjunctiveBody::getNumEq() const { - return numEq_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::eq_thresholds_begin() { - return eqThresholds_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::eq_thresholds_end() { - 
return &eqThresholds_[numLeq_]; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::eq_thresholds_cbegin() const { - return eqThresholds_; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::eq_thresholds_cend() const { - return &eqThresholds_[numLeq_]; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::eq_indices_begin() { - return eqFeatureIndices_; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::eq_indices_end() { - return &eqFeatureIndices_[numLeq_]; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::eq_indices_cbegin() const { - return eqFeatureIndices_; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::eq_indices_cend() const { - return &eqFeatureIndices_[numLeq_]; -} - -uint32 ConjunctiveBody::getNumNeq() const { - return numNeq_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::neq_thresholds_begin() { - return neqThresholds_; -} - -ConjunctiveBody::threshold_iterator ConjunctiveBody::neq_thresholds_end() { - return &neqThresholds_[numLeq_]; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::neq_thresholds_cbegin() const { - return neqThresholds_; -} - -ConjunctiveBody::threshold_const_iterator ConjunctiveBody::neq_thresholds_cend() const { - return &neqThresholds_[numLeq_]; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::neq_indices_begin() { - return neqFeatureIndices_; -} - -ConjunctiveBody::index_iterator ConjunctiveBody::neq_indices_end() { - return &neqFeatureIndices_[numLeq_]; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::neq_indices_cbegin() const { - return neqFeatureIndices_; -} - -ConjunctiveBody::index_const_iterator ConjunctiveBody::neq_indices_cend() const { - return &neqFeatureIndices_[numLeq_]; -} - -bool ConjunctiveBody::covers(VectorConstView::const_iterator begin, - VectorConstView::const_iterator end) const { - // Test conditions using the <= operator... 
- for (uint32 i = 0; i < numLeq_; i++) { - uint32 featureIndex = leqFeatureIndices_[i]; - float32 threshold = leqThresholds_[i]; - - if (begin[featureIndex] > threshold) { - return false; - } - } - - // Test conditions using the > operator... - for (uint32 i = 0; i < numGr_; i++) { - uint32 featureIndex = grFeatureIndices_[i]; - float32 threshold = grThresholds_[i]; - - if (begin[featureIndex] <= threshold) { - return false; - } - } - - // Test conditions using the == operator... - for (uint32 i = 0; i < numEq_; i++) { - uint32 featureIndex = eqFeatureIndices_[i]; - float32 threshold = eqThresholds_[i]; - - if (begin[featureIndex] != threshold) { - return false; - } - } - - // Test conditions using the != operator... - for (uint32 i = 0; i < numNeq_; i++) { - uint32 featureIndex = neqFeatureIndices_[i]; - float32 threshold = neqThresholds_[i]; - - if (begin[featureIndex] == threshold) { - return false; - } - } - - return true; -} - -bool ConjunctiveBody::covers(CsrConstView::index_const_iterator indicesBegin, - CsrConstView::index_const_iterator indicesEnd, - CsrConstView::value_const_iterator valuesBegin, - CsrConstView::value_const_iterator valuesEnd, float32* tmpArray1, - uint32* tmpArray2, uint32 n) const { - // Copy non-zero feature values to the temporary arrays... - auto valueIterator = valuesBegin; - - for (auto indexIterator = indicesBegin; indexIterator != indicesEnd; indexIterator++) { - uint32 featureIndex = *indexIterator; - float32 featureValue = *valueIterator; - tmpArray1[featureIndex] = featureValue; - tmpArray2[featureIndex] = n; - valueIterator++; - } - - // Test conditions using the <= operator... - for (uint32 i = 0; i < numLeq_; i++) { - uint32 featureIndex = leqFeatureIndices_[i]; - float32 threshold = leqThresholds_[i]; - float32 featureValue = tmpArray2[featureIndex] == n ? tmpArray1[featureIndex] : 0; - - if (featureValue > threshold) { - return false; - } - } - - // Test conditions using the > operator... 
- for (uint32 i = 0; i < numGr_; i++) { - uint32 featureIndex = grFeatureIndices_[i]; - float32 threshold = grThresholds_[i]; - float32 featureValue = tmpArray2[featureIndex] == n ? tmpArray1[featureIndex] : 0; - - if (featureValue <= threshold) { - return false; - } - } - - // Test conditions using the == operator... - for (uint32 i = 0; i < numEq_; i++) { - uint32 featureIndex = eqFeatureIndices_[i]; - float32 threshold = eqThresholds_[i]; - float32 featureValue = tmpArray2[featureIndex] == n ? tmpArray1[featureIndex] : 0; - - if (featureValue != threshold) { - return false; - } - } - - // Test conditions using the != operator... - for (uint32 i = 0; i < numNeq_; i++) { - uint32 featureIndex = neqFeatureIndices_[i]; - float32 threshold = neqThresholds_[i]; - float32 featureValue = tmpArray2[featureIndex] == n ? tmpArray1[featureIndex] : 0; - - if (featureValue == threshold) { - return false; - } - } - - return true; -} - -void ConjunctiveBody::visit(EmptyBodyVisitor emptyBodyVisitor, ConjunctiveBodyVisitor conjunctiveBodyVisitor) const { - conjunctiveBodyVisitor(*this); -} diff --git a/cpp/subprojects/common/src/common/model/body_empty.cpp b/cpp/subprojects/common/src/common/model/body_empty.cpp deleted file mode 100644 index 9268627a..00000000 --- a/cpp/subprojects/common/src/common/model/body_empty.cpp +++ /dev/null @@ -1,18 +0,0 @@ -#include "common/model/body_empty.hpp" - -bool EmptyBody::covers(VectorConstView::const_iterator begin, - VectorConstView::const_iterator end) const { - return true; -} - -bool EmptyBody::covers(CsrConstView::index_const_iterator indicesBegin, - CsrConstView::index_const_iterator indicesEnd, - CsrConstView::value_const_iterator valuesBegin, - CsrConstView::value_const_iterator valuesEnd, float32* tmpArray1, - uint32* tmpArray2, uint32 n) const { - return true; -} - -void EmptyBody::visit(EmptyBodyVisitor emptyBodyVisitor, ConjunctiveBodyVisitor conjunctiveBodyVisitor) const { - emptyBodyVisitor(*this); -} diff --git 
a/cpp/subprojects/common/src/common/model/condition_list.cpp b/cpp/subprojects/common/src/common/model/condition_list.cpp deleted file mode 100644 index 8fff0cb9..00000000 --- a/cpp/subprojects/common/src/common/model/condition_list.cpp +++ /dev/null @@ -1,80 +0,0 @@ -#include "common/model/condition_list.hpp" - -ConditionList::ConditionList() : numConditionsPerComparator_({0, 0, 0, 0}) {} - -ConditionList::ConditionList(const ConditionList& conditionList) - : vector_(conditionList.vector_), - numConditionsPerComparator_( - {conditionList.numConditionsPerComparator_[0], conditionList.numConditionsPerComparator_[1], - conditionList.numConditionsPerComparator_[2], conditionList.numConditionsPerComparator_[3]}) {} - -ConditionList::const_iterator ConditionList::cbegin() const { - return vector_.cbegin(); -} - -ConditionList::const_iterator ConditionList::cend() const { - return vector_.cend(); -} - -uint32 ConditionList::getNumConditions() const { - return (uint32) vector_.size(); -} - -void ConditionList::addCondition(const Condition& condition) { - numConditionsPerComparator_[condition.comparator] += 1; - vector_.emplace_back(condition); -} - -void ConditionList::removeLastCondition() { - const Condition& condition = vector_.back(); - numConditionsPerComparator_[condition.comparator] -= 1; - vector_.pop_back(); -}; - -std::unique_ptr ConditionList::createConjunctiveBody() const { - std::unique_ptr bodyPtr = - std::make_unique(numConditionsPerComparator_[LEQ], numConditionsPerComparator_[GR], - numConditionsPerComparator_[EQ], numConditionsPerComparator_[NEQ]); - uint32 leqIndex = 0; - uint32 grIndex = 0; - uint32 eqIndex = 0; - uint32 neqIndex = 0; - - for (auto it = vector_.cbegin(); it != vector_.cend(); it++) { - const Condition& condition = *it; - uint32 featureIndex = condition.featureIndex; - float32 threshold = condition.threshold; - - switch (condition.comparator) { - case LEQ: { - bodyPtr->leq_indices_begin()[leqIndex] = featureIndex; - 
bodyPtr->leq_thresholds_begin()[leqIndex] = threshold; - leqIndex++; - break; - } - case GR: { - bodyPtr->gr_indices_begin()[grIndex] = featureIndex; - bodyPtr->gr_thresholds_begin()[grIndex] = threshold; - grIndex++; - break; - } - case EQ: { - bodyPtr->eq_indices_begin()[eqIndex] = featureIndex; - bodyPtr->eq_thresholds_begin()[eqIndex] = threshold; - eqIndex++; - break; - } - case NEQ: { - bodyPtr->neq_indices_begin()[neqIndex] = featureIndex; - bodyPtr->neq_thresholds_begin()[neqIndex] = threshold; - neqIndex++; - break; - } - default: { - break; - } - } - } - - return bodyPtr; -} diff --git a/cpp/subprojects/common/src/common/model/head_complete.cpp b/cpp/subprojects/common/src/common/model/head_complete.cpp deleted file mode 100644 index b7f1189c..00000000 --- a/cpp/subprojects/common/src/common/model/head_complete.cpp +++ /dev/null @@ -1,31 +0,0 @@ -#include "common/model/head_complete.hpp" - -CompleteHead::CompleteHead(uint32 numElements) : numElements_(numElements), scores_(new float64[numElements]) {} - -CompleteHead::~CompleteHead() { - delete[] scores_; -} - -uint32 CompleteHead::getNumElements() const { - return numElements_; -} - -CompleteHead::score_iterator CompleteHead::scores_begin() { - return scores_; -} - -CompleteHead::score_iterator CompleteHead::scores_end() { - return &scores_[numElements_]; -} - -CompleteHead::score_const_iterator CompleteHead::scores_cbegin() const { - return scores_; -} - -CompleteHead::score_const_iterator CompleteHead::scores_cend() const { - return &scores_[numElements_]; -} - -void CompleteHead::visit(CompleteHeadVisitor completeHeadVisitor, PartialHeadVisitor partialHeadVisitor) const { - completeHeadVisitor(*this); -} diff --git a/cpp/subprojects/common/src/common/model/head_partial.cpp b/cpp/subprojects/common/src/common/model/head_partial.cpp deleted file mode 100644 index 11b4db28..00000000 --- a/cpp/subprojects/common/src/common/model/head_partial.cpp +++ /dev/null @@ -1,49 +0,0 @@ -#include 
"common/model/head_partial.hpp" - -PartialHead::PartialHead(uint32 numElements) - : numElements_(numElements), scores_(new float64[numElements]), labelIndices_(new uint32[numElements]) {} - -PartialHead::~PartialHead() { - delete[] scores_; - delete[] labelIndices_; -} - -uint32 PartialHead::getNumElements() const { - return numElements_; -} - -PartialHead::score_iterator PartialHead::scores_begin() { - return scores_; -} - -PartialHead::score_iterator PartialHead::scores_end() { - return &scores_[numElements_]; -} - -PartialHead::score_const_iterator PartialHead::scores_cbegin() const { - return scores_; -} - -PartialHead::score_const_iterator PartialHead::scores_cend() const { - return &scores_[numElements_]; -} - -PartialHead::index_iterator PartialHead::indices_begin() { - return labelIndices_; -} - -PartialHead::index_iterator PartialHead::indices_end() { - return &labelIndices_[numElements_]; -} - -PartialHead::index_const_iterator PartialHead::indices_cbegin() const { - return labelIndices_; -} - -PartialHead::index_const_iterator PartialHead::indices_cend() const { - return &labelIndices_[numElements_]; -} - -void PartialHead::visit(CompleteHeadVisitor completeHeadVisitor, PartialHeadVisitor partialHeadVisitor) const { - partialHeadVisitor(*this); -} diff --git a/cpp/subprojects/common/src/common/model/rule_list.cpp b/cpp/subprojects/common/src/common/model/rule_list.cpp deleted file mode 100644 index 77d4917e..00000000 --- a/cpp/subprojects/common/src/common/model/rule_list.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include "common/model/rule_list.hpp" - -#include "common/model/body_empty.hpp" -#include "common/prediction/label_space_info.hpp" -#include "common/prediction/predictor_binary.hpp" -#include "common/prediction/predictor_probability.hpp" -#include "common/prediction/predictor_score.hpp" - -RuleList::Rule::Rule(std::unique_ptr bodyPtr, std::unique_ptr headPtr) - : bodyPtr_(std::move(bodyPtr)), headPtr_(std::move(headPtr)) {} - -const IBody& 
RuleList::Rule::getBody() const { - return *bodyPtr_; -} - -const IHead& RuleList::Rule::getHead() const { - return *headPtr_; -} - -void RuleList::Rule::visit(IBody::EmptyBodyVisitor emptyBodyVisitor, - IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const { - bodyPtr_->visit(emptyBodyVisitor, conjunctiveBodyVisitor); - headPtr_->visit(completeHeadVisitor, partialHeadVisitor); -} - -RuleList::ConstIterator::ConstIterator(bool defaultRuleTakesPrecedence, const Rule* defaultRule, - std::vector::const_iterator iterator, uint32 start, uint32 end) - : defaultRule_(defaultRule), iterator_(iterator), - offset_(defaultRuleTakesPrecedence && defaultRule != nullptr ? 1 : 0), - defaultRuleIndex_(offset_ > 0 ? 0 : end - (defaultRule != nullptr ? 1 : 0)), index_(start) {} - -RuleList::ConstIterator::reference RuleList::ConstIterator::operator*() const { - uint32 index = index_; - - if (index == defaultRuleIndex_) { - return *defaultRule_; - } else { - return iterator_[index - offset_]; - } -} - -RuleList::ConstIterator& RuleList::ConstIterator::operator++() { - ++index_; - return *this; -} - -RuleList::ConstIterator& RuleList::ConstIterator::operator++(int n) { - index_++; - return *this; -} - -RuleList::ConstIterator RuleList::ConstIterator::operator+(const uint32 difference) const { - ConstIterator iterator(*this); - iterator += difference; - return iterator; -} - -RuleList::ConstIterator& RuleList::ConstIterator::operator+=(const uint32 difference) { - index_ += difference; - return *this; -} - -bool RuleList::ConstIterator::operator!=(const ConstIterator& rhs) const { - return index_ != rhs.index_; -} - -bool RuleList::ConstIterator::operator==(const ConstIterator& rhs) const { - return index_ == rhs.index_; -} - -RuleList::ConstIterator::difference_type RuleList::ConstIterator::operator-(const ConstIterator& rhs) const { - return index_ - rhs.index_; -} - 
-RuleList::RuleList(bool defaultRuleTakesPrecedence) - : numUsedRules_(0), defaultRuleTakesPrecedence_(defaultRuleTakesPrecedence) {} - -RuleList::const_iterator RuleList::cbegin(uint32 maxRules) const { - uint32 numRules = maxRules > 0 ? std::min(this->getNumRules(), maxRules) : this->getNumRules(); - return ConstIterator(defaultRuleTakesPrecedence_, defaultRulePtr_.get(), ruleList_.cbegin(), 0, numRules); -} - -RuleList::const_iterator RuleList::cend(uint32 maxRules) const { - uint32 numRules = maxRules > 0 ? std::min(this->getNumRules(), maxRules) : this->getNumRules(); - return ConstIterator(defaultRuleTakesPrecedence_, defaultRulePtr_.get(), ruleList_.cbegin(), numRules, numRules); -} - -RuleList::const_iterator RuleList::used_cbegin(uint32 maxRules) const { - uint32 numRules = maxRules > 0 ? std::min(this->getNumUsedRules(), maxRules) : this->getNumUsedRules(); - return ConstIterator(defaultRuleTakesPrecedence_, defaultRulePtr_.get(), ruleList_.cbegin(), 0, numRules); -} - -RuleList::const_iterator RuleList::used_cend(uint32 maxRules) const { - uint32 numRules = maxRules > 0 ? std::min(this->getNumUsedRules(), maxRules) : this->getNumUsedRules(); - return ConstIterator(defaultRuleTakesPrecedence_, defaultRulePtr_.get(), ruleList_.cbegin(), numRules, numRules); -} - -uint32 RuleList::getNumRules() const { - uint32 numRules = (uint32) ruleList_.size(); - - if (this->containsDefaultRule()) { - numRules++; - } - - return numRules; -} - -uint32 RuleList::getNumUsedRules() const { - return numUsedRules_ > 0 ? 
numUsedRules_ : this->getNumRules(); -} - -void RuleList::setNumUsedRules(uint32 numUsedRules) { - numUsedRules_ = numUsedRules; -} - -void RuleList::addDefaultRule(std::unique_ptr headPtr) { - defaultRulePtr_ = std::make_unique(std::make_unique(), std::move(headPtr)); -} - -void RuleList::addRule(std::unique_ptr bodyPtr, std::unique_ptr headPtr) { - ruleList_.emplace_back(std::move(bodyPtr), std::move(headPtr)); -} - -bool RuleList::containsDefaultRule() const { - return defaultRulePtr_ != nullptr; -} - -bool RuleList::isDefaultRuleTakingPrecedence() const { - return defaultRuleTakesPrecedence_; -} - -void RuleList::visit(IBody::EmptyBodyVisitor emptyBodyVisitor, IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const { - for (auto it = this->cbegin(); it != this->cend(); it++) { - const Rule& rule = *it; - rule.visit(emptyBodyVisitor, conjunctiveBodyVisitor, completeHeadVisitor, partialHeadVisitor); - } -} - -void RuleList::visitUsed(IBody::EmptyBodyVisitor emptyBodyVisitor, IBody::ConjunctiveBodyVisitor conjunctiveBodyVisitor, - IHead::CompleteHeadVisitor completeHeadVisitor, - IHead::PartialHeadVisitor partialHeadVisitor) const { - for (auto it = this->used_cbegin(); it != this->used_cend(); it++) { - const Rule& rule = *it; - rule.visit(emptyBodyVisitor, conjunctiveBodyVisitor, completeHeadVisitor, partialHeadVisitor); - } -} - -std::unique_ptr RuleList::createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return labelSpaceInfo.createBinaryPredictor(factory, featureMatrix, *this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - 
-std::unique_ptr RuleList::createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return labelSpaceInfo.createBinaryPredictor(factory, featureMatrix, *this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr RuleList::createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return labelSpaceInfo.createSparseBinaryPredictor( - factory, featureMatrix, *this, marginalProbabilityCalibrationModel, jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr RuleList::createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return labelSpaceInfo.createSparseBinaryPredictor( - factory, featureMatrix, *this, marginalProbabilityCalibrationModel, jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr RuleList::createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const { - return labelSpaceInfo.createScorePredictor(factory, featureMatrix, *this, numLabels); -} - -std::unique_ptr RuleList::createScorePredictor(const IScorePredictorFactory& 
factory, - const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - uint32 numLabels) const { - return labelSpaceInfo.createScorePredictor(factory, featureMatrix, *this, numLabels); -} - -std::unique_ptr RuleList::createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return labelSpaceInfo.createProbabilityPredictor(factory, featureMatrix, *this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr RuleList::createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, - const ILabelSpaceInfo& labelSpaceInfo, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return labelSpaceInfo.createProbabilityPredictor(factory, featureMatrix, *this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr createRuleList(bool defaultRuleTakesPrecedence) { - return std::make_unique(defaultRuleTakesPrecedence); -} diff --git a/cpp/subprojects/common/src/common/multi_threading/multi_threading_manual.cpp b/cpp/subprojects/common/src/common/multi_threading/multi_threading_manual.cpp deleted file mode 100644 index 264b27c9..00000000 --- a/cpp/subprojects/common/src/common/multi_threading/multi_threading_manual.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "common/multi_threading/multi_threading_manual.hpp" - -#include "common/util/threads.hpp" -#include "common/util/validation.hpp" - -ManualMultiThreadingConfig::ManualMultiThreadingConfig() : numThreads_(0) {} - -uint32 
ManualMultiThreadingConfig::getNumThreads() const { - return numThreads_; -} - -IManualMultiThreadingConfig& ManualMultiThreadingConfig::setNumThreads(uint32 numThreads) { - if (numThreads != 0) assertGreaterOrEqual("numThreads", numThreads, 1); - numThreads_ = numThreads; - return *this; -} - -uint32 ManualMultiThreadingConfig::getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const { - return getNumAvailableThreads(numThreads_); -} diff --git a/cpp/subprojects/common/src/common/multi_threading/multi_threading_no.cpp b/cpp/subprojects/common/src/common/multi_threading/multi_threading_no.cpp deleted file mode 100644 index ada05b97..00000000 --- a/cpp/subprojects/common/src/common/multi_threading/multi_threading_no.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "common/multi_threading/multi_threading_no.hpp" - -uint32 NoMultiThreadingConfig::getNumThreads(const IFeatureMatrix& featureMatrix, uint32 numLabels) const { - return 1; -} diff --git a/cpp/subprojects/common/src/common/post_optimization/model_builder_intermediate.cpp b/cpp/subprojects/common/src/common/post_optimization/model_builder_intermediate.cpp deleted file mode 100644 index ed5dfde0..00000000 --- a/cpp/subprojects/common/src/common/post_optimization/model_builder_intermediate.cpp +++ /dev/null @@ -1,59 +0,0 @@ -#include "common/post_optimization/model_builder_intermediate.hpp" - -IntermediateModelBuilder::IntermediateModelBuilder(std::unique_ptr modelBuilderPtr) - : modelBuilderPtr_(std::move(modelBuilderPtr)), numUsedRules_(0) {} - -IntermediateModelBuilder::iterator IntermediateModelBuilder::begin() { - return intermediateRuleList_.begin(); -} - -IntermediateModelBuilder::iterator IntermediateModelBuilder::end() { - return intermediateRuleList_.end(); -} - -void IntermediateModelBuilder::setDefaultRule(std::unique_ptr& predictionPtr) { - defaultPredictionPtr_ = std::move(predictionPtr); -} - -void IntermediateModelBuilder::addRule(std::unique_ptr& conditionListPtr, - 
std::unique_ptr& predictionPtr) { - intermediateRuleList_.emplace_back(std::move(conditionListPtr), std::move(predictionPtr)); -} - -void IntermediateModelBuilder::removeLastRule() { - intermediateRuleList_.pop_back(); -} - -uint32 IntermediateModelBuilder::getNumRules() const { - uint32 numRules = (uint32) intermediateRuleList_.size(); - - if (defaultPredictionPtr_) { - numRules++; - } - - return numRules; -} - -uint32 IntermediateModelBuilder::getNumUsedRules() const { - return numUsedRules_; -} - -void IntermediateModelBuilder::setNumUsedRules(uint32 numUsedRules) { - numUsedRules_ = numUsedRules; -} - -std::unique_ptr IntermediateModelBuilder::buildModel() { - if (defaultPredictionPtr_) { - modelBuilderPtr_->setDefaultRule(defaultPredictionPtr_); - defaultPredictionPtr_.release(); - } - - for (auto it = intermediateRuleList_.begin(); it != intermediateRuleList_.end(); it++) { - IntermediateRule& intermediateRule = *it; - modelBuilderPtr_->addRule(intermediateRule.first, intermediateRule.second); - } - - intermediateRuleList_.clear(); - modelBuilderPtr_->setNumUsedRules(numUsedRules_); - return modelBuilderPtr_->buildModel(); -} diff --git a/cpp/subprojects/common/src/common/post_optimization/post_optimization_phase_list.cpp b/cpp/subprojects/common/src/common/post_optimization/post_optimization_phase_list.cpp deleted file mode 100644 index 51c300e1..00000000 --- a/cpp/subprojects/common/src/common/post_optimization/post_optimization_phase_list.cpp +++ /dev/null @@ -1,96 +0,0 @@ -#include "common/post_optimization/post_optimization_phase_list.hpp" - -/** - * An implementation of the class `IPostOptimization` that carries out several post-optimization phases. 
- */ -class PostOptimizationPhaseList final : public IPostOptimization { - private: - - const std::unique_ptr intermediateModelBuilderPtr_; - - std::vector> postOptimizationPhases_; - - public: - - /** - * @param modelBuilderPtr An unique pointer to an object of type `IModelBuilder` that should - * be used to build the final model - * @param postOptimizationPhaseFactories A reference to a vector that stores the factories that allow to - * create instances of the optimization phases to be carried out - */ - PostOptimizationPhaseList( - std::unique_ptr modelBuilderPtr, - const std::vector>& postOptimizationPhaseFactories) - : intermediateModelBuilderPtr_(std::make_unique(std::move(modelBuilderPtr))) { - postOptimizationPhases_.reserve(postOptimizationPhaseFactories.size()); - - for (auto it = postOptimizationPhaseFactories.cbegin(); it != postOptimizationPhaseFactories.cend(); it++) { - const std::unique_ptr& postOptimizationPhaseFactoryPtr = *it; - std::unique_ptr postOptimizationPhasePtr = - postOptimizationPhaseFactoryPtr->create(*intermediateModelBuilderPtr_); - postOptimizationPhases_.push_back(std::move(postOptimizationPhasePtr)); - } - } - - IModelBuilder& getModelBuilder() const override { - return *intermediateModelBuilderPtr_; - } - - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng) const override { - for (auto it = postOptimizationPhases_.cbegin(); it != postOptimizationPhases_.cend(); it++) { - const std::unique_ptr& postOptimizationPhasePtr = *it; - postOptimizationPhasePtr->optimizeModel(thresholds, ruleInduction, partition, labelSampling, - instanceSampling, featureSampling, rulePruning, postProcessor, - rng); - } - } -}; - -/** - * An implementation of the class `IPostOptimization` that does not perform any 
optimizations, but retains a previously - * learned rule-based model. - */ -class NoPostOptimization final : public IPostOptimization { - private: - - const std::unique_ptr modelBuilderPtr_; - - public: - - /** - * @param modelBuilderPtr An unique pointer to an object of type `IModelBuilder` that should be used to build - * the model - */ - NoPostOptimization(std::unique_ptr modelBuilderPtr) - : modelBuilderPtr_(std::move(modelBuilderPtr)) {} - - IModelBuilder& getModelBuilder() const override { - return *modelBuilderPtr_; - } - - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng) const override { - return; - } -}; - -void PostOptimizationPhaseListFactory::addPostOptimizationPhaseFactory( - std::unique_ptr postOptimizationPhaseFactoryPtr) { - postOptimizationPhaseFactories_.push_back(std::move(postOptimizationPhaseFactoryPtr)); -} - -std::unique_ptr PostOptimizationPhaseListFactory::create( - const IModelBuilderFactory& modelBuilderFactory) const { - std::unique_ptr modelBuilderPtr = modelBuilderFactory.create(); - - if (postOptimizationPhaseFactories_.empty()) { - return std::make_unique(std::move(modelBuilderPtr)); - } else { - return std::make_unique(std::move(modelBuilderPtr), postOptimizationPhaseFactories_); - } -} diff --git a/cpp/subprojects/common/src/common/post_optimization/post_optimization_sequential.cpp b/cpp/subprojects/common/src/common/post_optimization/post_optimization_sequential.cpp deleted file mode 100644 index 61218117..00000000 --- a/cpp/subprojects/common/src/common/post_optimization/post_optimization_sequential.cpp +++ /dev/null @@ -1,190 +0,0 @@ -#include "common/post_optimization/post_optimization_sequential.hpp" - -#include "common/sampling/feature_sampling_predefined.hpp" -#include 
"common/util/validation.hpp" - -#include - -/** - * An implementation of the class `IModelBuilder` that allows to replace a single rule of an `IntermediateModelBuilder`. - */ -class RuleReplacementBuilder final : public IModelBuilder { - private: - - IntermediateModelBuilder::IntermediateRule& intermediateRule_; - - public: - - /** - * @param intermediateRule A reference to an object of type `IntermediateModelBuilder::IntermediateRule` that - * should be replaced - */ - RuleReplacementBuilder(IntermediateModelBuilder::IntermediateRule& intermediateRule) - : intermediateRule_(intermediateRule) {} - - void setDefaultRule(std::unique_ptr& predictionPtr) override {} - - void addRule(std::unique_ptr& conditionListPtr, - std::unique_ptr& predictionPtr) override { - intermediateRule_.first = std::move(conditionListPtr); - intermediateRule_.second = std::move(predictionPtr); - } - - void setNumUsedRules(uint32 numUsedRules) override {} - - std::unique_ptr buildModel() override { - return nullptr; - } -}; - -/** - * An implementation of the class `IPostOptimizationPhase` that optimizes each rule in a model by relearning it in the - * context of the other rules. - */ -class SequentialPostOptimization final : public IPostOptimizationPhase { - private: - - IntermediateModelBuilder& modelBuilder_; - - const uint32 numIterations_; - - const bool refineHeads_; - - const bool resampleFeatures_; - - public: - - /** - * @param modelBuilder A reference to an object of type `IntermediateModelBuilder` that provides access to - * the existing rules - * @param numIterations The number of iterations to be performed. 
Must be at least 1 - * @param refineHeads True, if the heads of rules should be refined when being relearned, false otherwise - * @param resampleFeatures True, if a new sample of the available features should be created when refining a - * new rule, false otherwise - */ - SequentialPostOptimization(IntermediateModelBuilder& modelBuilder, uint32 numIterations, bool refineHeads, - bool resampleFeatures) - : modelBuilder_(modelBuilder), numIterations_(numIterations), refineHeads_(refineHeads), - resampleFeatures_(resampleFeatures) {} - - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng) const override { - for (uint32 i = 0; i < numIterations_; i++) { - for (auto it = modelBuilder_.begin(); it != modelBuilder_.end(); it++) { - IntermediateModelBuilder::IntermediateRule& intermediateRule = *it; - const ConditionList& conditionList = *intermediateRule.first; - const AbstractEvaluatedPrediction& prediction = *intermediateRule.second; - - // Create a new subset of the given thresholds... - const IWeightVector& weights = instanceSampling.sample(rng); - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(thresholds); - - // Filter the thresholds subset according to the conditions of the current rule... - for (auto it2 = conditionList.cbegin(); it2 != conditionList.cend(); it2++) { - const Condition& condition = *it2; - thresholdsSubsetPtr->filterThresholds(condition); - } - - // Revert the statistics based on the predictions of the current rule... - thresholdsSubsetPtr->revertPrediction(prediction); - - // Learn a new rule... - const IIndexVector& labelIndices = refineHeads_ ? 
labelSampling.sample(rng) : prediction; - RuleReplacementBuilder ruleReplacementBuilder(intermediateRule); - - if (resampleFeatures_) { - ruleInduction.induceRule(thresholds, labelIndices, weights, partition, featureSampling, - rulePruning, postProcessor, rng, ruleReplacementBuilder); - } else { - std::unordered_set uniqueFeatureIndices; - - for (auto it2 = conditionList.cbegin(); it2 != conditionList.cend(); it2++) { - const Condition& condition = *it2; - uniqueFeatureIndices.emplace(condition.featureIndex); - } - - PartialIndexVector indexVector(uniqueFeatureIndices.size()); - PartialIndexVector::iterator indexIterator = indexVector.begin(); - - for (auto it2 = uniqueFeatureIndices.cbegin(); it2 != uniqueFeatureIndices.cend(); it2++) { - *indexIterator = *it2; - indexIterator++; - } - - PredefinedFeatureSampling predefinedFeatureSampling(indexVector); - ruleInduction.induceRule(thresholds, labelIndices, weights, partition, - predefinedFeatureSampling, rulePruning, postProcessor, rng, - ruleReplacementBuilder); - } - } - } - } -}; - -/** - * Allows to create instances of the type `IPostOptimizationPhase` that optimize each rule in a model by relearning it - * in the context of the other rules. - */ -class SequentialPostOptimizationFactory final : public IPostOptimizationPhaseFactory { - private: - - const uint32 numIterations_; - - const bool refineHeads_; - - const bool resampleFeatures_; - - public: - - /** - * @param numIterations The number of iterations to be performed. 
Must be at least 1 - * @param refineHeads True, if the heads of rules should be refined when being relearned, false otherwise - * @param resampleFeatures True, if a new sample of the available features should be created when refining a - * new rule, false otherwise - */ - SequentialPostOptimizationFactory(uint32 numIterations, bool refineHeads, bool resampleFeatures) - : numIterations_(numIterations), refineHeads_(refineHeads), resampleFeatures_(resampleFeatures) {} - - std::unique_ptr create(IntermediateModelBuilder& modelBuilder) const override { - return std::make_unique(modelBuilder, numIterations_, refineHeads_, - resampleFeatures_); - } -}; - -SequentialPostOptimizationConfig::SequentialPostOptimizationConfig() - : numIterations_(2), refineHeads_(false), resampleFeatures_(true) {} - -uint32 SequentialPostOptimizationConfig::getNumIterations() const { - return numIterations_; -} - -ISequentialPostOptimizationConfig& SequentialPostOptimizationConfig::setNumIterations(uint32 numIterations) { - assertGreaterOrEqual("numIterations", numIterations, 1); - numIterations_ = numIterations; - return *this; -} - -bool SequentialPostOptimizationConfig::areHeadsRefined() const { - return refineHeads_; -} - -ISequentialPostOptimizationConfig& SequentialPostOptimizationConfig::setRefineHeads(bool refineHeads) { - refineHeads_ = refineHeads; - return *this; -} - -bool SequentialPostOptimizationConfig::areFeaturesResampled() const { - return resampleFeatures_; -} - -ISequentialPostOptimizationConfig& SequentialPostOptimizationConfig::setResampleFeatures(bool resampleFeatures) { - resampleFeatures_ = resampleFeatures; - return *this; -} - -std::unique_ptr SequentialPostOptimizationConfig::createPostOptimizationPhaseFactory() - const { - return std::make_unique(numIterations_, refineHeads_, resampleFeatures_); -} diff --git a/cpp/subprojects/common/src/common/post_optimization/post_optimization_unused_rule_removal.cpp 
b/cpp/subprojects/common/src/common/post_optimization/post_optimization_unused_rule_removal.cpp deleted file mode 100644 index bfd9607c..00000000 --- a/cpp/subprojects/common/src/common/post_optimization/post_optimization_unused_rule_removal.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "common/post_optimization/post_optimization_unused_rule_removal.hpp" - -/** - * An implementation of the class `IPostOptimizationPhase` that removes unused rules from a model. - */ -class UnusedRuleRemoval final : public IPostOptimizationPhase { - private: - - IntermediateModelBuilder& modelBuilder_; - - public: - - /** - * @param modelBuilder A reference to an object of type `IntermediateModelBuilder` that provides access to the - * rules in a model - */ - UnusedRuleRemoval(IntermediateModelBuilder& modelBuilder) : modelBuilder_(modelBuilder) {} - - void optimizeModel(IThresholds& thresholds, const IRuleInduction& ruleInduction, IPartition& partition, - ILabelSampling& labelSampling, IInstanceSampling& instanceSampling, - IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng) const override { - uint32 numUsedRules = modelBuilder_.getNumUsedRules(); - - if (numUsedRules > 0) { - while (modelBuilder_.getNumRules() > numUsedRules) { - modelBuilder_.removeLastRule(); - } - - modelBuilder_.setNumUsedRules(0); - } - } -}; - -/** - * Allows to create instances of the type `IPostOptimizationPhase` that remove unused rules from a model. 
- */ -class UnusedRuleRemovalFactory final : public IPostOptimizationPhaseFactory { - public: - - std::unique_ptr create(IntermediateModelBuilder& modelBuilder) const override { - return std::make_unique(modelBuilder); - } -}; - -std::unique_ptr UnusedRuleRemovalConfig::createPostOptimizationPhaseFactory() const { - return std::make_unique(); -} diff --git a/cpp/subprojects/common/src/common/post_processing/post_processor_no.cpp b/cpp/subprojects/common/src/common/post_processing/post_processor_no.cpp deleted file mode 100644 index e085bcf4..00000000 --- a/cpp/subprojects/common/src/common/post_processing/post_processor_no.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "common/post_processing/post_processor_no.hpp" - -/** - * An implementation of the class `IPostProcessor` that does not perform any post-processing, but retains the original - * predictions of rules. - */ -class NoPostProcessor final : public IPostProcessor { - public: - - void postProcess(AbstractPrediction& prediction) const override { - return; - } -}; - -/** - * Allows to create instances of the type `IPostProcessor` that do not perform any post-processing, but retain the - * original predictions of rules. 
- */ -class NoPostProcessorFactory final : public IPostProcessorFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; - -std::unique_ptr NoPostProcessorConfig::createPostProcessorFactory() const { - return std::make_unique(); -} diff --git a/cpp/subprojects/common/src/common/prediction/label_space_info_no.cpp b/cpp/subprojects/common/src/common/prediction/label_space_info_no.cpp deleted file mode 100644 index fb70703f..00000000 --- a/cpp/subprojects/common/src/common/prediction/label_space_info_no.cpp +++ /dev/null @@ -1,86 +0,0 @@ -#include "common/prediction/label_space_info_no.hpp" - -#include "common/input/feature_matrix_c_contiguous.hpp" -#include "common/input/feature_matrix_csr.hpp" -#include "common/model/rule_list.hpp" -#include "common/prediction/predictor_binary.hpp" -#include "common/prediction/predictor_probability.hpp" -#include "common/prediction/predictor_score.hpp" -#include "common/prediction/probability_calibration_joint.hpp" - -/** - * An implementation of the type `INoLabelSpaceInfo` that does not provide any information about the label space. 
- */ -class NoLabelSpaceInfo final : public INoLabelSpaceInfo { - public: - - std::unique_ptr createJointProbabilityCalibrator( - const IJointProbabilityCalibratorFactory& factory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const override { - return factory.create(marginalProbabilityCalibrationModel, nullptr); - } - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - std::unique_ptr createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - std::unique_ptr createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& 
model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, numLabels); - } - - std::unique_ptr createScorePredictor(const IScorePredictorFactory& factory, - const CsrFeatureMatrix& featureMatrix, - const RuleList& model, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, numLabels); - } - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } - - std::unique_ptr createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const override { - return factory.create(featureMatrix, model, nullptr, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); - } -}; - -std::unique_ptr createNoLabelSpaceInfo() { - return std::make_unique(); -} diff --git 
a/cpp/subprojects/common/src/common/prediction/label_vector_set.cpp b/cpp/subprojects/common/src/common/prediction/label_vector_set.cpp deleted file mode 100644 index 50e20610..00000000 --- a/cpp/subprojects/common/src/common/prediction/label_vector_set.cpp +++ /dev/null @@ -1,173 +0,0 @@ -#include "common/prediction/label_vector_set.hpp" - -#include "common/input/feature_matrix_c_contiguous.hpp" -#include "common/input/feature_matrix_csr.hpp" -#include "common/model/rule_list.hpp" -#include "common/prediction/predictor_binary.hpp" -#include "common/prediction/predictor_probability.hpp" -#include "common/prediction/predictor_score.hpp" -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/prediction/probability_calibration_marginal.hpp" - -#include - -/** - * Allows to compute hashes for objects of type `LabelVector`. - */ -struct LabelVectorHash final { - public: - - /** - * Computes and returns a hash for an object of type `LabelVector`. - * - * @param v A reference to an object of type `LabelVector` - * @return The hash that has been computed - */ - inline std::size_t operator()(const LabelVector& v) const { - return hashArray(v.cbegin(), v.getNumElements()); - } -}; - -/** - * Allows to check whether two objects of type `LabelVector` are equal or not. - */ -struct LabelVectorPred final { - public: - - /** - * Returns whether two objects of type `LabelVector` are equal or not. 
- * - * @param lhs A reference to the first object of type `LabelVector` - * @param rhs A reference to the second object of type `LabelVector` - * @return True, if the given objects are equal, false otherwise - */ - inline bool operator()(const LabelVector& lhs, const LabelVector& rhs) const { - return compareArrays(lhs.cbegin(), lhs.getNumElements(), rhs.cbegin(), rhs.getNumElements()); - } -}; - -LabelVectorSet::LabelVectorSet() {} - -LabelVectorSet::LabelVectorSet(const IRowWiseLabelMatrix& labelMatrix) { - std::unordered_map, uint32, LabelVectorHash, LabelVectorPred> map; - uint32 numRows = labelMatrix.getNumRows(); - - for (uint32 i = 0; i < numRows; i++) { - std::unique_ptr labelVectorPtr = labelMatrix.createLabelVector(i); - auto it = map.find(*labelVectorPtr); - - if (it == map.end()) { - map.emplace(*labelVectorPtr, (uint32) frequencies_.size()); - frequencies_.emplace_back(1); - labelVectors_.push_back(std::move(labelVectorPtr)); - } else { - uint32 index = (*it).second; - frequencies_[index] += 1; - } - } -} - -LabelVectorSet::const_iterator LabelVectorSet::cbegin() const { - return labelVectors_.cbegin(); -} - -LabelVectorSet::const_iterator LabelVectorSet::cend() const { - return labelVectors_.cend(); -} - -LabelVectorSet::frequency_const_iterator LabelVectorSet::frequencies_cbegin() const { - return frequencies_.cbegin(); -} - -LabelVectorSet::frequency_const_iterator LabelVectorSet::frequencies_cend() const { - return frequencies_.cend(); -} - -uint32 LabelVectorSet::getNumLabelVectors() const { - return (uint32) labelVectors_.size(); -} - -void LabelVectorSet::addLabelVector(std::unique_ptr labelVectorPtr, uint32 frequency) { - labelVectors_.push_back(std::move(labelVectorPtr)); - frequencies_.emplace_back(frequency); -} - -void LabelVectorSet::visit(LabelVectorVisitor visitor) const { - uint32 numLabelVectors = this->getNumLabelVectors(); - - for (uint32 i = 0; i < numLabelVectors; i++) { - const std::unique_ptr& labelVectorPtr = labelVectors_[i]; 
- uint32 frequency = frequencies_[i]; - visitor(*labelVectorPtr, frequency); - } -} - -std::unique_ptr LabelVectorSet::createJointProbabilityCalibrator( - const IJointProbabilityCalibratorFactory& factory, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel) const { - return factory.create(marginalProbabilityCalibrationModel, this); -} - -std::unique_ptr LabelVectorSet::createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr LabelVectorSet::createBinaryPredictor( - const IBinaryPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr LabelVectorSet::createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr LabelVectorSet::createSparseBinaryPredictor( - const ISparseBinaryPredictorFactory& factory, const CsrFeatureMatrix& 
featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr LabelVectorSet::createScorePredictor(const IScorePredictorFactory& factory, - const CContiguousFeatureMatrix& featureMatrix, - const RuleList& model, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, numLabels); -} - -std::unique_ptr LabelVectorSet::createScorePredictor(const IScorePredictorFactory& factory, - const CsrFeatureMatrix& featureMatrix, - const RuleList& model, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, numLabels); -} - -std::unique_ptr LabelVectorSet::createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CContiguousFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr LabelVectorSet::createProbabilityPredictor( - const IProbabilityPredictorFactory& factory, const CsrFeatureMatrix& featureMatrix, const RuleList& model, - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const IJointProbabilityCalibrationModel& jointProbabilityCalibrationModel, uint32 numLabels) const { - return factory.create(featureMatrix, model, this, marginalProbabilityCalibrationModel, - jointProbabilityCalibrationModel, numLabels); -} - -std::unique_ptr createLabelVectorSet() { - return std::make_unique(); -} diff --git 
a/cpp/subprojects/common/src/common/prediction/prediction_matrix_dense.cpp b/cpp/subprojects/common/src/common/prediction/prediction_matrix_dense.cpp deleted file mode 100644 index ba6fa32f..00000000 --- a/cpp/subprojects/common/src/common/prediction/prediction_matrix_dense.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "common/prediction/prediction_matrix_dense.hpp" - -#include - -template -DensePredictionMatrix::DensePredictionMatrix(uint32 numRows, uint32 numCols) - : DensePredictionMatrix(numRows, numCols, false) {} - -template -DensePredictionMatrix::DensePredictionMatrix(uint32 numRows, uint32 numCols, bool init) - : CContiguousView(numRows, numCols, - (T*) (init ? calloc(numRows * numCols, sizeof(T)) : malloc(numRows * numCols * sizeof(T)))), - array_(CContiguousView::array_) {} - -template -DensePredictionMatrix::~DensePredictionMatrix() { - free(array_); -} - -template -T* DensePredictionMatrix::get() { - return array_; -} - -template -T* DensePredictionMatrix::release() { - T* ptr = array_; - array_ = nullptr; - return ptr; -} - -template class DensePredictionMatrix; -template class DensePredictionMatrix; -template class DensePredictionMatrix; -template class DensePredictionMatrix; diff --git a/cpp/subprojects/common/src/common/prediction/prediction_matrix_sparse_binary.cpp b/cpp/subprojects/common/src/common/prediction/prediction_matrix_sparse_binary.cpp deleted file mode 100644 index 5b9ab4a6..00000000 --- a/cpp/subprojects/common/src/common/prediction/prediction_matrix_sparse_binary.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "common/prediction/prediction_matrix_sparse_binary.hpp" - -#include - -BinarySparsePredictionMatrix::BinarySparsePredictionMatrix(uint32 numRows, uint32 numCols, uint32* rowIndices, - uint32* colIndices) - : BinaryCsrConstView(numRows, numCols, rowIndices, colIndices), rowIndices_(rowIndices), colIndices_(colIndices) {} - -BinarySparsePredictionMatrix::~BinarySparsePredictionMatrix() { - free(rowIndices_); - free(colIndices_); -} - 
-uint32* BinarySparsePredictionMatrix::getRowIndices() { - return rowIndices_; -} - -uint32* BinarySparsePredictionMatrix::releaseRowIndices() { - uint32* ptr = rowIndices_; - rowIndices_ = nullptr; - return ptr; -} - -uint32* BinarySparsePredictionMatrix::getColIndices() { - return colIndices_; -} - -uint32* BinarySparsePredictionMatrix::releaseColIndices() { - uint32* ptr = colIndices_; - colIndices_ = nullptr; - return ptr; -} - -std::unique_ptr createBinarySparsePredictionMatrix(const BinaryLilMatrix& lilMatrix, - uint32 numCols, - uint32 numNonZeroElements) { - uint32 numRows = lilMatrix.getNumRows(); - uint32* rowIndices = (uint32*) malloc((numRows + 1) * sizeof(uint32)); - uint32* colIndices = (uint32*) malloc(numNonZeroElements * sizeof(uint32)); - uint32 n = 0; - - for (uint32 i = 0; i < numRows; i++) { - rowIndices[i] = n; - - for (auto it = lilMatrix.cbegin(i); it != lilMatrix.cend(i); it++) { - colIndices[n] = *it; - n++; - } - } - - rowIndices[numRows] = n; - return std::make_unique(numRows, numCols, rowIndices, colIndices); -} diff --git a/cpp/subprojects/common/src/common/prediction/probability_calibration_isotonic.cpp b/cpp/subprojects/common/src/common/prediction/probability_calibration_isotonic.cpp deleted file mode 100644 index 06eb159f..00000000 --- a/cpp/subprojects/common/src/common/prediction/probability_calibration_isotonic.cpp +++ /dev/null @@ -1,189 +0,0 @@ -#include "common/prediction/probability_calibration_isotonic.hpp" - -#include "common/math/math.hpp" - -static inline void sortByThresholdsAndEliminateDuplicates(ListOfLists>::row bins) { - // Sort bins in increasing order by their threshold... - std::sort(bins.begin(), bins.end(), [=](const Tuple& lhs, const Tuple& rhs) { - return lhs.first < rhs.first; - }); - - // Aggregate adjacent bins with identical thresholds by averaging their probabilities... 
- uint32 numBins = (uint32) bins.size(); - uint32 previousIndex = 0; - Tuple previousBin = bins[previousIndex]; - uint32 n = 0; - - for (uint32 j = 1; j < numBins; j++) { - const Tuple& currentBin = bins[j]; - - if (isEqual(currentBin.first, previousBin.first)) { - uint32 numAggregated = j - previousIndex + 1; - previousBin.second = iterativeArithmeticMean(numAggregated, currentBin.second, previousBin.second); - } else { - bins[n] = previousBin; - n++; - previousIndex = j; - previousBin = currentBin; - } - } - - bins[n] = bins[numBins - 1]; - n++; - bins.resize(n); -} - -static inline void aggregateNonIncreasingBins(ListOfLists>::row bins) { - // We apply the "pool adjacent violators algorithm" (PAVA) to merge adjacent bins with non-increasing - // probabilities. A temporary array `pools` is used to mark the beginning and end of subsequences with - // non-increasing probabilities. If such a subsequence was found in range [i, j] then `pools[i] = j` and - // `pools[j] = i`... - uint32 numBins = (uint32) bins.size(); - uint32* pools = new uint32[numBins]; - setArrayToIncreasingValues(pools, numBins, 0, 1); - uint32 i = 0; - uint32 j = 0; - - while (i < numBins && j < numBins && (j = pools[i] + 1) < numBins) { - Tuple& previousBin = bins[i]; - Tuple& currentBin = bins[j]; - - // Check if the probabilities of the adjacent bins are monotonically increasing... - if (currentBin.second > previousBin.second) { - // The probabilities are increasing, i.e., the monotonicity constraint is not violated, and we can - // continue with the subsequent bins... - i = j; - } else { - // The probabilities are not increasing, i.e., the monotonicity constraint is violated, and we have to - // average the probabilities of all bins within the non-increasing subsequence... - uint32 numBinsInSubsequence = 2; - previousBin.second = iterativeArithmeticMean(numBinsInSubsequence, currentBin.second, previousBin.second); - - // Search for the end of the non-increasing subsequence... 
- while ((j = pools[j] + 1) < numBins) { - Tuple& nextBin = bins[j]; - - if (nextBin.second > currentBin.second) { - // We reached the end of the non-increasing subsequence... - break; - } else { - // We are still within the non-increasing subsequence... - numBinsInSubsequence++; - previousBin.second = - iterativeArithmeticMean(numBinsInSubsequence, nextBin.second, previousBin.second); - currentBin = nextBin; - } - } - - // Store the beginning and end of the current subsequence... - pools[i] = j - 1; - pools[j - 1] = i; - - // Restart at the previous subsequence if there is one... - if (i > 0) { - j = pools[i - 1]; - i = j; - } - } - } - - // Only keep the first bin within each subsequence... - j = 0; - - for (i = 0; i < numBins; i = pools[i] + 1) { - bins[j] = bins[i]; - j++; - } - - delete[] pools; - bins.resize(j); - bins.shrink_to_fit(); -} - -static inline float64 calibrateProbability(ListOfLists>::const_row bins, float64 probability) { - // Find the bins that impose a lower and upper bound on the probability... - ListOfLists>::const_iterator begin = bins.cbegin(); - ListOfLists>::const_iterator end = bins.cend(); - ListOfLists>::const_iterator it = - std::lower_bound(begin, end, probability, [=](const Tuple& lhs, const float64& rhs) { - return lhs.first < rhs; - }); - uint32 offset = it - begin; - Tuple lowerBound; - Tuple upperBound; - - if (it == end) { - lowerBound = begin[offset - 1]; - upperBound = 1; - } else { - if (offset > 0) { - lowerBound = begin[offset - 1]; - } else { - lowerBound = 0; - } - - upperBound = *it; - } - - // Interpolate linearly between the probabilities associated with the lower and upper bound... 
- float64 t = (probability - lowerBound.first) / (upperBound.first - lowerBound.first); - return lowerBound.second + (t * (upperBound.second - lowerBound.second)); -} - -IsotonicProbabilityCalibrationModel::IsotonicProbabilityCalibrationModel(uint32 numLists) - : binsPerList_(ListOfLists>(numLists)) {} - -IsotonicProbabilityCalibrationModel::bin_list IsotonicProbabilityCalibrationModel::operator[](uint32 listIndex) { - return binsPerList_[listIndex]; -} - -IsotonicProbabilityCalibrationModel::const_bin_list IsotonicProbabilityCalibrationModel::operator[]( - uint32 listIndex) const { - return binsPerList_[listIndex]; -} - -void IsotonicProbabilityCalibrationModel::fit() { - uint32 numLists = binsPerList_.getNumRows(); - - for (uint32 i = 0; i < numLists; i++) { - ListOfLists>::row bins = binsPerList_[i]; - sortByThresholdsAndEliminateDuplicates(bins); - aggregateNonIncreasingBins(bins); - } -} - -float64 IsotonicProbabilityCalibrationModel::calibrateMarginalProbability(uint32 labelIndex, - float64 marginalProbability) const { - return calibrateProbability(binsPerList_[labelIndex], marginalProbability); -} - -float64 IsotonicProbabilityCalibrationModel::calibrateJointProbability(uint32 labelVectorIndex, - float64 jointProbability) const { - return calibrateProbability(binsPerList_[labelVectorIndex], jointProbability); -} - -uint32 IsotonicProbabilityCalibrationModel::getNumBinLists() const { - return binsPerList_.getNumRows(); -} - -void IsotonicProbabilityCalibrationModel::addBin(uint32 listIndex, float64 threshold, float64 probability) { - ListOfLists>::row row = binsPerList_[listIndex]; - row.emplace_back(threshold, probability); -} - -void IsotonicProbabilityCalibrationModel::visit(BinVisitor visitor) const { - uint32 numLists = binsPerList_.getNumRows(); - - for (uint32 i = 0; i < numLists; i++) { - ListOfLists>::const_row bins = binsPerList_[i]; - - for (auto it = bins.cbegin(); it != bins.cend(); it++) { - const Tuple& bin = *it; - visitor(i, bin.first, 
bin.second); - } - } -} - -std::unique_ptr createIsotonicProbabilityCalibrationModel(uint32 numLists) { - return std::make_unique(numLists); -} diff --git a/cpp/subprojects/common/src/common/prediction/probability_calibration_no.cpp b/cpp/subprojects/common/src/common/prediction/probability_calibration_no.cpp deleted file mode 100644 index 7131e43a..00000000 --- a/cpp/subprojects/common/src/common/prediction/probability_calibration_no.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include "common/prediction/probability_calibration_no.hpp" - -/** - * A model for the calibration of marginal or joint probabilities that does not make any adjustments. - */ -class NoProbabilityCalibrationModel final : public INoProbabilityCalibrationModel { - public: - - float64 calibrateMarginalProbability(uint32 labelIndex, float64 marginalProbability) const override { - return marginalProbability; - } - - float64 calibrateJointProbability(uint32 labelVectorIndex, float64 jointProbability) const override { - return jointProbability; - } -}; - -/** - * An implementation of the type `IMarginalProbabilityCalibrator` that does not fit a model for the calibration of - * marginal probabilities. 
- */ -class NoMarginalProbabilityCalibrator final : public IMarginalProbabilityCalibrator { - public: - - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return std::make_unique(); - } - - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CsrLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return std::make_unique(); - } - - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return std::make_unique(); - } - - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CsrLabelMatrix& labelMatrix, const IStatistics& statistics) const override { - return std::make_unique(); - } -}; - -std::unique_ptr NoMarginalProbabilityCalibratorFactory::create() const { - return std::make_unique(); -} - -bool NoMarginalProbabilityCalibratorConfig::shouldUseHoldoutSet() const { - return false; -} - -std::unique_ptr - NoMarginalProbabilityCalibratorConfig::createMarginalProbabilityCalibratorFactory() const { - return std::make_unique(); -} - -/** - * An implementation of the type `IJointProbabilityCalibrator` that does not fit a model for the calibration of joint - * probabilities. 
- */ -class NoJointProbabilityCalibrator final : public IJointProbabilityCalibrator { - public: - - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return std::make_unique(); - } - - std::unique_ptr fitProbabilityCalibrationModel( - const SinglePartition& partition, const CsrLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return std::make_unique(); - } - - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CContiguousLabelMatrix& labelMatrix, - const IStatistics& statistics) const override { - return std::make_unique(); - } - - std::unique_ptr fitProbabilityCalibrationModel( - BiPartition& partition, const CsrLabelMatrix& labelMatrix, const IStatistics& statistics) const override { - return std::make_unique(); - } -}; - -std::unique_ptr NoJointProbabilityCalibratorFactory::create( - const IMarginalProbabilityCalibrationModel& marginalProbabilityCalibrationModel, - const LabelVectorSet* labelVectorSet) const { - return std::make_unique(); -} - -bool NoJointProbabilityCalibratorConfig::shouldUseHoldoutSet() const { - return false; -} - -bool NoJointProbabilityCalibratorConfig::isLabelVectorSetNeeded() const { - return false; -} - -std::unique_ptr - NoJointProbabilityCalibratorConfig::createJointProbabilityCalibratorFactory() const { - return std::make_unique(); -} - -std::unique_ptr createNoProbabilityCalibrationModel() { - return std::make_unique(); -} diff --git a/cpp/subprojects/common/src/common/rule_evaluation/score_vector_binned_dense.cpp b/cpp/subprojects/common/src/common/rule_evaluation/score_vector_binned_dense.cpp deleted file mode 100644 index 0a0c0a0e..00000000 --- a/cpp/subprojects/common/src/common/rule_evaluation/score_vector_binned_dense.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "common/rule_evaluation/score_vector_binned_dense.hpp" - -#include 
"common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" -#include "common/rule_refinement/prediction.hpp" -#include "common/rule_refinement/score_processor.hpp" - -template -DenseBinnedScoreVector::DenseBinnedScoreVector(const IndexVector& labelIndices, uint32 numBins, - bool sorted) - : labelIndices_(labelIndices), binnedVector_(DenseBinnedVector(labelIndices.getNumElements(), numBins)), - sorted_(sorted) {} - -template -typename DenseBinnedScoreVector::index_const_iterator DenseBinnedScoreVector::indices_cbegin() - const { - return labelIndices_.cbegin(); -} - -template -typename DenseBinnedScoreVector::index_const_iterator DenseBinnedScoreVector::indices_cend() - const { - return labelIndices_.cend(); -} - -template -typename DenseBinnedScoreVector::score_const_iterator DenseBinnedScoreVector::scores_cbegin() - const { - return binnedVector_.cbegin(); -} - -template -typename DenseBinnedScoreVector::score_const_iterator DenseBinnedScoreVector::scores_cend() - const { - return DenseBinnedVector::ValueConstIterator(this->indices_binned_cend(), binnedVector_.values_cbegin()); -} - -template -typename DenseBinnedScoreVector::index_binned_iterator - DenseBinnedScoreVector::indices_binned_begin() { - return binnedVector_.indices_begin(); -} - -template -typename DenseBinnedScoreVector::index_binned_iterator - DenseBinnedScoreVector::indices_binned_end() { - return &binnedVector_.indices_begin()[labelIndices_.getNumElements()]; -} - -template -typename DenseBinnedScoreVector::index_binned_const_iterator - DenseBinnedScoreVector::indices_binned_cbegin() const { - return binnedVector_.indices_cbegin(); -} - -template -typename DenseBinnedScoreVector::index_binned_const_iterator - DenseBinnedScoreVector::indices_binned_cend() const { - return &binnedVector_.indices_cbegin()[labelIndices_.getNumElements()]; -} - -template -typename DenseBinnedScoreVector::score_binned_iterator - DenseBinnedScoreVector::scores_binned_begin() { - return 
binnedVector_.values_begin(); -} - -template -typename DenseBinnedScoreVector::score_binned_iterator - DenseBinnedScoreVector::scores_binned_end() { - return binnedVector_.values_end(); -} - -template -typename DenseBinnedScoreVector::score_binned_const_iterator - DenseBinnedScoreVector::scores_binned_cbegin() const { - return binnedVector_.values_cbegin(); -} - -template -typename DenseBinnedScoreVector::score_binned_const_iterator - DenseBinnedScoreVector::scores_binned_cend() const { - return binnedVector_.values_cend(); -} - -template -uint32 DenseBinnedScoreVector::getNumElements() const { - return labelIndices_.getNumElements(); -} - -template -uint32 DenseBinnedScoreVector::getNumBins() const { - return binnedVector_.getNumBins(); -} - -template -void DenseBinnedScoreVector::setNumBins(uint32 numBins, bool freeMemory) { - binnedVector_.setNumBins(numBins, freeMemory); -} - -template -bool DenseBinnedScoreVector::isPartial() const { - return labelIndices_.isPartial(); -} - -template -bool DenseBinnedScoreVector::isSorted() const { - return sorted_; -} - -template -void DenseBinnedScoreVector::updatePrediction(AbstractPrediction& prediction) const { - prediction.set(this->scores_cbegin(), this->scores_cend()); -} - -template -void DenseBinnedScoreVector::processScores(ScoreProcessor& scoreProcessor) const { - scoreProcessor.processScores(*this); -} - -template class DenseBinnedScoreVector; -template class DenseBinnedScoreVector; diff --git a/cpp/subprojects/common/src/common/rule_evaluation/score_vector_dense.cpp b/cpp/subprojects/common/src/common/rule_evaluation/score_vector_dense.cpp deleted file mode 100644 index 0987cd45..00000000 --- a/cpp/subprojects/common/src/common/rule_evaluation/score_vector_dense.cpp +++ /dev/null @@ -1,69 +0,0 @@ -#include "common/rule_evaluation/score_vector_dense.hpp" - -#include "common/indices/index_vector_complete.hpp" -#include "common/indices/index_vector_partial.hpp" -#include "common/rule_refinement/prediction.hpp" 
-#include "common/rule_refinement/score_processor.hpp" - -template -DenseScoreVector::DenseScoreVector(const IndexVector& labelIndices, bool sorted) - : labelIndices_(labelIndices), predictedScoreVector_(DenseVector(labelIndices.getNumElements())), - sorted_(sorted) {} - -template -typename DenseScoreVector::index_const_iterator DenseScoreVector::indices_cbegin() const { - return labelIndices_.cbegin(); -} - -template -typename DenseScoreVector::index_const_iterator DenseScoreVector::indices_cend() const { - return labelIndices_.cend(); -} - -template -typename DenseScoreVector::score_iterator DenseScoreVector::scores_begin() { - return predictedScoreVector_.begin(); -} - -template -typename DenseScoreVector::score_iterator DenseScoreVector::scores_end() { - return &predictedScoreVector_.begin()[labelIndices_.getNumElements()]; -} - -template -typename DenseScoreVector::score_const_iterator DenseScoreVector::scores_cbegin() const { - return predictedScoreVector_.cbegin(); -} - -template -typename DenseScoreVector::score_const_iterator DenseScoreVector::scores_cend() const { - return &predictedScoreVector_.cbegin()[labelIndices_.getNumElements()]; -} - -template -uint32 DenseScoreVector::getNumElements() const { - return labelIndices_.getNumElements(); -} - -template -bool DenseScoreVector::isPartial() const { - return labelIndices_.isPartial(); -} - -template -bool DenseScoreVector::isSorted() const { - return sorted_; -} - -template -void DenseScoreVector::updatePrediction(AbstractPrediction& prediction) const { - prediction.set(this->scores_cbegin(), this->scores_cend()); -} - -template -void DenseScoreVector::processScores(ScoreProcessor& scoreProcessor) const { - scoreProcessor.processScores(*this); -} - -template class DenseScoreVector; -template class DenseScoreVector; diff --git a/cpp/subprojects/common/src/common/rule_induction/rule_induction_common.hpp b/cpp/subprojects/common/src/common/rule_induction/rule_induction_common.hpp deleted file mode 100644 
index 871bcfe3..00000000 --- a/cpp/subprojects/common/src/common/rule_induction/rule_induction_common.hpp +++ /dev/null @@ -1,123 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_complete.hpp" -#include "common/rule_induction/rule_induction.hpp" -#include "common/rule_refinement/score_processor.hpp" - -/** - * An abstract base class for all classes that implement an algorithm for the induction of individual rules. - */ -class AbstractRuleInduction : public IRuleInduction { - private: - - const bool recalculatePredictions_; - - protected: - - /** - * Must be implemented by subclasses in order to grow a rule. - * - * @param thresholds A reference to an object of type `IThresholds` that provides access to the - * thresholds that may be used by the conditions of the rule - * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the indices - * of the labels for which the rule may predict - * @param weights A reference to an object of type `IWeightVector` that provides access to the weights - * of individual training examples - * @param partition A reference to an object of type `IPartition` that provides access to the indices of - * the training examples that belong to the training set and the holdout set, - * respectively - * @param featureSampling A reference to an object of type `IFeatureSampling` that should be used for sampling - * the features that may be used by a new condition - * @param rng A reference to an object of type `RNG` that implements the random number generator - * to be used - * @param conditionListPtr A reference to an unique pointer of type `ConditionList` that should be used to - * store the conditions of the rule - * @param headPtr A reference to an unique pointer of type `AbstractEvaluatedPrediction` that should - * be used to store the head of the rule - * @return An unique pointer to an object of type `IThresholdsSubset` that has 
been used to - * grow the rule - */ - virtual std::unique_ptr growRule( - IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, - IPartition& partition, IFeatureSampling& featureSampling, RNG& rng, - std::unique_ptr& conditionListPtr, - std::unique_ptr& headPtr) const = 0; - - public: - - /** - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, if some of the examples have zero weights, false otherwise - */ - AbstractRuleInduction(bool recalculatePredictions) : recalculatePredictions_(recalculatePredictions) {} - - virtual ~AbstractRuleInduction() override {}; - - void induceDefaultRule(IStatistics& statistics, IModelBuilder& modelBuilder) const override final { - uint32 numStatistics = statistics.getNumStatistics(); - uint32 numLabels = statistics.getNumLabels(); - CompleteIndexVector labelIndices(numLabels); - EqualWeightVector weights(numStatistics); - std::unique_ptr statisticsSubsetPtr = statistics.createSubset(labelIndices, weights); - - for (uint32 i = 0; i < numStatistics; i++) { - statisticsSubsetPtr->addToSubset(i); - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - std::unique_ptr defaultPredictionPtr; - ScoreProcessor scoreProcessor(defaultPredictionPtr); - scoreProcessor.processScores(scoreVector); - - for (uint32 i = 0; i < numStatistics; i++) { - defaultPredictionPtr->apply(statistics, i); - } - - modelBuilder.setDefaultRule(defaultPredictionPtr); - } - - bool induceRule(IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, - IPartition& partition, IFeatureSampling& featureSampling, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, RNG& rng, - IModelBuilder& modelBuilder) const override final { - std::unique_ptr conditionListPtr; - std::unique_ptr headPtr; - std::unique_ptr thresholdsSubsetPtr = this->growRule( - thresholds, labelIndices, weights, partition, 
featureSampling, rng, conditionListPtr, headPtr); - - if (headPtr) { - if (weights.hasZeroWeights()) { - // Prune rule... - IStatisticsProvider& statisticsProvider = thresholds.getStatisticsProvider(); - statisticsProvider.switchToPruningRuleEvaluation(); - std::unique_ptr coverageStatePtr = - rulePruning.prune(*thresholdsSubsetPtr, partition, *conditionListPtr, *headPtr); - statisticsProvider.switchToRegularRuleEvaluation(); - - // Re-calculate the scores in the head based on the entire training data... - if (recalculatePredictions_) { - const ICoverageState& coverageState = - coverageStatePtr ? *coverageStatePtr : thresholdsSubsetPtr->getCoverageState(); - partition.recalculatePrediction(*thresholdsSubsetPtr, coverageState, *headPtr); - } - } - - // Apply post-processor... - postProcessor.postProcess(*headPtr); - - // Update the statistics by applying the predictions of the new rule... - thresholdsSubsetPtr->applyPrediction(*headPtr); - - // Add the induced rule to the model... - modelBuilder.addRule(conditionListPtr, headPtr); - return true; - } else { - // No rule could be induced, because no useful condition could be found. This might be the case, if all - // examples have the same values for the considered features. - return false; - } - } -}; diff --git a/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_beam_search.cpp b/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_beam_search.cpp deleted file mode 100644 index 0fcec254..00000000 --- a/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_beam_search.cpp +++ /dev/null @@ -1,512 +0,0 @@ -#include "common/rule_induction/rule_induction_top_down_beam_search.hpp" - -#include "common/math/math.hpp" -#include "common/util/validation.hpp" -#include "rule_induction_common.hpp" -#include "rule_induction_top_down_common.hpp" - -#include - -/** - * A single entry of a beam, corresponding to a rule that may be further refined. 
It stores the conditions and the head - * of the current rule, as well as an object of type `IThresholdsSubset` that is required to search for potential - * refinements of the rule and an `IIndexVector` that provides access to the indices of the labels for which these - * refinements may predict. - */ -struct BeamEntry final { - public: - - /** - * An unique pointer to an object of type `ConditionList` that stores the conditions of the rule. - */ - std::unique_ptr conditionListPtr; - - /** - * An unique pointer to an object of type `AbstractEvaluatedPrediction` that stores the prediction of the rule, - * as well as its quality. - */ - std::unique_ptr headPtr; - - /** - * An unique pointer to an object of type `IThresholdsSubset` that may be used to search for potential - * refinements of the rule. - */ - std::unique_ptr thresholdsSubsetPtr; - - /** - * A pointer to an object of type `IIndexVector` that provides access to the indices of the labels for which - * potential refinements of the rule may predict. - */ - const IIndexVector* labelIndices; -}; - -static inline void initializeEntry(BeamEntry& entry, Refinement& refinement, - std::unique_ptr thresholdsSubsetPtr, - const IIndexVector& labelIndices, bool keepHead) { - thresholdsSubsetPtr->filterThresholds(refinement); - entry.thresholdsSubsetPtr = std::move(thresholdsSubsetPtr); - entry.conditionListPtr = std::make_unique(); - entry.conditionListPtr->addCondition(refinement); - entry.headPtr = std::move(refinement.headPtr); - entry.labelIndices = keepHead ? 
entry.headPtr.get() : &labelIndices; -} - -static inline void copyEntry(BeamEntry& newEntry, BeamEntry& oldEntry, Refinement& refinement, - std::unique_ptr thresholdsSubsetPtr, - std::unique_ptr conditionListPtr, bool keepHead, uint32 minCoverage) { - thresholdsSubsetPtr->filterThresholds(refinement); - newEntry.thresholdsSubsetPtr = std::move(thresholdsSubsetPtr); - newEntry.conditionListPtr = std::move(conditionListPtr); - newEntry.conditionListPtr->addCondition(refinement); - newEntry.headPtr = std::move(refinement.headPtr); - - if (refinement.numCovered <= minCoverage) { - newEntry.labelIndices = nullptr; - } else { - newEntry.labelIndices = keepHead ? newEntry.headPtr.get() : oldEntry.labelIndices; - } -} - -static inline void copyEntry(BeamEntry& newEntry, BeamEntry& oldEntry) { - newEntry.thresholdsSubsetPtr = std::move(oldEntry.thresholdsSubsetPtr); - newEntry.conditionListPtr = std::move(oldEntry.conditionListPtr); - newEntry.headPtr = std::move(oldEntry.headPtr); - newEntry.labelIndices = nullptr; -} - -static inline const Quality& updateOrder(RuleCompareFunction ruleCompareFunction, - std::vector>& order) { - std::sort(order.begin(), order.end(), [=](const BeamEntry& a, const BeamEntry& b) { - return ruleCompareFunction.compare(*a.headPtr, *b.headPtr); - }); - const BeamEntry& worstEntry = order.back(); - return *worstEntry.headPtr; -} - -/** - * A beam that keeps track of several rules that may be further refined. 
- */ -class Beam final { - private: - - uint32 numEntries_; - - BeamEntry* entries_; - - std::vector> order_; - - public: - - /** - * @param beamWidth The maximum number of rules to keep track of - */ - Beam(uint32 beamWidth) : numEntries_(beamWidth), entries_(new BeamEntry[numEntries_]) { - order_.reserve(numEntries_); - } - - /** - * @param refinementComparator A reference to an object of type `FixedRefinementComparator` that keeps track of - * existing refinements of rules - * @param thresholdsSubsetPtr An unique pointer to an object of type `IThresholdsSubset` that has been used to - * find the existing refinements of rules - * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the - * indices of the labels for which further refinement may predict - * @param keepHeads True, if further refinements should predict for the same labels as before, false - * otherwise - */ - Beam(FixedRefinementComparator& refinementComparator, std::unique_ptr thresholdsSubsetPtr, - const IIndexVector& labelIndices, bool keepHeads) - : Beam(refinementComparator.getNumElements()) { - FixedRefinementComparator::iterator iterator = refinementComparator.begin(); - uint32 i = 0; - - for (; i < numEntries_ - 1; i++) { - Refinement& refinement = iterator[i]; - BeamEntry& entry = entries_[i]; - initializeEntry(entry, refinement, thresholdsSubsetPtr->copy(), labelIndices, keepHeads); - order_.push_back(entry); - } - - Refinement& refinement = iterator[i]; - BeamEntry& entry = entries_[i]; - initializeEntry(entry, refinement, std::move(thresholdsSubsetPtr), labelIndices, keepHeads); - order_.push_back(entry); - } - - ~Beam() { - delete[] entries_; - } - - /** - * Searches for the best refinements of the rules that are kept track of by a given beam and updates the beam - * accordingly. 
- * - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - * @param beamPtr A reference to an unique pointer of type `Beam` that represents the beam to be - * updated - * @param beamWidth The number of rules the new beam should keep track of - * @param featureSampling A reference to an object of type `IFeatureSampling` that should be used for - * sampling the features that may be used by potential refinements - * @param keepHeads True, if further refinements should predict for the same labels as before, false - * otherwise - * @param minCoverage The number of training examples that must be covered by potential refinements - * @param numThreads The number of CPU threads to be used to search for potential refinements of a - * rule in parallel - * @param rng A reference to an object of type `RNG` that implements the random number - * generator to be used - * @return True, if any refinements have been found, false otherwise - */ - static bool refine(RuleCompareFunction ruleCompareFunction, std::unique_ptr& beamPtr, uint32 beamWidth, - IFeatureSampling& featureSampling, bool keepHeads, uint32 minCoverage, uint32 numThreads, - RNG& rng) { - std::vector>& order = beamPtr->order_; - std::unique_ptr newBeamPtr = std::make_unique(beamWidth); - BeamEntry* newEntries = newBeamPtr->entries_; - std::vector>& newOrder = newBeamPtr->order_; - const BeamEntry& worstEntry = order.back(); - Quality minQuality(*worstEntry.headPtr); - uint32 n = 0; - bool result = false; - - // Traverse the existing beam entries.... - for (auto it = order.begin(); it != order.end(); it++) { - BeamEntry& entry = *it; - bool foundRefinement = false; - - // Check if existing beam entry can be refined... - if (entry.labelIndices) { - // Sample features... - const IIndexVector& featureIndices = featureSampling.sample(rng); - - // Search for refinements of the existing beam entry... 
- FixedRefinementComparator refinementComparator(ruleCompareFunction, beamWidth, minQuality); - foundRefinement = findRefinement(refinementComparator, *entry.thresholdsSubsetPtr, featureIndices, - *entry.labelIndices, minCoverage, numThreads); - - if (foundRefinement) { - result = true; - uint32 numRefinements = refinementComparator.getNumElements(); - FixedRefinementComparator::iterator iterator = refinementComparator.begin(); - uint32 i = 0; - - // Include all refinements, except for the last one, in the new beam. The corresponding - // `IThresholdsSubset` and `ConditionList` are copied... - for (; i < numRefinements - 1; i++) { - Refinement& refinement = iterator[i]; - - if (n < beamWidth) { - BeamEntry& newEntry = newEntries[n]; - copyEntry(newEntry, entry, refinement, entry.thresholdsSubsetPtr->copy(), - std::make_unique(*entry.conditionListPtr), keepHeads, - minCoverage); - newOrder.push_back(newEntry); - n++; - } else { - BeamEntry& newEntry = newOrder.back(); - copyEntry(newEntry, entry, refinement, entry.thresholdsSubsetPtr->copy(), - std::make_unique(*entry.conditionListPtr), keepHeads, - minCoverage); - minQuality = updateOrder(ruleCompareFunction, newOrder); - } - } - - // Include the last refinement in the beam. The corresponding `IThresholdsSubset` and - // `ConditionList` are reused... 
- Refinement& refinement = iterator[i]; - - if (n < beamWidth) { - BeamEntry& newEntry = newEntries[n]; - copyEntry(newEntry, entry, refinement, std::move(entry.thresholdsSubsetPtr), - std::move(entry.conditionListPtr), keepHeads, minCoverage); - newOrder.push_back(newEntry); - n++; - } else { - BeamEntry& newEntry = newOrder.back(); - copyEntry(newEntry, entry, refinement, std::move(entry.thresholdsSubsetPtr), - std::move(entry.conditionListPtr), keepHeads, minCoverage); - minQuality = updateOrder(ruleCompareFunction, newOrder); - } - } - } - - // If no refinement has been found, include the existing beam entry in the new beam unless it is worse - // than the worst entry currently included. If there is a tie, the existing beam entry is preferred, as - // it corresponds to a more general rule... - if (!foundRefinement) { - if (n < beamWidth) { - BeamEntry& newEntry = newEntries[n]; - copyEntry(newEntry, entry); - newOrder.push_back(newEntry); - n++; - } else if (!ruleCompareFunction.compare(minQuality, *entry.headPtr)) { - BeamEntry& newEntry = newOrder.back(); - copyEntry(newEntry, entry); - minQuality = updateOrder(ruleCompareFunction, newOrder); - } - } - } - - newBeamPtr->numEntries_ = n; - beamPtr = std::move(newBeamPtr); - return result; - } - - /** - * Returns the entry that corresponds to the best rule that is currently kept track of by the beam. - * - * @return A reference to an object of type `BeamEntry` that corresponds to the best rule - */ - BeamEntry& getBestEntry() { - return order_.front(); - } -}; - -/** - * An implementation of the type `IRuleInduction` that allows to induce individual rules by using a top-down beam - * search. 
- */ -class BeamSearchTopDownRuleInduction final : public AbstractRuleInduction { - private: - - const RuleCompareFunction ruleCompareFunction_; - - const uint32 beamWidth_; - - const bool resampleFeatures_; - - const uint32 minCoverage_; - - const uint32 maxConditions_; - - const uint32 maxHeadRefinements_; - - const uint32 numThreads_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - * @param beamWidth The width that should be used by the beam search. Must be at least 2 - * @param resampleFeatures True, if a new sample of the available features should be created for each - * rule that is refined during the beam search, false otherwise - * @param minCoverage The minimum number of training examples that must be covered by a rule. Must - * be at least 1 - * @param maxConditions The maximum number of conditions to be included in a rule's body. Must be at - * least 2 or 0, if the number of conditions should not be restricted - * @param maxHeadRefinements The maximum number of times, the head of a rule may be refinement after a - * new condition has been added to its body. Must be at least 1 or 0, if the - * number of refinements should not be restricted - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, if some of the examples have zero weights, false otherwise - * @param numThreads The number of CPU threads to be used to search for potential refinements of - * a rule in parallel. 
Must be at least 1 - */ - BeamSearchTopDownRuleInduction(RuleCompareFunction ruleCompareFunction, uint32 beamWidth, bool resampleFeatures, - uint32 minCoverage, uint32 maxConditions, uint32 maxHeadRefinements, - bool recalculatePredictions, uint32 numThreads) - : AbstractRuleInduction(recalculatePredictions), ruleCompareFunction_(ruleCompareFunction), - beamWidth_(beamWidth), resampleFeatures_(resampleFeatures), minCoverage_(minCoverage), - maxConditions_(maxConditions), maxHeadRefinements_(maxHeadRefinements), numThreads_(numThreads) {} - - protected: - - std::unique_ptr growRule( - IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, - IPartition& partition, IFeatureSampling& featureSampling, RNG& rng, - std::unique_ptr& conditionListPtr, - std::unique_ptr& headPtr) const override { - // Create a new subset of the given thresholds... - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(thresholds); - - // Sample features... - const IIndexVector& sampledFeatureIndices = featureSampling.sample(rng); - - // Search for the best refinements using a single condition... - FixedRefinementComparator refinementComparator(ruleCompareFunction_, beamWidth_); - bool foundRefinement = findRefinement(refinementComparator, *thresholdsSubsetPtr, sampledFeatureIndices, - labelIndices, minCoverage_, numThreads_); - - if (foundRefinement) { - bool keepHeads = maxHeadRefinements_ == 1; - std::unique_ptr beamPtr = - std::make_unique(refinementComparator, std::move(thresholdsSubsetPtr), labelIndices, keepHeads); - uint32 searchDepth = 1; - - while (foundRefinement && (maxConditions_ == 0 || searchDepth < maxConditions_)) { - searchDepth++; - keepHeads = maxHeadRefinements_ > 0 && searchDepth >= maxHeadRefinements_; - - // Create a `IFeatureSampling` to be used for refining the current beam... 
- std::unique_ptr beamSearchFeatureSamplingPtr = - featureSampling.createBeamSearchFeatureSampling(rng, resampleFeatures_); - - // Search for the best refinements within the current beam... - foundRefinement = - beamPtr->refine(ruleCompareFunction_, beamPtr, beamWidth_, *beamSearchFeatureSamplingPtr, - keepHeads, minCoverage_, numThreads_, rng); - } - - BeamEntry& entry = beamPtr->getBestEntry(); - conditionListPtr = std::move(entry.conditionListPtr); - headPtr = std::move(entry.headPtr); - return std::move(entry.thresholdsSubsetPtr); - } - - return thresholdsSubsetPtr; - } -}; - -/** - * Allows to create instances of the type `IRuleInduction` that induce individual rules by using a top-down beam search, - * where new conditions are added iteratively to the (initially empty) body of a rule. At each iteration, the refinement - * that improves the rule the most is chosen. The search stops if no refinement results in an improvement. - */ -class BeamSearchTopDownRuleInductionFactory final : public IRuleInductionFactory { - private: - - const RuleCompareFunction ruleCompareFunction_; - - const uint32 beamWidth_; - - const bool resampleFeatures_; - - const uint32 minCoverage_; - - const uint32 maxConditions_; - - const uint32 maxHeadRefinements_; - - const bool recalculatePredictions_; - - const uint32 numThreads_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - * @param beamWidth The width that should be used by the beam search. Must be at least 2 - * @param resampleFeatures True, if a new sample of the available features should be created for each - * rule that is refined during the beam search, false otherwise - * @param minCoverage The minimum number of training examples that must be covered by a rule. Must - * be at least 1 - * @param maxConditions The maximum number of conditions to be included in a rule's body. 
Must be at - * least 2 or 0, if the number of conditions should not be restricted - * @param maxHeadRefinements The maximum number of times, the head of a rule may be refined after a new - * condition has been added to its body. Must be at least 1 or 0, if the number - * of refinements should not be restricted - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, if some of the examples have zero weights, false otherwise - * @param numThreads The number of CPU threads to be used to search for potential refinements of - * a rule in parallel. Must be at least 1 - */ - BeamSearchTopDownRuleInductionFactory(RuleCompareFunction ruleCompareFunction, uint32 beamWidth, - bool resampleFeatures, uint32 minCoverage, uint32 maxConditions, - uint32 maxHeadRefinements, bool recalculatePredictions, uint32 numThreads) - : ruleCompareFunction_(ruleCompareFunction), beamWidth_(beamWidth), resampleFeatures_(resampleFeatures), - minCoverage_(minCoverage), maxConditions_(maxConditions), maxHeadRefinements_(maxHeadRefinements), - recalculatePredictions_(recalculatePredictions), numThreads_(numThreads) {} - - std::unique_ptr create() const override { - return std::make_unique(ruleCompareFunction_, beamWidth_, resampleFeatures_, - minCoverage_, maxConditions_, maxHeadRefinements_, - recalculatePredictions_, numThreads_); - } -}; - -BeamSearchTopDownRuleInductionConfig::BeamSearchTopDownRuleInductionConfig( - RuleCompareFunction ruleCompareFunction, const std::unique_ptr& multiThreadingConfigPtr) - : ruleCompareFunction_(ruleCompareFunction), beamWidth_(4), resampleFeatures_(false), minCoverage_(1), - minSupport_(0.0f), maxConditions_(0), maxHeadRefinements_(1), recalculatePredictions_(true), - multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -uint32 BeamSearchTopDownRuleInductionConfig::getBeamWidth() const { - return beamWidth_; -} - -IBeamSearchTopDownRuleInductionConfig& 
BeamSearchTopDownRuleInductionConfig::setBeamWidth(uint32 beamWidth) { - assertGreaterOrEqual("beamWidth", beamWidth, 2); - beamWidth_ = beamWidth; - return *this; -} - -bool BeamSearchTopDownRuleInductionConfig::areFeaturesResampled() const { - return resampleFeatures_; -} - -IBeamSearchTopDownRuleInductionConfig& BeamSearchTopDownRuleInductionConfig::setResampleFeatures( - bool resampleFeatures) { - resampleFeatures_ = resampleFeatures; - return *this; -} - -uint32 BeamSearchTopDownRuleInductionConfig::getMinCoverage() const { - return minCoverage_; -} - -IBeamSearchTopDownRuleInductionConfig& BeamSearchTopDownRuleInductionConfig::setMinCoverage(uint32 minCoverage) { - assertGreaterOrEqual("minCoverage", minCoverage, 1); - minCoverage_ = minCoverage; - return *this; -} - -float32 BeamSearchTopDownRuleInductionConfig::getMinSupport() const { - return minSupport_; -} - -IBeamSearchTopDownRuleInductionConfig& BeamSearchTopDownRuleInductionConfig::setMinSupport(float32 minSupport) { - if (minSupport != 0) assertGreater("minSupport", minSupport, 0); - if (minSupport != 0) assertLess("minSupport", minSupport, 1); - minSupport_ = minSupport; - return *this; -} - -uint32 BeamSearchTopDownRuleInductionConfig::getMaxConditions() const { - return maxConditions_; -} - -IBeamSearchTopDownRuleInductionConfig& BeamSearchTopDownRuleInductionConfig::setMaxConditions(uint32 maxConditions) { - if (maxConditions != 0) assertGreaterOrEqual("maxConditions", maxConditions, 2); - maxConditions_ = maxConditions; - return *this; -} - -uint32 BeamSearchTopDownRuleInductionConfig::getMaxHeadRefinements() const { - return maxHeadRefinements_; -} - -IBeamSearchTopDownRuleInductionConfig& BeamSearchTopDownRuleInductionConfig::setMaxHeadRefinements( - uint32 maxHeadRefinements) { - if (maxHeadRefinements != 0) assertGreaterOrEqual("maxHeadRefinements", maxHeadRefinements, 1); - maxHeadRefinements_ = maxHeadRefinements; - return *this; -} - -bool 
BeamSearchTopDownRuleInductionConfig::arePredictionsRecalculated() const { - return recalculatePredictions_; -} - -IBeamSearchTopDownRuleInductionConfig& BeamSearchTopDownRuleInductionConfig::setRecalculatePredictions( - bool recalculatePredictions) { - recalculatePredictions_ = recalculatePredictions; - return *this; -} - -std::unique_ptr BeamSearchTopDownRuleInductionConfig::createRuleInductionFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - uint32 numExamples = featureMatrix.getNumRows(); - uint32 minCoverage; - - if (minSupport_ > 0) { - minCoverage = calculateBoundedFraction(numExamples, minSupport_, minCoverage_, numExamples); - } else { - minCoverage = std::min(numExamples, minCoverage_); - } - - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - return std::make_unique(ruleCompareFunction_, beamWidth_, resampleFeatures_, - minCoverage, maxConditions_, maxHeadRefinements_, - recalculatePredictions_, numThreads); -} diff --git a/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_common.hpp b/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_common.hpp deleted file mode 100644 index f0bd5491..00000000 --- a/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_common.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/thresholds/thresholds_subset.hpp" -#include "omp.h" - -/** - * Stores an unique pointer to an object of type `IRuleRefinement` that may be used to search for potential refinements - * of a rule, as well as to an object of template type `RefinementComparator` that allows comparing different - * refinements and keeping track of the best one(s). 
- * - * @tparam The type of the comparator that allows comparing different refinements and keeping track of the best one(s) - */ -template -struct RuleRefinement final { - public: - - /** - * An unique pointer to an object of type `IRuleRefinement` that may be used to search for potential refinements - * of a rule. - */ - std::unique_ptr ruleRefinementPtr; - - /** - * An unique pointer to an object of template type `RefinementComparator` that allows comparing different - * refinements and keeping track of the best one(s). - */ - std::unique_ptr comparatorPtr; -}; - -/** - * Finds the best refinement(s) of an existing rule across multiple features. - * - * @tparam RefinementComparator The type of the comparator that is used to compare the potential refinements - * @param refinementComparator A reference to an object of template type `RefinementComparator` that should be used to - * compare the potential refinements - * @param thresholdsSubset A reference to an object of type `IThresholdsSubset` that should be used to search for - * the potential refinements - * @param featureIndices A reference to an object of type `IIndexVector` that provides access to the indices of - * the features that should be considered - * @param labelIndices A reference to an object of type `IIndexVector` that provides access to the indices of - * the labels for which the refinement(s) may predict - * @param minCoverage The minimum number of training examples that must be covered by potential refinements - * @param numThreads The number of CPU threads to be used to search for potential refinements across multiple - * features in parallel - * @return True, if at least one refinement has been found, false otherwise - */ -template -static inline bool findRefinement(RefinementComparator& refinementComparator, IThresholdsSubset& thresholdsSubset, - const IIndexVector& featureIndices, const IIndexVector& labelIndices, - uint32 minCoverage, uint32 numThreads) { - bool foundRefinement = false; - - 
// For each feature, create an object of type `RuleRefinement`... - uint32 numFeatures = featureIndices.getNumElements(); - RuleRefinement* ruleRefinements = new RuleRefinement[numFeatures]; - - for (uint32 i = 0; i < numFeatures; i++) { - uint32 featureIndex = featureIndices.getIndex(i); - RuleRefinement& ruleRefinement = ruleRefinements[i]; - ruleRefinement.comparatorPtr = std::make_unique(refinementComparator); - ruleRefinement.ruleRefinementPtr = labelIndices.createRuleRefinement(thresholdsSubset, featureIndex); - } - - // Search for the best condition among all available features to be added to the current rule... -#pragma omp parallel for firstprivate(numFeatures) firstprivate(ruleRefinements) firstprivate(minCoverage) \ - schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numFeatures; i++) { - RuleRefinement& ruleRefinement = ruleRefinements[i]; - ruleRefinement.ruleRefinementPtr->findRefinement(*ruleRefinement.comparatorPtr, minCoverage); - } - - // Pick the best refinement among the refinements that have been found for the different features... 
- for (uint32 i = 0; i < numFeatures; i++) { - RuleRefinement& ruleRefinement = ruleRefinements[i]; - foundRefinement |= refinementComparator.merge(*ruleRefinement.comparatorPtr); - } - - delete[] ruleRefinements; - return foundRefinement; -} diff --git a/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_greedy.cpp b/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_greedy.cpp deleted file mode 100644 index 6cfae700..00000000 --- a/cpp/subprojects/common/src/common/rule_induction/rule_induction_top_down_greedy.cpp +++ /dev/null @@ -1,228 +0,0 @@ -#include "common/rule_induction/rule_induction_top_down_greedy.hpp" - -#include "common/math/math.hpp" -#include "common/util/validation.hpp" -#include "rule_induction_common.hpp" -#include "rule_induction_top_down_common.hpp" - -/** - * An implementation of the type `IRuleInduction` that allows to induce individual rules by using a greedy top-down - * search. - */ -class GreedyTopDownRuleInduction final : public AbstractRuleInduction { - private: - - const RuleCompareFunction ruleCompareFunction_; - - const uint32 minCoverage_; - - const uint32 maxConditions_; - - const uint32 maxHeadRefinements_; - - const uint32 numThreads_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - * @param minCoverage The minimum number of training examples that must be covered by a rule. Must - * be at least 1 - * @param maxConditions The maximum number of conditions to be included in a rule's body. Must be at - * least 1 or 0, if the number of conditions should not be restricted - * @param maxHeadRefinements The maximum number of times, the head of a rule may be refinement after a - * new condition has been added to its body. 
Must be at least 1 or 0, if the - * number of refinements should not be restricted - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, if some of the examples have zero weights, false otherwise - * @param numThreads The number of CPU threads to be used to search for potential refinements of - * a rule in parallel. Must be at least 1 - */ - GreedyTopDownRuleInduction(RuleCompareFunction ruleCompareFunction, uint32 minCoverage, uint32 maxConditions, - uint32 maxHeadRefinements, bool recalculatePredictions, uint32 numThreads) - : AbstractRuleInduction(recalculatePredictions), ruleCompareFunction_(ruleCompareFunction), - minCoverage_(minCoverage), maxConditions_(maxConditions), maxHeadRefinements_(maxHeadRefinements), - numThreads_(numThreads) {} - - protected: - - std::unique_ptr growRule( - IThresholds& thresholds, const IIndexVector& labelIndices, const IWeightVector& weights, - IPartition& partition, IFeatureSampling& featureSampling, RNG& rng, - std::unique_ptr& conditionListPtr, - std::unique_ptr& headPtr) const override { - // The label indices for which the next refinement of the rule may predict - const IIndexVector* currentLabelIndices = &labelIndices; - // A list that contains the conditions in the rule's body (in the order they have been learned) - conditionListPtr = std::make_unique(); - // The comparator that is used to keep track of the best refinement of the rule - SingleRefinementComparator refinementComparator(ruleCompareFunction_); - // Whether a refinement of the current rule has been found - bool foundRefinement = true; - - // Create a new subset of the given thresholds... - std::unique_ptr thresholdsSubsetPtr = weights.createThresholdsSubset(thresholds); - - // Search for the best refinement until no improvement in terms of the rule's quality is possible anymore or - // until the maximum number of conditions has been reached... 
- while (foundRefinement && (maxConditions_ == 0 || conditionListPtr->getNumConditions() < maxConditions_)) { - // Sample features... - const IIndexVector& sampledFeatureIndices = featureSampling.sample(rng); - - // Search for the best refinement... - foundRefinement = findRefinement(refinementComparator, *thresholdsSubsetPtr, sampledFeatureIndices, - *currentLabelIndices, minCoverage_, numThreads_); - - if (foundRefinement) { - Refinement& bestRefinement = *refinementComparator.begin(); - - // Sort the rule's predictions by the corresponding label indices... - bestRefinement.headPtr->sort(); - - // Filter the current subset of thresholds by applying the best refinement that has been found... - thresholdsSubsetPtr->filterThresholds(bestRefinement); - - // Add the new condition... - conditionListPtr->addCondition(bestRefinement); - - // Keep the labels for which the rule predicts, if the head should not be further refined... - if (maxHeadRefinements_ > 0 && conditionListPtr->getNumConditions() >= maxHeadRefinements_) { - currentLabelIndices = bestRefinement.headPtr.get(); - } - - // Abort refinement process if the rule is not allowed to cover less examples... - if (bestRefinement.numCovered <= minCoverage_) { - break; - } - } - } - - Refinement& bestRefinement = *refinementComparator.begin(); - headPtr = std::move(bestRefinement.headPtr); - return thresholdsSubsetPtr; - } -}; - -/** - * Allows to create instances of the type `IRuleInduction` that induce individual rules by using a greedy top-down - * search, where new conditions are added iteratively to the (initially empty) body of a rule. At each iteration, the - * refinement that improves the rule the most is chosen. The search stops if no refinement results in an improvement. 
- */ -class GreedyTopDownRuleInductionFactory final : public IRuleInductionFactory { - private: - - const RuleCompareFunction ruleCompareFunction_; - - const uint32 minCoverage_; - - const uint32 maxConditions_; - - const uint32 maxHeadRefinements_; - - const bool recalculatePredictions_; - - const uint32 numThreads_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that - * should be used for comparing the quality of different rules - * @param minCoverage The minimum number of training examples that must be covered by a rule. Must - * be at least 1 - * @param maxConditions The maximum number of conditions to be included in a rule's body. Must be at - * least 1 or 0, if the number of conditions should not be restricted - * @param maxHeadRefinements The maximum number of times, the head of a rule may be refined after a new - * condition has been added to its body. Must be at least 1 or 0, if the number - * of refinements should not be restricted - * @param recalculatePredictions True, if the predictions of rules should be recalculated on all training - * examples, if some of the examples have zero weights, false otherwise - * @param numThreads The number of CPU threads to be used to search for potential refinements of - * a rule in parallel. 
Must be at least 1 - */ - GreedyTopDownRuleInductionFactory(RuleCompareFunction ruleCompareFunction, uint32 minCoverage, - uint32 maxConditions, uint32 maxHeadRefinements, bool recalculatePredictions, - uint32 numThreads) - : ruleCompareFunction_(ruleCompareFunction), minCoverage_(minCoverage), maxConditions_(maxConditions), - maxHeadRefinements_(maxHeadRefinements), recalculatePredictions_(recalculatePredictions), - numThreads_(numThreads) {} - - std::unique_ptr create() const override { - return std::make_unique(ruleCompareFunction_, minCoverage_, maxConditions_, - maxHeadRefinements_, recalculatePredictions_, - numThreads_); - } -}; - -GreedyTopDownRuleInductionConfig::GreedyTopDownRuleInductionConfig( - RuleCompareFunction ruleCompareFunction, const std::unique_ptr& multiThreadingConfigPtr) - : ruleCompareFunction_(ruleCompareFunction), minCoverage_(1), minSupport_(0.0f), maxConditions_(0), - maxHeadRefinements_(1), recalculatePredictions_(true), multiThreadingConfigPtr_(multiThreadingConfigPtr) {} - -uint32 GreedyTopDownRuleInductionConfig::getMinCoverage() const { - return minCoverage_; -} - -IGreedyTopDownRuleInductionConfig& GreedyTopDownRuleInductionConfig::setMinCoverage(uint32 minCoverage) { - assertGreaterOrEqual("minCoverage", minCoverage, 1); - minCoverage_ = minCoverage; - return *this; -} - -float32 GreedyTopDownRuleInductionConfig::getMinSupport() const { - return minSupport_; -} - -IGreedyTopDownRuleInductionConfig& GreedyTopDownRuleInductionConfig::setMinSupport(float32 minSupport) { - if (minSupport != 0) assertGreater("minSupport", minSupport, 0); - if (minSupport != 0) assertLess("minSupport", minSupport, 1); - minSupport_ = minSupport; - return *this; -} - -uint32 GreedyTopDownRuleInductionConfig::getMaxConditions() const { - return maxConditions_; -} - -IGreedyTopDownRuleInductionConfig& GreedyTopDownRuleInductionConfig::setMaxConditions(uint32 maxConditions) { - if (maxConditions != 0) assertGreaterOrEqual("maxConditions", maxConditions, 
1); - maxConditions_ = maxConditions; - return *this; -} - -uint32 GreedyTopDownRuleInductionConfig::getMaxHeadRefinements() const { - return maxHeadRefinements_; -} - -IGreedyTopDownRuleInductionConfig& GreedyTopDownRuleInductionConfig::setMaxHeadRefinements(uint32 maxHeadRefinements) { - if (maxHeadRefinements != 0) assertGreaterOrEqual("maxHeadRefinements", maxHeadRefinements, 1); - maxHeadRefinements_ = maxHeadRefinements; - return *this; -} - -bool GreedyTopDownRuleInductionConfig::arePredictionsRecalculated() const { - return recalculatePredictions_; -} - -IGreedyTopDownRuleInductionConfig& GreedyTopDownRuleInductionConfig::setRecalculatePredictions( - bool recalculatePredictions) { - recalculatePredictions_ = recalculatePredictions; - return *this; -} - -std::unique_ptr GreedyTopDownRuleInductionConfig::createRuleInductionFactory( - const IFeatureMatrix& featureMatrix, const ILabelMatrix& labelMatrix) const { - uint32 numExamples = featureMatrix.getNumRows(); - uint32 minCoverage; - - if (minSupport_ > 0) { - minCoverage = calculateBoundedFraction(numExamples, minSupport_, minCoverage_, numExamples); - } else { - minCoverage = std::min(numExamples, minCoverage_); - } - - uint32 numThreads = multiThreadingConfigPtr_->getNumThreads(featureMatrix, labelMatrix.getNumCols()); - return std::make_unique( - ruleCompareFunction_, minCoverage, maxConditions_, maxHeadRefinements_, recalculatePredictions_, numThreads); -} diff --git a/cpp/subprojects/common/src/common/rule_model_assemblage/default_rule.cpp b/cpp/subprojects/common/src/common/rule_model_assemblage/default_rule.cpp deleted file mode 100644 index 9b8e7d0c..00000000 --- a/cpp/subprojects/common/src/common/rule_model_assemblage/default_rule.cpp +++ /dev/null @@ -1,7 +0,0 @@ -#include "common/rule_model_assemblage/default_rule.hpp" - -DefaultRuleConfig::DefaultRuleConfig(bool useDefaultRule) : useDefaultRule_(useDefaultRule) {} - -bool DefaultRuleConfig::isDefaultRuleUsed(const IRowWiseLabelMatrix& 
labelMatrix) const { - return useDefaultRule_; -} diff --git a/cpp/subprojects/common/src/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp b/cpp/subprojects/common/src/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp deleted file mode 100644 index bb527a28..00000000 --- a/cpp/subprojects/common/src/common/rule_model_assemblage/rule_model_assemblage_sequential.cpp +++ /dev/null @@ -1,105 +0,0 @@ -#include "common/rule_model_assemblage/rule_model_assemblage_sequential.hpp" - -/** - * Allows to sequentially induce several rules, optionally starting with a default rule, that are added to a rule-based - * model. - */ -class SequentialRuleModelAssemblage final : public IRuleModelAssemblage { - private: - - const std::unique_ptr stoppingCriterionFactoryPtr_; - - const bool useDefaultRule_; - - public: - - /** - * @param stoppingCriterionFactoryPtr An unique pointer to an object of type `IStoppingCriterionFactory` that - * allows to create the implementations to be used to decide whether - * additional rules should be induced or not - * @param useDefaultRule True, if a default rule should be used, False otherwise - */ - SequentialRuleModelAssemblage(std::unique_ptr stoppingCriterionFactoryPtr, - bool useDefaultRule) - : stoppingCriterionFactoryPtr_(std::move(stoppingCriterionFactoryPtr)), useDefaultRule_(useDefaultRule) {} - - void induceRules(const IRuleInduction& ruleInduction, const IRulePruning& rulePruning, - const IPostProcessor& postProcessor, IPartition& partition, ILabelSampling& labelSampling, - IInstanceSampling& instanceSampling, IFeatureSampling& featureSampling, - IStatisticsProvider& statisticsProvider, IThresholds& thresholds, IModelBuilder& modelBuilder, - RNG& rng) const override { - uint32 numRules = useDefaultRule_ ? 1 : 0; - uint32 numUsedRules = 0; - - // Induce default rule, if necessary... 
- if (useDefaultRule_) { - ruleInduction.induceDefaultRule(statisticsProvider.get(), modelBuilder); - } - - statisticsProvider.switchToRegularRuleEvaluation(); - - // Induce the remaining rules... - std::unique_ptr stoppingCriterionPtr = - partition.createStoppingCriterion(*stoppingCriterionFactoryPtr_); - - while (true) { - IStoppingCriterion::Result stoppingCriterionResult = - stoppingCriterionPtr->test(statisticsProvider.get(), numRules); - - if (stoppingCriterionResult.numUsedRules != 0) { - numUsedRules = stoppingCriterionResult.numUsedRules; - } - - if (stoppingCriterionResult.stop) { - break; - } - - const IWeightVector& weights = instanceSampling.sample(rng); - const IIndexVector& labelIndices = labelSampling.sample(rng); - bool success = ruleInduction.induceRule(thresholds, labelIndices, weights, partition, featureSampling, - rulePruning, postProcessor, rng, modelBuilder); - - if (success) { - numRules++; - } else { - break; - } - } - - // Set the number of used rules... - modelBuilder.setNumUsedRules(numUsedRules); - } -}; - -/** - * A factory that allows to create instances of the class `IRuleModelAssemblage` that allow to sequentially induce - * several rules, optionally starting with a default rule, that are added to a rule-based model. 
- */ -class SequentialRuleModelAssemblageFactory final : public IRuleModelAssemblageFactory { - private: - - const bool useDefaultRule_; - - public: - - /** - * @param useDefaultRule True, if a default rule should be used, false otherwise - */ - SequentialRuleModelAssemblageFactory(bool useDefaultRule) : useDefaultRule_(useDefaultRule) {} - - std::unique_ptr create( - std::unique_ptr stoppingCriterionFactoryPtr) const override { - return std::make_unique(std::move(stoppingCriterionFactoryPtr), - useDefaultRule_); - } -}; - -SequentialRuleModelAssemblageConfig::SequentialRuleModelAssemblageConfig( - const std::unique_ptr& defaultRuleConfigPtr) - : defaultRuleConfigPtr_(defaultRuleConfigPtr) {} - -std::unique_ptr SequentialRuleModelAssemblageConfig::createRuleModelAssemblageFactory( - const IRowWiseLabelMatrix& labelMatrix) const { - bool useDefaultRule = defaultRuleConfigPtr_->isDefaultRuleUsed(labelMatrix); - return std::make_unique(useDefaultRule); -} diff --git a/cpp/subprojects/common/src/common/rule_pruning/rule_pruning_irep.cpp b/cpp/subprojects/common/src/common/rule_pruning/rule_pruning_irep.cpp deleted file mode 100644 index 8f957add..00000000 --- a/cpp/subprojects/common/src/common/rule_pruning/rule_pruning_irep.cpp +++ /dev/null @@ -1,104 +0,0 @@ -#include "common/rule_pruning/rule_pruning_irep.hpp" - -/** - * An implementation of the class `IRulePruning` that prunes rules by following the ideas of "incremental reduced error - * pruning" (IREP). 
- */ -class Irep final : public IRulePruning { - private: - - const RuleCompareFunction ruleCompareFunction_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - */ - Irep(RuleCompareFunction ruleCompareFunction) : ruleCompareFunction_(ruleCompareFunction) {} - - std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, - ConditionList& conditions, - const AbstractPrediction& head) const override { - uint32 numConditions = conditions.getNumConditions(); - std::unique_ptr bestCoverageStatePtr; - - // Only rules with more than one condition can be pruned... - if (numConditions > 1) { - // Calculate the quality of the original rule on the prune set... - const ICoverageState& originalCoverageState = thresholdsSubset.getCoverageState(); - Quality bestQuality = partition.evaluateOutOfSample(thresholdsSubset, originalCoverageState, head); - - // Create a copy of the original coverage mask... - bestCoverageStatePtr = originalCoverageState.copy(); - - // Reset the given thresholds... - thresholdsSubset.resetThresholds(); - - // We process the existing rule's conditions (except for the last one) in the order they have been - // learned. At each iteration, we calculate the quality of a rule that only contains the conditions - // processed so far and keep track of the best rule... - ConditionList::const_iterator conditionIterator = conditions.cbegin(); - uint32 numPrunedConditions = 0; - - for (uint32 n = 1; n < numConditions; n++) { - // Filter the thresholds by applying the current condition... - const Condition& condition = *conditionIterator; - thresholdsSubset.filterThresholds(condition); - - // Calculate the quality of a rule that contains the conditions that have been processed so far... 
- const ICoverageState& coverageState = thresholdsSubset.getCoverageState(); - Quality quality = partition.evaluateOutOfSample(thresholdsSubset, coverageState, head); - - // Check if the quality is better than the best quality seen so far (reaching the same quality with - // fewer conditions is considered an improvement)... - if (ruleCompareFunction_.compare(quality, bestQuality) - || (numPrunedConditions == 0 && !ruleCompareFunction_.compare(bestQuality, quality))) { - bestQuality = quality; - bestCoverageStatePtr = coverageState.copy(); - numPrunedConditions = (numConditions - n); - } - - conditionIterator++; - } - - // Remove the pruned conditions... - while (numPrunedConditions > 0) { - conditions.removeLastCondition(); - numPrunedConditions--; - } - } - - return bestCoverageStatePtr; - } -}; - -/** - * Allows to create instances of the type `IRulePruning` that prune rules by following the ideas of "incremental reduced - * error pruning" (IREP). Given `n` conditions in the order of their induction, IREP may remove up to `n - 1` trailing - * conditions, depending on which of the resulting rules comes with the greatest improvement in terms of quality as - * measured on the prune set. 
- */ -class IrepFactory final : public IRulePruningFactory { - private: - - const RuleCompareFunction ruleCompareFunction_; - - public: - - /** - * @param ruleCompareFunction An object of type `RuleCompareFunction` that defines the function that should be - * used for comparing the quality of different rules - */ - IrepFactory(RuleCompareFunction ruleCompareFunction) : ruleCompareFunction_(ruleCompareFunction) {} - - std::unique_ptr create() const override { - return std::make_unique(ruleCompareFunction_); - } -}; - -IrepConfig::IrepConfig(RuleCompareFunction ruleCompareFunction) : ruleCompareFunction_(ruleCompareFunction) {} - -std::unique_ptr IrepConfig::createRulePruningFactory() const { - return std::make_unique(ruleCompareFunction_); -} diff --git a/cpp/subprojects/common/src/common/rule_pruning/rule_pruning_no.cpp b/cpp/subprojects/common/src/common/rule_pruning/rule_pruning_no.cpp deleted file mode 100644 index 1c63de1f..00000000 --- a/cpp/subprojects/common/src/common/rule_pruning/rule_pruning_no.cpp +++ /dev/null @@ -1,29 +0,0 @@ -#include "common/rule_pruning/rule_pruning_no.hpp" - -/** - * An implementation of the class `IRulePruning` that does not actually perform any pruning. - */ -class NoRulePruning final : public IRulePruning { - public: - - std::unique_ptr prune(IThresholdsSubset& thresholdsSubset, IPartition& partition, - ConditionList& conditions, - const AbstractPrediction& head) const override { - return nullptr; - } -}; - -/** - * Allows to create instances of the type `IRulePruning` that do not actually perform any pruning. 
- */ -class NoRulePruningFactory final : public IRulePruningFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; - -std::unique_ptr NoRulePruningConfig::createRulePruningFactory() const { - return std::make_unique(); -} diff --git a/cpp/subprojects/common/src/common/rule_refinement/prediction.cpp b/cpp/subprojects/common/src/common/rule_refinement/prediction.cpp deleted file mode 100644 index df8e844c..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/prediction.cpp +++ /dev/null @@ -1,35 +0,0 @@ -#include "common/rule_refinement/prediction.hpp" - -#include "common/data/arrays.hpp" - -AbstractPrediction::AbstractPrediction(uint32 numElements) : predictedScoreVector_(DenseVector(numElements)) {} - -uint32 AbstractPrediction::getNumElements() const { - return predictedScoreVector_.getNumElements(); -} - -AbstractPrediction::score_iterator AbstractPrediction::scores_begin() { - return predictedScoreVector_.begin(); -} - -AbstractPrediction::score_iterator AbstractPrediction::scores_end() { - return predictedScoreVector_.end(); -} - -AbstractPrediction::score_const_iterator AbstractPrediction::scores_cbegin() const { - return predictedScoreVector_.cbegin(); -} - -AbstractPrediction::score_const_iterator AbstractPrediction::scores_cend() const { - return predictedScoreVector_.cend(); -} - -void AbstractPrediction::set(AbstractPrediction::score_const_iterator begin, - AbstractPrediction::score_const_iterator end) { - copyArray(begin, predictedScoreVector_.begin(), predictedScoreVector_.getNumElements()); -} - -void AbstractPrediction::set(DenseBinnedVector::const_iterator begin, - DenseBinnedVector::const_iterator end) { - copyArray(begin, predictedScoreVector_.begin(), predictedScoreVector_.getNumElements()); -} diff --git a/cpp/subprojects/common/src/common/rule_refinement/prediction_complete.cpp b/cpp/subprojects/common/src/common/rule_refinement/prediction_complete.cpp deleted file mode 100644 index 
1b8f43c5..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/prediction_complete.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "common/rule_refinement/prediction_complete.hpp" - -#include "common/data/arrays.hpp" -#include "common/model/head_complete.hpp" -#include "common/rule_refinement/rule_refinement.hpp" -#include "common/statistics/statistics.hpp" - -CompletePrediction::CompletePrediction(uint32 numElements) - : AbstractEvaluatedPrediction(numElements), indexVector_(CompleteIndexVector(numElements)) {} - -CompletePrediction::index_const_iterator CompletePrediction::indices_cbegin() const { - return indexVector_.cbegin(); -} - -CompletePrediction::index_const_iterator CompletePrediction::indices_cend() const { - return indexVector_.cend(); -} - -bool CompletePrediction::isPartial() const { - return false; -} - -uint32 CompletePrediction::getIndex(uint32 pos) const { - return indexVector_.getIndex(pos); -} - -std::unique_ptr CompletePrediction::createStatisticsSubset(const IStatistics& statistics, - const EqualWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr CompletePrediction::createStatisticsSubset(const IStatistics& statistics, - const BitWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr CompletePrediction::createStatisticsSubset( - const IStatistics& statistics, const DenseWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr CompletePrediction::createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr CompletePrediction::createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr CompletePrediction::createStatisticsSubset( - 
const IStatistics& statistics, const OutOfSampleWeightVector>& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr CompletePrediction::createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const { - return indexVector_.createRuleRefinement(thresholdsSubset, featureIndex); -} - -void CompletePrediction::apply(IStatistics& statistics, uint32 statisticIndex) const { - statistics.applyPrediction(statisticIndex, *this); -} - -void CompletePrediction::revert(IStatistics& statistics, uint32 statisticIndex) const { - statistics.revertPrediction(statisticIndex, *this); -} - -void CompletePrediction::sort() {} - -std::unique_ptr CompletePrediction::createHead() const { - uint32 numElements = this->getNumElements(); - std::unique_ptr headPtr = std::make_unique(numElements); - copyArray(this->scores_cbegin(), headPtr->scores_begin(), numElements); - return headPtr; -} diff --git a/cpp/subprojects/common/src/common/rule_refinement/prediction_evaluated.cpp b/cpp/subprojects/common/src/common/rule_refinement/prediction_evaluated.cpp deleted file mode 100644 index a2229a4e..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/prediction_evaluated.cpp +++ /dev/null @@ -1,3 +0,0 @@ -#include "common/rule_refinement/prediction_evaluated.hpp" - -AbstractEvaluatedPrediction::AbstractEvaluatedPrediction(uint32 numElements) : AbstractPrediction(numElements) {} diff --git a/cpp/subprojects/common/src/common/rule_refinement/prediction_partial.cpp b/cpp/subprojects/common/src/common/rule_refinement/prediction_partial.cpp deleted file mode 100644 index a9ee406c..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/prediction_partial.cpp +++ /dev/null @@ -1,123 +0,0 @@ -#include "common/rule_refinement/prediction_partial.hpp" - -#include "common/data/arrays.hpp" -#include "common/data/vector_sparse_array.hpp" -#include "common/model/head_partial.hpp" -#include 
"common/rule_refinement/rule_refinement.hpp" -#include "common/statistics/statistics.hpp" - -PartialPrediction::PartialPrediction(uint32 numElements, bool sorted) - : AbstractEvaluatedPrediction(numElements), indexVector_(PartialIndexVector(numElements)), sorted_(sorted) {} - -PartialPrediction::index_iterator PartialPrediction::indices_begin() { - return indexVector_.begin(); -} - -PartialPrediction::index_iterator PartialPrediction::indices_end() { - return indexVector_.end(); -} - -PartialPrediction::index_const_iterator PartialPrediction::indices_cbegin() const { - return indexVector_.cbegin(); -} - -PartialPrediction::index_const_iterator PartialPrediction::indices_cend() const { - return indexVector_.cend(); -} - -void PartialPrediction::setNumElements(uint32 numElements, bool freeMemory) { - this->predictedScoreVector_.setNumElements(numElements, freeMemory); - indexVector_.setNumElements(numElements, freeMemory); -} - -void PartialPrediction::setSorted(bool sorted) { - sorted_ = sorted; -} - -bool PartialPrediction::isPartial() const { - return true; -} - -uint32 PartialPrediction::getIndex(uint32 pos) const { - return indexVector_.getIndex(pos); -} - -std::unique_ptr PartialPrediction::createStatisticsSubset(const IStatistics& statistics, - const EqualWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr PartialPrediction::createStatisticsSubset(const IStatistics& statistics, - const BitWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr PartialPrediction::createStatisticsSubset( - const IStatistics& statistics, const DenseWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr PartialPrediction::createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr 
PartialPrediction::createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr PartialPrediction::createStatisticsSubset( - const IStatistics& statistics, const OutOfSampleWeightVector>& weights) const { - return statistics.createSubset(indexVector_, weights); -} - -std::unique_ptr PartialPrediction::createRuleRefinement(IThresholdsSubset& thresholdsSubset, - uint32 featureIndex) const { - return indexVector_.createRuleRefinement(thresholdsSubset, featureIndex); -} - -void PartialPrediction::apply(IStatistics& statistics, uint32 statisticIndex) const { - statistics.applyPrediction(statisticIndex, *this); -} - -void PartialPrediction::revert(IStatistics& statistics, uint32 statisticIndex) const { - statistics.revertPrediction(statisticIndex, *this); -} - -void PartialPrediction::sort() { - if (!sorted_) { - uint32 numElements = this->getNumElements(); - - if (numElements > 1) { - SparseArrayVector sortedVector(numElements); - SparseArrayVector::iterator sortedIterator = sortedVector.begin(); - index_iterator indexIterator = this->indices_begin(); - score_iterator scoreIterator = this->scores_begin(); - - for (uint32 i = 0; i < numElements; i++) { - IndexedValue& entry = sortedIterator[i]; - entry.index = indexIterator[i]; - entry.value = scoreIterator[i]; - } - - std::sort(sortedIterator, sortedVector.end(), IndexedValue::CompareIndex()); - - for (uint32 i = 0; i < numElements; i++) { - const IndexedValue& entry = sortedIterator[i]; - indexIterator[i] = entry.index; - scoreIterator[i] = entry.value; - } - } - - sorted_ = true; - } -} - -std::unique_ptr PartialPrediction::createHead() const { - uint32 numElements = this->getNumElements(); - std::unique_ptr headPtr = std::make_unique(numElements); - copyArray(this->scores_cbegin(), headPtr->scores_begin(), numElements); - copyArray(this->indices_cbegin(), headPtr->indices_begin(), numElements); - 
return headPtr; -} diff --git a/cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_fixed.cpp b/cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_fixed.cpp deleted file mode 100644 index e41bdcaf..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_fixed.cpp +++ /dev/null @@ -1,125 +0,0 @@ -#include "common/rule_refinement/refinement_comparator_fixed.hpp" - -#include "common/rule_refinement/score_processor.hpp" - -#include - -FixedRefinementComparator::FixedRefinementComparator(RuleCompareFunction ruleCompareFunction, uint32 maxRefinements, - const Quality& minQuality) - : ruleCompareFunction_(ruleCompareFunction), maxRefinements_(maxRefinements), - refinements_(new Refinement[maxRefinements]), minQuality_(minQuality) { - order_.reserve(maxRefinements); -} - -FixedRefinementComparator::FixedRefinementComparator(RuleCompareFunction ruleCompareFunction, uint32 maxRefinements) - : FixedRefinementComparator(ruleCompareFunction, maxRefinements, ruleCompareFunction.minQuality) {} - -FixedRefinementComparator::FixedRefinementComparator(const FixedRefinementComparator& comparator) - : FixedRefinementComparator(comparator.ruleCompareFunction_, comparator.maxRefinements_, comparator.minQuality_) {} - -FixedRefinementComparator::~FixedRefinementComparator() { - delete[] refinements_; -} - -uint32 FixedRefinementComparator::getNumElements() const { - return (uint32) order_.size(); -} - -FixedRefinementComparator::iterator FixedRefinementComparator::begin() { - return order_.begin(); -} - -FixedRefinementComparator::iterator FixedRefinementComparator::end() { - return order_.end(); -} - -bool FixedRefinementComparator::isImprovement(const IScoreVector& scoreVector) const { - return ruleCompareFunction_.compare(scoreVector, minQuality_); -} - -void FixedRefinementComparator::pushRefinement(const Refinement& refinement, const IScoreVector& scoreVector) { - auto numRefinements = order_.size(); - - if 
(numRefinements < maxRefinements_) { - Refinement& newRefinement = refinements_[numRefinements]; - newRefinement = refinement; - ScoreProcessor scoreProcessor(newRefinement.headPtr); - scoreProcessor.processScores(scoreVector); - order_.push_back(newRefinement); - } else { - Refinement& worstRefinement = order_.back(); - worstRefinement = refinement; - ScoreProcessor scoreProcessor(worstRefinement.headPtr); - scoreProcessor.processScores(scoreVector); - } - - std::sort(order_.begin(), order_.end(), [=](const Refinement& a, const Refinement& b) { - return ruleCompareFunction_.compare(*a.headPtr, *b.headPtr); - }); - - const Refinement& worstRefinement = order_.back(); - minQuality_ = *worstRefinement.headPtr; -} - -bool FixedRefinementComparator::merge(FixedRefinementComparator& comparator) { - bool result = false; - Refinement* tmp = new Refinement[maxRefinements_]; - uint32 n = 0; - - auto it1 = order_.begin(); - auto end1 = order_.end(); - auto it2 = comparator.order_.begin(); - auto end2 = comparator.order_.end(); - - while (n < maxRefinements_ && it1 != end1 && it2 != end2) { - Refinement& refinement1 = *it1; - Refinement& refinement2 = *it2; - Refinement& newRefinement = tmp[n]; - - if (ruleCompareFunction_.compare(*refinement1.headPtr, *refinement2.headPtr)) { - newRefinement = refinement1; - newRefinement.headPtr = std::move(refinement1.headPtr); - it1++; - } else { - result = true; - newRefinement = refinement2; - newRefinement.headPtr = std::move(refinement2.headPtr); - it2++; - } - - n++; - } - - for (; n < maxRefinements_ && it1 != end1; it1++) { - Refinement& refinement = *it1; - Refinement& newRefinement = tmp[n]; - newRefinement = refinement; - newRefinement.headPtr = std::move(refinement.headPtr); - n++; - } - - for (; n < maxRefinements_ && it2 != end2; it2++) { - result = true; - Refinement& refinement = *it2; - Refinement& newRefinement = tmp[n]; - newRefinement = refinement; - newRefinement.headPtr = std::move(refinement.headPtr); - n++; - } - - 
order_.clear(); - - for (uint32 i = 0; i < n; i++) { - Refinement& newRefinement = tmp[i]; - order_.push_back(newRefinement); - } - - if (n > 0) { - const Refinement& worstRefinement = order_.back(); - minQuality_ = *worstRefinement.headPtr; - } - - delete[] refinements_; - refinements_ = tmp; - return result; -} diff --git a/cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_single.cpp b/cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_single.cpp deleted file mode 100644 index 3fd6b94f..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/refinement_comparator_single.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "common/rule_refinement/refinement_comparator_single.hpp" - -SingleRefinementComparator::SingleRefinementComparator(RuleCompareFunction ruleCompareFunction) - : ruleCompareFunction_(ruleCompareFunction), bestQuality_(ruleCompareFunction.minQuality), - scoreProcessor_(ScoreProcessor(bestRefinement_.headPtr)) {} - -SingleRefinementComparator::SingleRefinementComparator(const SingleRefinementComparator& comparator) - : ruleCompareFunction_(comparator.ruleCompareFunction_), bestQuality_(comparator.bestQuality_), - scoreProcessor_(ScoreProcessor(bestRefinement_.headPtr)) {} - -SingleRefinementComparator::iterator SingleRefinementComparator::begin() { - return &bestRefinement_; -} - -SingleRefinementComparator::iterator SingleRefinementComparator::end() { - return bestRefinement_.headPtr != nullptr ? &bestRefinement_ + 1 : &bestRefinement_; -} - -uint32 SingleRefinementComparator::getNumElements() const { - return bestRefinement_.headPtr != nullptr ? 
1 : 0; -} - -bool SingleRefinementComparator::isImprovement(const IScoreVector& scoreVector) const { - return ruleCompareFunction_.compare(scoreVector, bestQuality_); -} - -void SingleRefinementComparator::pushRefinement(const Refinement& refinement, const IScoreVector& scoreVector) { - bestRefinement_ = refinement; - scoreProcessor_.processScores(scoreVector); - bestQuality_ = *bestRefinement_.headPtr; -} - -bool SingleRefinementComparator::merge(SingleRefinementComparator& comparator) { - if (ruleCompareFunction_.compare(comparator.bestQuality_, bestQuality_)) { - Refinement& otherRefinement = comparator.bestRefinement_; - bestRefinement_ = otherRefinement; - bestRefinement_.headPtr = std::move(otherRefinement.headPtr); - bestQuality_ = *bestRefinement_.headPtr; - return true; - } - - return false; -} diff --git a/cpp/subprojects/common/src/common/rule_refinement/rule_refinement_approximate.cpp b/cpp/subprojects/common/src/common/rule_refinement/rule_refinement_approximate.cpp deleted file mode 100644 index 16adafc6..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/rule_refinement_approximate.cpp +++ /dev/null @@ -1,376 +0,0 @@ -#include "common/rule_refinement/rule_refinement_approximate.hpp" - -#include - -template -static inline void findRefinementInternally(const IndexVector& labelIndices, uint32 numExamples, uint32 featureIndex, - bool nominal, uint32 minCoverage, - IRuleRefinementCallback& callback, - RefinementComparator& comparator) { - Refinement refinement; - refinement.featureIndex = featureIndex; - - // Invoke the callback... 
- IRuleRefinementCallback::Result callbackResult = callback.get(); - const IHistogram& statistics = callbackResult.statistics; - const ThresholdVector& thresholdVector = callbackResult.vector; - ThresholdVector::const_iterator thresholdIterator = thresholdVector.cbegin(); - uint32 numBins = thresholdVector.getNumElements(); - uint32 sparseBinIndex = thresholdVector.getSparseBinIndex(); - bool sparse = sparseBinIndex < numBins; - - // Create a new, empty subset of the statistics... - std::unique_ptr statisticsSubsetPtr = statistics.createSubset(labelIndices); - - for (auto it = thresholdVector.missing_indices_cbegin(); it != thresholdVector.missing_indices_cend(); it++) { - uint32 i = *it; - statisticsSubsetPtr->addToMissing(i); - } - - // In the following, we start by processing the bins in range [0, sparseBinIndex)... - uint32 numCovered = 0; - int64 firstR = 0; - int64 r; - - // Traverse bins in ascending order until the first bin with non-zero weight is encountered... - for (r = 0; r < sparseBinIndex; r++) { - uint32 weight = statistics.getBinWeight(r); - - if (weight > 0) { - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - break; - } - } - - uint32 numAccumulated = numCovered; - - // Traverse the remaining bins in ascending order... - if (numCovered > 0) { - for (r = r + 1; r < sparseBinIndex; r++) { - uint32 weight = statistics.getBinWeight(r); - - // Do only consider bins that are not empty... - if (weight > 0) { - // Check if a condition that uses the <= operator (or the == operator in case of a nominal feature) - // covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.numCovered = numCovered; - refinement.covered = true; - refinement.threshold = thresholdIterator[r - 1]; - refinement.comparator = nominal ? EQ : LEQ; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the > operator (or the != operator in case of a nominal feature) - // covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.numCovered = coverage; - refinement.covered = false; - refinement.threshold = thresholdIterator[r - 1]; - refinement.comparator = nominal ? NEQ : GR; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Reset the subset in case of a nominal feature, as the previous bins will not be covered by the next - // condition... - if (nominal) { - statisticsSubsetPtr->resetSubset(); - numCovered = 0; - firstR = r; - } - - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - numAccumulated += weight; - } - } - - // If any bins have been processed so far and if there is a sparse bin, we must evaluate additional conditions - // that separate the bins that have been iterated from the remaining ones (including the sparse bin)... - if (numCovered > 0 && sparse) { - // Check if a condition that uses the <= operator (or the == operator in case of a nominal feature) covers - // at least `minCoverage` examples... 
- if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.numCovered = numCovered; - refinement.covered = true; - refinement.threshold = thresholdIterator[sparseBinIndex - 1]; - refinement.comparator = nominal ? EQ : LEQ; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the > operator (or the != operator in case of a nominal feature) covers at - // least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.numCovered = coverage; - refinement.covered = false; - refinement.threshold = thresholdIterator[sparseBinIndex - 1]; - refinement.comparator = nominal ? NEQ : GR; - comparator.pushRefinement(refinement, scoreVector); - } - } - } - - // Reset the subset, if any bins have been processed... - statisticsSubsetPtr->resetSubset(); - } - - uint32 numAccumulatedPrevious = numAccumulated; - - // We continue by processing the bins in range (sparseBinIndex, numBins)... - numCovered = 0; - firstR = ((int64) numBins) - 1; - - // Traverse bins in descending order until the first bin with non-zero weight is encountered... 
- for (r = firstR; r > sparseBinIndex; r--) { - uint32 weight = statistics.getBinWeight(r); - - if (weight > 0) { - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - break; - } - } - - numAccumulated = numCovered; - - // Traverse the remaining bins in descending order... - if (numCovered > 0) { - for (r = r - 1; r > sparseBinIndex; r--) { - uint32 weight = statistics.getBinWeight(r); - - // Do only consider bins that are not empty... - if (weight > 0) { - // Check if a condition that uses the > operator (or the == operator in case of a nominal feature) - // covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.numCovered = numCovered; - refinement.covered = true; - - if (nominal) { - refinement.threshold = thresholdIterator[firstR]; - refinement.comparator = EQ; - } else { - refinement.threshold = thresholdIterator[r]; - refinement.comparator = GR; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the <= operator (or the != operator in case of a nominal feature) - // covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.numCovered = coverage; - refinement.covered = false; - - if (nominal) { - refinement.threshold = thresholdIterator[firstR]; - refinement.comparator = NEQ; - } else { - refinement.threshold = thresholdIterator[r]; - refinement.comparator = LEQ; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Reset the subset in case of a nominal feature, as the previous bins will not be covered by the next - // condition... - if (nominal) { - statisticsSubsetPtr->resetSubset(); - numCovered = 0; - firstR = r; - } - - // Add the bin to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(r); - numCovered += weight; - numAccumulated += weight; - } - } - - // If there is a sparse bin, we must evaluate additional conditions that separate the bins in range - // (sparseBinIndex, numBins) from the remaining ones... - if (sparse) { - // Check if a condition that uses the > operator (or the == operator in case of a nominal feature) covers at - // least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.numCovered = numCovered; - refinement.covered = true; - - if (nominal) { - refinement.threshold = thresholdIterator[firstR]; - refinement.comparator = EQ; - } else { - refinement.threshold = thresholdIterator[sparseBinIndex]; - refinement.comparator = GR; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the <= operator (or the != operator in case of a nominal feature) covers - // at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = sparseBinIndex; - refinement.numCovered = coverage; - refinement.covered = false; - - if (nominal) { - refinement.threshold = thresholdIterator[firstR]; - refinement.comparator = NEQ; - } else { - refinement.threshold = thresholdIterator[sparseBinIndex]; - refinement.comparator = LEQ; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // If the feature is nominal and if any bins in the range [0, sparseBinIndex) have been processed earlier, - // we must test additional conditions that separate the sparse bin from the remaining bins... - if (nominal && numAccumulatedPrevious > 0) { - // Reset the subset once again to ensure that the accumulated state includes all bins that have been - // processed so far... - statisticsSubsetPtr->resetSubset(); - - // Check if the condition `f != thresholdIterator[sparseBinIndex]` covers at least `minCoverage` - // examples... 
- uint32 coverage = numExamples - numAccumulated - numAccumulatedPrevious; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresAccumulated(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = sparseBinIndex; - refinement.end = sparseBinIndex + 1; - refinement.numCovered = coverage; - refinement.covered = false; - refinement.threshold = thresholdIterator[sparseBinIndex]; - refinement.comparator = NEQ; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if the condition `f == thresholdIterator[sparseBinIndex]` covers at least `minCoverage` - // examples... - coverage = numAccumulated + numAccumulatedPrevious; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncoveredAccumulated(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = sparseBinIndex; - refinement.end = sparseBinIndex + 1; - refinement.numCovered = coverage; - refinement.covered = true; - refinement.threshold = thresholdIterator[sparseBinIndex]; - refinement.comparator = EQ; - comparator.pushRefinement(refinement, scoreVector); - } - } - } - } - } -} - -template -ApproximateRuleRefinement::ApproximateRuleRefinement(const IndexVector& labelIndices, uint32 numExamples, - uint32 featureIndex, bool nominal, - std::unique_ptr callbackPtr) - : labelIndices_(labelIndices), numExamples_(numExamples), featureIndex_(featureIndex), nominal_(nominal), - callbackPtr_(std::move(callbackPtr)) {} - -template -void ApproximateRuleRefinement::findRefinement(SingleRefinementComparator& comparator, - uint32 minCoverage) { - findRefinementInternally(labelIndices_, numExamples_, featureIndex_, nominal_, minCoverage, *callbackPtr_, - comparator); -} - -template -void ApproximateRuleRefinement::findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) { - findRefinementInternally(labelIndices_, numExamples_, featureIndex_, nominal_, minCoverage, *callbackPtr_, - comparator); -} - -template class ApproximateRuleRefinement; -template class ApproximateRuleRefinement; diff --git a/cpp/subprojects/common/src/common/rule_refinement/rule_refinement_exact.cpp b/cpp/subprojects/common/src/common/rule_refinement/rule_refinement_exact.cpp deleted file mode 100644 index 63f360a5..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/rule_refinement_exact.cpp +++ /dev/null @@ -1,551 +0,0 @@ -#include "common/rule_refinement/rule_refinement_exact.hpp" - -#include "common/math/math.hpp" - -static inline uint32 upperBound(FeatureVector::const_iterator iterator, uint32 start, uint32 end, float32 threshold) { - while (start < end) { - uint32 pivot = start + ((end - start) / 2); - float32 featureValue = iterator[pivot].value; - - if (featureValue <= threshold) { - start = pivot 
+ 1; - } else { - end = pivot; - } - } - - return start; -} - -static inline void adjustRefinement(Refinement& refinement, FeatureVector::const_iterator iterator) { - int64 previous = refinement.previous; - int64 end = refinement.end; - - if (std::abs(previous - end) > 1) { - if (end < previous) { - refinement.end = ((int64) upperBound(iterator, end + 1, previous, refinement.threshold)) - 1; - } else { - refinement.end = upperBound(iterator, previous + 1, end, refinement.threshold); - } - } -} - -template -static inline void findRefinementInternally( - const IndexIterator& labelIndices, uint32 numExamples, uint32 featureIndex, bool nominal, uint32 minCoverage, - bool hasZeroWeights, IRuleRefinementCallback& callback, - RefinementComparator& comparator) { - Refinement refinement; - refinement.featureIndex = featureIndex; - - // Invoke the callback... - IRuleRefinementCallback::Result callbackResult = callback.get(); - const IImmutableWeightedStatistics& statistics = callbackResult.statistics; - const FeatureVector& featureVector = callbackResult.vector; - FeatureVector::const_iterator featureVectorIterator = featureVector.cbegin(); - uint32 numFeatureValues = featureVector.getNumElements(); - - // Create a new, empty subset of the statistics... - std::unique_ptr statisticsSubsetPtr = statistics.createSubset(labelIndices); - - for (auto it = featureVector.missing_indices_cbegin(); it != featureVector.missing_indices_cend(); it++) { - uint32 i = *it; - statisticsSubsetPtr->addToMissing(i); - } - - // In the following, we start by processing all examples with feature values < 0... - uint32 numCovered = 0; - int64 firstR = 0; - int64 lastNegativeR = -1; - float32 previousThreshold = 0; - int64 previousR = 0; - int64 r; - - // Traverse examples with feature values < 0 in ascending order until the first example with non-zero weight is - // encountered... 
- for (r = 0; r < numFeatureValues; r++) { - float32 currentThreshold = featureVectorIterator[r].value; - - if (currentThreshold >= 0) { - break; - } - - lastNegativeR = r; - uint32 i = featureVectorIterator[r].index; - - if (statisticsSubsetPtr->hasNonZeroWeight(i)) { - // Add the example to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(i); - numCovered++; - previousThreshold = currentThreshold; - previousR = r; - break; - } - } - - uint32 numAccumulated = numCovered; - - // Traverse the remaining examples with feature values < 0 in ascending order... - if (numCovered > 0) { - for (r = r + 1; r < numFeatureValues; r++) { - float32 currentThreshold = featureVectorIterator[r].value; - - if (currentThreshold >= 0) { - break; - } - - lastNegativeR = r; - uint32 i = featureVectorIterator[r].index; - - // Do only consider examples that are included in the current sub-sample... - if (statisticsSubsetPtr->hasNonZeroWeight(i)) { - // Thresholds that separate between examples with the same feature value must not be considered... - if (previousThreshold != currentThreshold) { - // Check if a condition that uses the <= operator (or the == operator in case of a nominal feature) - // covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.previous = previousR; - refinement.numCovered = numCovered; - refinement.covered = true; - - if (nominal) { - refinement.comparator = EQ; - refinement.threshold = previousThreshold; - } else { - refinement.comparator = LEQ; - refinement.threshold = arithmeticMean(previousThreshold, currentThreshold); - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the > operator (or the != operator in case of a nominal feature) - // covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.previous = previousR; - refinement.numCovered = coverage; - refinement.covered = false; - - if (nominal) { - refinement.comparator = NEQ; - refinement.threshold = previousThreshold; - } else { - refinement.comparator = GR; - refinement.threshold = arithmeticMean(previousThreshold, currentThreshold); - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Reset the subset in case of a nominal feature, as the previous examples will not be covered by - // the next condition... - if (nominal) { - statisticsSubsetPtr->resetSubset(); - numCovered = 0; - firstR = r; - } - } - - previousThreshold = currentThreshold; - previousR = r; - - // Add the example to the subset to mark it as covered by upcoming refinements... 
- statisticsSubsetPtr->addToSubset(i); - numCovered++; - numAccumulated++; - } - } - - // If the feature is nominal and the examples that have been iterated so far do not have the same feature value, - // or if not all examples have been iterated so far, we must evaluate additional conditions - // `f == previousThreshold` and `f != previousThreshold`... - if (nominal && numCovered > 0 && (numCovered < numAccumulated || numAccumulated < numExamples)) { - // Check if a condition that uses the == operator covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = (lastNegativeR + 1); - refinement.previous = previousR; - refinement.numCovered = numCovered; - refinement.covered = true; - refinement.comparator = EQ; - refinement.threshold = previousThreshold; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the != operator covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = (lastNegativeR + 1); - refinement.previous = previousR; - refinement.numCovered = coverage; - refinement.covered = false; - refinement.comparator = NEQ; - refinement.threshold = previousThreshold; - comparator.pushRefinement(refinement, scoreVector); - } - } - } - - // Reset the subset, if any examples with feature value < 0 have been processed... - statisticsSubsetPtr->resetSubset(); - } - - float32 previousThresholdNegative = previousThreshold; - int64 previousRNegative = previousR; - uint32 numAccumulatedNegative = numAccumulated; - - // We continue by processing all examples with feature values >= 0... - numCovered = 0; - firstR = ((int64) numFeatureValues) - 1; - - // Traverse examples with feature values >= 0 in descending order until the first example with non-zero weight is - // encountered... - for (r = firstR; r > lastNegativeR; r--) { - uint32 i = featureVectorIterator[r].index; - - if (statisticsSubsetPtr->hasNonZeroWeight(i)) { - // Add the example to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(i); - numCovered++; - previousThreshold = featureVectorIterator[r].value; - previousR = r; - break; - } - } - - numAccumulated = numCovered; - - // Traverse the remaining examples with feature values >= 0 in descending order... - if (numCovered > 0) { - for (r = r - 1; r > lastNegativeR; r--) { - uint32 i = featureVectorIterator[r].index; - - // Do only consider examples that are included in the current sub-sample... - if (statisticsSubsetPtr->hasNonZeroWeight(i)) { - float32 currentThreshold = featureVectorIterator[r].value; - - // Thresholds that separate between examples with the same feature value must not be considered... - if (previousThreshold != currentThreshold) { - // Check if a condition that uses the > operator (or the == operator in case of a nominal feature) - // covers at least `minCoverage` examples... 
- if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.previous = previousR; - refinement.numCovered = numCovered; - refinement.covered = true; - - if (nominal) { - refinement.comparator = EQ; - refinement.threshold = previousThreshold; - } else { - refinement.comparator = GR; - refinement.threshold = arithmeticMean(currentThreshold, previousThreshold); - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the <= operator (or the != operator in case of a nominal feature) - // covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = r; - refinement.previous = previousR; - refinement.numCovered = coverage; - refinement.covered = false; - - if (nominal) { - refinement.comparator = NEQ; - refinement.threshold = previousThreshold; - } else { - refinement.comparator = LEQ; - refinement.threshold = arithmeticMean(currentThreshold, previousThreshold); - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Reset the subset in case of a nominal feature, as the previous examples will not be covered by - // the next condition... 
- if (nominal) { - statisticsSubsetPtr->resetSubset(); - numCovered = 0; - firstR = r; - } - } - - previousThreshold = currentThreshold; - previousR = r; - - // Add the example to the subset to mark it as covered by upcoming refinements... - statisticsSubsetPtr->addToSubset(i); - numCovered++; - numAccumulated++; - } - } - } - - // If the feature is nominal and the examples with feature values >= 0 that have been iterated so far do not all - // have the same feature value, we must evaluate additional conditions `f == previousThreshold` and - // `f != previousThreshold`... - if (nominal && numCovered > 0 && numCovered < numAccumulated) { - // Check if a condition that uses the == operator covers at least `minCoverage` examples... - if (numCovered >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... - if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = lastNegativeR; - refinement.previous = previousR; - refinement.numCovered = numCovered; - refinement.covered = true; - refinement.comparator = EQ; - refinement.threshold = previousThreshold; - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the != operator covers at least `minCoverage` examples... - uint32 coverage = numExamples - numCovered; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.end = lastNegativeR; - refinement.previous = previousR; - refinement.numCovered = coverage; - refinement.covered = false; - refinement.comparator = NEQ; - refinement.threshold = previousThreshold; - comparator.pushRefinement(refinement, scoreVector); - } - } - } - - uint32 numAccumulatedTotal = numAccumulatedNegative + numAccumulated; - - // If the number of all examples that have been iterated so far (including those with feature values < 0 and those - // with feature values >= 0) is less than the total number of examples, this means that there are examples with - // sparse, i.e. zero, feature values. In such case, we must explicitly test conditions that separate these examples - // from the ones that have already been iterated... - if (numAccumulatedTotal > 0 && numAccumulatedTotal < numExamples) { - // If the feature is nominal, we must reset the subset once again to ensure that the accumulated state includes - // all examples that have been processed so far... - if (nominal) { - statisticsSubsetPtr->resetSubset(); - firstR = ((int64) numFeatureValues) - 1; - } - - // Check if the condition `f > previousThreshold / 2` (or `f != 0` in case of a nominal feature) covers at least - // `minCoverage` examples... - uint32 coverage = nominal ? numAccumulatedTotal : numAccumulated; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = - nominal ? statisticsSubsetPtr->calculateScoresAccumulated() : statisticsSubsetPtr->calculateScores(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.covered = true; - refinement.numCovered = coverage; - - if (nominal) { - refinement.end = -1; - refinement.previous = -1; - refinement.comparator = NEQ; - refinement.threshold = 0.0; - } else { - refinement.end = lastNegativeR; - refinement.previous = previousR; - refinement.comparator = GR; - refinement.threshold = previousThreshold * 0.5; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if the condition `f <= previousThreshold / 2` (or `f == 0` in case of a nominal feature) covers at - // least `minCoverage` examples... - coverage = numExamples - (nominal ? numAccumulatedTotal : numAccumulated); - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = nominal ? statisticsSubsetPtr->calculateScoresUncoveredAccumulated() - : statisticsSubsetPtr->calculateScoresUncovered(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = firstR; - refinement.covered = false; - refinement.numCovered = coverage; - - if (nominal) { - refinement.end = -1; - refinement.previous = -1; - refinement.comparator = EQ; - refinement.threshold = 0.0; - } else { - refinement.end = lastNegativeR; - refinement.previous = previousR; - refinement.numCovered = (numExamples - numAccumulated); - refinement.comparator = LEQ; - refinement.threshold = previousThreshold * 0.5; - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - } - - // If the feature is numerical and there are other examples than those with feature values < 0 that have been - // processed earlier, we must evaluate additional conditions that separate the examples with feature values < 0 from - // the remaining ones (unlike in the nominal case, these conditions cannot be evaluated earlier, because it remains - // unclear what the thresholds of the conditions should be until the examples with feature values >= 0 have been - // processed). - if (!nominal && numAccumulatedNegative > 0 && numAccumulatedNegative < numExamples) { - // Check if a condition that uses the <= operator covers at least `minCoverage` examples... - if (numAccumulatedNegative >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresAccumulated(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = 0; - refinement.end = (lastNegativeR + 1); - refinement.previous = previousRNegative; - refinement.numCovered = numAccumulatedNegative; - refinement.covered = true; - refinement.comparator = LEQ; - - if (numAccumulatedTotal < numExamples) { - // If the condition separates an example with feature value < 0 from an (sparse) example with - // feature value == 0 - refinement.threshold = previousThresholdNegative * 0.5; - } else { - // If the condition separates an example with feature value < 0 from an example with feature value - // > 0 - refinement.threshold = arithmeticMean(previousThresholdNegative, previousThreshold); - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - - // Check if a condition that uses the > operator covers at least `minCoverage` examples... - uint32 coverage = numExamples - numAccumulatedNegative; - - if (coverage >= minCoverage) { - // Determine the best prediction for the covered examples... - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScoresUncoveredAccumulated(); - - // Check if the quality of the prediction is better than the quality of the current rule... 
- if (comparator.isImprovement(scoreVector)) { - refinement.start = 0; - refinement.end = (lastNegativeR + 1); - refinement.previous = previousRNegative; - refinement.numCovered = coverage; - refinement.covered = false; - refinement.comparator = GR; - - if (numAccumulatedTotal < numExamples) { - // If the condition separates an example with feature value < 0 from an (sparse) example with - // feature value == 0 - refinement.threshold = previousThresholdNegative * 0.5; - } else { - // If the condition separates an example with feature value < 0 from an example with feature value - // > 0 - refinement.threshold = arithmeticMean(previousThresholdNegative, previousThreshold); - } - - comparator.pushRefinement(refinement, scoreVector); - } - } - } - - // If there are examples with zero weights, those examples have not been considered when searching for potential - // refinements. In this case, we need to identify the examples that are covered by a refinement, including those - // that have previously been ignored, and adjust the value `refinement.end`, which specifies the position that - // separates the covered from the uncovered examples, accordingly. 
- if (hasZeroWeights) { - for (auto it = comparator.begin(); it != comparator.end(); it++) { - adjustRefinement(*it, featureVectorIterator); - } - } -} - -template -ExactRuleRefinement::ExactRuleRefinement(const IndexVector& labelIndices, uint32 numExamples, - uint32 featureIndex, bool nominal, bool hasZeroWeights, - std::unique_ptr callbackPtr) - : labelIndices_(labelIndices), numExamples_(numExamples), featureIndex_(featureIndex), nominal_(nominal), - hasZeroWeights_(hasZeroWeights), callbackPtr_(std::move(callbackPtr)) {} - -template -void ExactRuleRefinement::findRefinement(SingleRefinementComparator& comparator, uint32 minCoverage) { - findRefinementInternally(labelIndices_, numExamples_, featureIndex_, nominal_, minCoverage, hasZeroWeights_, - *callbackPtr_, comparator); -} - -template -void ExactRuleRefinement::findRefinement(FixedRefinementComparator& comparator, uint32 minCoverage) { - findRefinementInternally(labelIndices_, numExamples_, featureIndex_, nominal_, minCoverage, hasZeroWeights_, - *callbackPtr_, comparator); -} - -template class ExactRuleRefinement; -template class ExactRuleRefinement; diff --git a/cpp/subprojects/common/src/common/rule_refinement/score_processor.cpp b/cpp/subprojects/common/src/common/rule_refinement/score_processor.cpp deleted file mode 100644 index 571becc1..00000000 --- a/cpp/subprojects/common/src/common/rule_refinement/score_processor.cpp +++ /dev/null @@ -1,65 +0,0 @@ -#include "common/rule_refinement/score_processor.hpp" - -#include "common/data/arrays.hpp" -#include "common/rule_refinement/prediction_complete.hpp" -#include "common/rule_refinement/prediction_partial.hpp" - -template -static inline void processCompleteScores(std::unique_ptr& existingHeadPtr, - const T& scoreVector) { - uint32 numElements = scoreVector.getNumElements(); - - if (!existingHeadPtr) { - // Create a new head, if necessary... 
- existingHeadPtr = std::make_unique(numElements); - } - - copyArray(scoreVector.scores_cbegin(), existingHeadPtr->scores_begin(), numElements); - existingHeadPtr->quality = scoreVector.quality; -} - -template -static inline void processPartialScores(std::unique_ptr& existingHeadPtr, - const T& scoreVector) { - PartialPrediction* existingHead = dynamic_cast(existingHeadPtr.get()); - uint32 numElements = scoreVector.getNumElements(); - - if (!existingHead) { - // Create a new head, if necessary... - existingHeadPtr = std::make_unique(numElements, scoreVector.isSorted()); - existingHead = (PartialPrediction*) existingHeadPtr.get(); - } else { - // Adjust the size of the existing head, if necessary... - if (existingHead->getNumElements() != numElements) { - existingHead->setNumElements(numElements, false); - } - - existingHead->setSorted(scoreVector.isSorted()); - } - - copyArray(scoreVector.scores_cbegin(), existingHead->scores_begin(), numElements); - copyArray(scoreVector.indices_cbegin(), existingHead->indices_begin(), numElements); - existingHead->quality = scoreVector.quality; -} - -ScoreProcessor::ScoreProcessor(std::unique_ptr& headPtr) : headPtr_(headPtr) {} - -void ScoreProcessor::processScores(const DenseScoreVector& scoreVector) { - processCompleteScores(headPtr_, scoreVector); -} - -void ScoreProcessor::processScores(const DenseScoreVector& scoreVector) { - processPartialScores(headPtr_, scoreVector); -} - -void ScoreProcessor::processScores(const DenseBinnedScoreVector& scoreVector) { - processCompleteScores(headPtr_, scoreVector); -} - -void ScoreProcessor::processScores(const DenseBinnedScoreVector& scoreVector) { - processPartialScores(headPtr_, scoreVector); -} - -void ScoreProcessor::processScores(const IScoreVector& scoreVector) { - scoreVector.processScores(*this); -} diff --git a/cpp/subprojects/common/src/common/sampling/feature_sampling_no.cpp b/cpp/subprojects/common/src/common/sampling/feature_sampling_no.cpp deleted file mode 100644 index 
47e21e64..00000000 --- a/cpp/subprojects/common/src/common/sampling/feature_sampling_no.cpp +++ /dev/null @@ -1,57 +0,0 @@ -#include "common/sampling/feature_sampling_no.hpp" - -#include "common/indices/index_vector_complete.hpp" -#include "common/sampling/feature_sampling_predefined.hpp" - -/** - * An implementation of the class `IFeatureSampling` that does not perform any sampling, but includes all features. - */ -class NoFeatureSampling final : public IFeatureSampling { - private: - - const CompleteIndexVector indexVector_; - - public: - - /** - * @param numFeatures The total number of available features - */ - NoFeatureSampling(uint32 numFeatures) : indexVector_(CompleteIndexVector(numFeatures)) {} - - const IIndexVector& sample(RNG& rng) override { - return indexVector_; - } - - std::unique_ptr createBeamSearchFeatureSampling(RNG& rng, bool resample) override { - return std::make_unique(indexVector_); - } -}; - -/** - * Allows to create instances of the type `IFeatureSampling` that do not perform any sampling, but include all features. 
- */ -class NoFeatureSamplingFactory final : public IFeatureSamplingFactory { - private: - - const uint32 numFeatures_; - - public: - - /** - * @param numFeatures The total number of available features - */ - NoFeatureSamplingFactory(uint32 numFeatures) : numFeatures_(numFeatures) {} - - std::unique_ptr create() const override { - return std::make_unique(numFeatures_); - } -}; - -std::unique_ptr NoFeatureSamplingConfig::createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const { - return std::make_unique(featureMatrix.getNumCols()); -} - -bool NoFeatureSamplingConfig::isSamplingUsed() const { - return false; -} diff --git a/cpp/subprojects/common/src/common/sampling/feature_sampling_predefined.cpp b/cpp/subprojects/common/src/common/sampling/feature_sampling_predefined.cpp deleted file mode 100644 index affc06a8..00000000 --- a/cpp/subprojects/common/src/common/sampling/feature_sampling_predefined.cpp +++ /dev/null @@ -1,11 +0,0 @@ -#include "common/sampling/feature_sampling_predefined.hpp" - -PredefinedFeatureSampling::PredefinedFeatureSampling(const IIndexVector& indexVector) : indexVector_(indexVector) {} - -const IIndexVector& PredefinedFeatureSampling::sample(RNG& rng) { - return indexVector_; -} - -std::unique_ptr PredefinedFeatureSampling::createBeamSearchFeatureSampling(RNG& rng, bool resample) { - return std::make_unique(indexVector_); -} diff --git a/cpp/subprojects/common/src/common/sampling/feature_sampling_without_replacement.cpp b/cpp/subprojects/common/src/common/sampling/feature_sampling_without_replacement.cpp deleted file mode 100644 index 22a13f50..00000000 --- a/cpp/subprojects/common/src/common/sampling/feature_sampling_without_replacement.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "common/sampling/feature_sampling_without_replacement.hpp" - -#include "common/indices/index_vector_partial.hpp" -#include "common/iterator/index_iterator.hpp" -#include "common/sampling/feature_sampling_predefined.hpp" -#include 
"common/util/validation.hpp" -#include "index_sampling.hpp" - -/** - * Allows to select a subset of the available features without replacement. - */ -class FeatureSamplingWithoutReplacement final : public IFeatureSampling { - private: - - const uint32 numFeatures_; - - const uint32 numSamples_; - - const uint32 numRetained_; - - PartialIndexVector indexVector_; - - public: - - /** - * @param numFeatures The total number of available features - * @param numSamples The number of features to be included in the sample - * @param numRetained The number of trailing features to be always included in the sample - */ - FeatureSamplingWithoutReplacement(uint32 numFeatures, uint32 numSamples, uint32 numRetained) - : numFeatures_(numFeatures), numSamples_(numSamples), numRetained_(numRetained), - indexVector_(PartialIndexVector(numSamples + numRetained)) { - if (numRetained > 0) { - PartialIndexVector::iterator iterator = indexVector_.begin(); - uint32 offset = numFeatures - numRetained; - - for (uint32 i = 0; i < numRetained; i++) { - iterator[i] = offset + i; - } - } - } - - const IIndexVector& sample(RNG& rng) override { - uint32 numTotal = numFeatures_ - numRetained_; - sampleIndicesWithoutReplacement(&indexVector_.begin()[numRetained_], numSamples_, - IndexIterator(numTotal), numTotal, rng); - return indexVector_; - } - - std::unique_ptr createBeamSearchFeatureSampling(RNG& rng, bool resample) override { - if (resample) { - return std::make_unique(numFeatures_, numSamples_, numRetained_); - } else { - return std::make_unique(this->sample(rng)); - } - } -}; - -/** - * Allows to create instances of the type `IFeatureSampling` that select a random subset of the available features - * without replacement. 
- */ -class FeatureSamplingWithoutReplacementFactory final : public IFeatureSamplingFactory { - private: - - const uint32 numFeatures_; - - const uint32 numSamples_; - - const uint32 numRetained_; - - public: - - /** - * @param numFeatures The total number of available features - * @param numSamples The number of features to be included in the sample - * @param numRetained The number of trailing features to be always included in the sample - */ - FeatureSamplingWithoutReplacementFactory(uint32 numFeatures, uint32 numSamples, uint32 numRetained) - : numFeatures_(numFeatures), numSamples_(numSamples), numRetained_(numRetained) {} - - std::unique_ptr create() const override { - return std::make_unique(numFeatures_, numSamples_, numRetained_); - } -}; - -FeatureSamplingWithoutReplacementConfig::FeatureSamplingWithoutReplacementConfig() : sampleSize_(0), numRetained_(0) {} - -float32 FeatureSamplingWithoutReplacementConfig::getSampleSize() const { - return sampleSize_; -} - -IFeatureSamplingWithoutReplacementConfig& FeatureSamplingWithoutReplacementConfig::setSampleSize(float32 sampleSize) { - assertGreaterOrEqual("sampleSize", sampleSize, 0); - assertLess("sampleSize", sampleSize, 1); - sampleSize_ = sampleSize; - return *this; -} - -uint32 FeatureSamplingWithoutReplacementConfig::getNumRetained() const { - return numRetained_; -} - -IFeatureSamplingWithoutReplacementConfig& FeatureSamplingWithoutReplacementConfig::setNumRetained(uint32 numRetained) { - assertGreaterOrEqual("numRetained", numRetained, 0); - numRetained_ = numRetained; - return *this; -} - -std::unique_ptr FeatureSamplingWithoutReplacementConfig::createFeatureSamplingFactory( - const IFeatureMatrix& featureMatrix) const { - uint32 numFeatures = featureMatrix.getNumCols(); - uint32 numRetained = std::min(numRetained_, numFeatures); - uint32 numRemainingFeatures = numFeatures - numRetained; - uint32 numSamples = - (uint32) (sampleSize_ > 0 ? 
sampleSize_ * numRemainingFeatures : log2(numRemainingFeatures - 1) + 1); - return std::make_unique(numFeatures, numSamples, numRetained); -} - -bool FeatureSamplingWithoutReplacementConfig::isSamplingUsed() const { - return true; -} diff --git a/cpp/subprojects/common/src/common/sampling/index_sampling.hpp b/cpp/subprojects/common/src/common/sampling/index_sampling.hpp deleted file mode 100644 index cf20cae5..00000000 --- a/cpp/subprojects/common/src/common/sampling/index_sampling.hpp +++ /dev/null @@ -1,166 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/indices/index_vector_partial.hpp" - -#include - -/** - * Randomly selects `numSamples` out of `numTotal` indices without replacement by using a set to keep track of the - * indices that have already been selected. This method is suitable if `numSamples` is much smaller than `numTotal` - * - * @tparam TotalIterator The type of the iterator that provides random access to the available indices to sample from - * @param sampleIterator A `PartialIndexVector::iterator`, the sampled indices should be written to - * @param numSamples The number of indices to be sampled - * @param totalIterator An iterator that provides random access to the available indices to sample from - * @param numTotal The total number of available indices to sample from - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void sampleIndicesWithoutReplacementViaTrackingSelection(PartialIndexVector::iterator sampleIterator, - uint32 numSamples, TotalIterator totalIterator, - uint32 numTotal, RNG& rng) { - std::unordered_set selectedIndices; - - for (uint32 i = 0; i < numSamples; i++) { - bool shouldContinue = true; - uint32 sampledIndex; - - while (shouldContinue) { - uint32 randomIndex = rng.random(0, numTotal); - sampledIndex = totalIterator[randomIndex]; - shouldContinue = 
!selectedIndices.insert(sampledIndex).second; - } - - sampleIterator[i] = sampledIndex; - } -} - -/** - * Randomly selects `numSamples` out of `numTotal` indices without replacement using a reservoir sampling algorithm. - * This method is suitable if `numSamples` is almost as large as `numTotal`. - * - * @tparam TotalIterator The type of the iterator that provides random access to the available indices to sample from - * @param sampleIterator A `PartialIndexVector::iterator`, the sampled indices should be written to - * @param numSamples The number of indices to be sampled - * @param totalIterator An iterator that provides random access to the available indices to sample from - * @param numTotal The total number of available indices to sample from - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void sampleIndicesWithoutReplacementViaReservoirSampling(PartialIndexVector::iterator sampleIterator, - uint32 numSamples, TotalIterator totalIterator, - uint32 numTotal, RNG& rng) { - for (uint32 i = 0; i < numSamples; i++) { - sampleIterator[i] = totalIterator[i]; - } - - for (uint32 i = numSamples; i < numTotal; i++) { - uint32 randomIndex = rng.random(0, i + 1); - - if (randomIndex < numSamples) { - sampleIterator[randomIndex] = totalIterator[i]; - } - } -} - -/** - * Computes a random permutation of the indices that are contained by two mutually exclusive sets using the Fisher-Yates - * shuffle. 
- * - * @tparam FirstIterator The type of the iterator that provides random access to the indices that are contained by - * the first set - * @tparam SecondIterator The type of the iterator that provides random access to the indices that are contained by - * the second set - * @param firstIterator The iterator that provides random access to the indices that are contained by the first set - * @param secondIterator The iterator that provides random access to the indices that are contained by the second set - * @param numFirst The number of indices that are contained by the first set - * @param numTotal The total number of indices to sample from - * @param numPermutations The maximum number of permutations to be performed. Must be in [1, numTotal) - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void randomPermutation(FirstIterator firstIterator, SecondIterator secondIterator, uint32 numFirst, - uint32 numTotal, uint32 numPermutations, RNG& rng) { - for (uint32 i = 0; i < numPermutations; i++) { - // Swap elements at index i and at a randomly selected index... - uint32 randomIndex = rng.random(i, numTotal); - uint32 tmp1 = i < numFirst ? firstIterator[i] : secondIterator[i - numFirst]; - uint32 tmp2; - - if (randomIndex < numFirst) { - tmp2 = firstIterator[randomIndex]; - firstIterator[randomIndex] = tmp1; - } else { - tmp2 = secondIterator[randomIndex - numFirst]; - secondIterator[randomIndex - numFirst] = tmp1; - } - - if (i < numFirst) { - firstIterator[i] = tmp2; - } else { - secondIterator[i - numFirst] = tmp2; - } - } -} - -/** - * Randomly selects `numSamples` out of `numTotal` indices without replacement by first generating a random permutation - * of the available indices and then returning the first `numSamples` indices. 
- * - * @tparam TotalIterator The type of the iterator that provides random access to the available indices to sample from - * @param sampleIterator A `PartialIndexVector::iterator`, the sampled indices should be written to - * @param numSamples The number of indices to be sampled - * @param totalIterator An iterator that provides random access to the available indices to sample from - * @param numTotal The total number of available indices to sample from - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void sampleIndicesWithoutReplacementViaRandomPermutation(PartialIndexVector::iterator sampleIterator, - uint32 numSamples, TotalIterator totalIterator, - uint32 numTotal, RNG& rng) { - uint32* unusedIndices = new uint32[numTotal - numSamples]; - - for (uint32 i = 0; i < numSamples; i++) { - sampleIterator[i] = totalIterator[i]; - } - - for (uint32 i = numSamples; i < numTotal; i++) { - unusedIndices[i - numSamples] = totalIterator[i]; - } - - randomPermutation(sampleIterator, &unusedIndices[0], numSamples, numTotal, - numSamples, rng); - delete[] unusedIndices; -} - -/** - * Randomly selects `numSamples` out of `numTotal` indices without replacement. The method that is used internally is - * chosen automatically, depending on the ratio `numSamples / numTotal`. 
- * - * @tparam TotalIterator The type of the iterator that provides random access to the available indices to sample from - * @param sampleIterator A `PartialIndexVector::iterator`, the sampled indices should be written to - * @param numSamples The number of indices to be sampled - * @param totalIterator An iterator that provides random access to the available indices to sample from - * @param numTotal The total number of available indices to sample from - * @param rng A reference to an object of type `RNG`, implementing the random number generator to be used - */ -template -static inline void sampleIndicesWithoutReplacement(PartialIndexVector::iterator sampleIterator, uint32 numSamples, - TotalIterator totalIterator, uint32 numTotal, RNG& rng) { - float64 ratio = numTotal > 0 ? ((float64) numSamples) / ((float64) numTotal) : 1; - - // The thresholds for choosing a suitable method are based on empirical experiments - if (ratio < 0.06) { - // For very small ratios use tracking selection - sampleIndicesWithoutReplacementViaTrackingSelection(sampleIterator, numSamples, totalIterator, numTotal, rng); - } else if (ratio > 0.5) { - // For large ratios use reservoir sampling - sampleIndicesWithoutReplacementViaReservoirSampling(sampleIterator, numSamples, totalIterator, numTotal, rng); - } else { - // Otherwise, use random permutation as the default method - sampleIndicesWithoutReplacementViaRandomPermutation(sampleIterator, numSamples, totalIterator, numTotal, rng); - } -} diff --git a/cpp/subprojects/common/src/common/sampling/instance_sampling_no.cpp b/cpp/subprojects/common/src/common/sampling/instance_sampling_no.cpp deleted file mode 100644 index 0ce606b7..00000000 --- a/cpp/subprojects/common/src/common/sampling/instance_sampling_no.cpp +++ /dev/null @@ -1,87 +0,0 @@ -#include "common/sampling/instance_sampling_no.hpp" - -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/sampling/weight_vector_bit.hpp" 
-#include "common/sampling/weight_vector_equal.hpp" - -static inline void sampleInternally(const SinglePartition& partition, EqualWeightVector& weightVector, RNG& rng) { - return; -} - -static inline void sampleInternally(BiPartition& partition, BitWeightVector& weightVector, RNG& rng) { - uint32 numTrainingExamples = partition.getNumFirst(); - BiPartition::const_iterator indexIterator = partition.first_cbegin(); - weightVector.clear(); - - for (uint32 i = 0; i < numTrainingExamples; i++) { - uint32 index = indexIterator[i]; - weightVector.set(index, true); - } - - weightVector.setNumNonZeroWeights(numTrainingExamples); -} - -/** - * An implementation of the class `IInstanceSampling` that does not perform any sampling, but assigns equal weights to - * all examples. - * - * @tparam Partition The type of the object that provides access to the indices of the examples that are included in - * the training set - * @tparam WeightVector The type of the weight vector that is used to store the weights - */ -template -class NoInstanceSampling final : public IInstanceSampling { - private: - - Partition& partition_; - - WeightVector weightVector_; - - public: - - /** - * @param partition A reference to an object of template type `Partition` that provides access to the indices of - * the examples that are included in the training set - */ - NoInstanceSampling(Partition& partition) - : partition_(partition), weightVector_(WeightVector(partition.getNumElements())) {} - - const IWeightVector& sample(RNG& rng) override { - sampleInternally(partition_, weightVector_, rng); - return weightVector_; - } -}; - -/** - * Allows to create instances of the type `IInstanceSampling` that do not perform any sampling, but assign equal weights - * to all examples. 
- */ -class NoInstanceSamplingFactory final : public IInstanceSamplingFactory { - public: - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition); - } - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition); - } -}; - -std::unique_ptr NoInstanceSamplingConfig::createInstanceSamplingFactory() const { - return std::make_unique(); -} diff --git a/cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_example_wise.cpp b/cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_example_wise.cpp deleted file mode 100644 index 44bcb3dd..00000000 --- a/cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_example_wise.cpp +++ /dev/null @@ -1,113 +0,0 @@ -#include "common/sampling/instance_sampling_stratified_example_wise.hpp" - -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/sampling/stratified_sampling_example_wise.hpp" -#include "common/util/validation.hpp" - -/** - * Implements stratified sampling, where distinct label vectors are treated as individual classes. 
- * - * @tparam LabelMatrix The type of the label matrix that provides random or row-wise access to the labels of the - * training examples - * @tparam IndexIterator The type of the iterator that provides access to the indices of the examples that are - * contained by the training set - */ -template -class ExampleWiseStratifiedSampling final : public IInstanceSampling { - private: - - const float32 sampleSize_; - - BitWeightVector weightVector_; - - const ExampleWiseStratification stratification_; - - public: - - /** - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides random or - * row-wise access to the labels of the training examples - * @param indicesBegin An iterator to the beginning of the indices of the examples that are contained by - * the training set - * @param indicesEnd An iterator to the end of the indices of the examples that are contained by the - * training set - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 - * corresponds to 60 % of the available examples). Must be in (0, 1] - */ - ExampleWiseStratifiedSampling(const LabelMatrix& labelMatrix, IndexIterator indicesBegin, - IndexIterator indicesEnd, float32 sampleSize) - : sampleSize_(sampleSize), - weightVector_(BitWeightVector(labelMatrix.getNumRows(), - (uint32) (indicesEnd - indicesBegin) < labelMatrix.getNumRows())), - stratification_( - ExampleWiseStratification(labelMatrix, indicesBegin, indicesEnd)) {} - - const IWeightVector& sample(RNG& rng) override { - stratification_.sampleWeights(weightVector_, sampleSize_, rng); - return weightVector_; - } -}; - -/** - * Allows to create instances of the type `IInstanceSampling` that implement stratified sampling, where distinct label - * vectors are treated as individual classes. 
- */ -class ExampleWiseStratifiedInstanceSamplingFactory final : public IInstanceSamplingFactory { - private: - - const float32 sampleSize_; - - public: - - /** - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 corresponds to - * 60 % of the available examples). Must be in (0, 1] - */ - ExampleWiseStratifiedInstanceSamplingFactory(float32 sampleSize) : sampleSize_(sampleSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique< - ExampleWiseStratifiedSampling>( - labelMatrix, partition.cbegin(), partition.cend(), sampleSize_); - } - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>( - labelMatrix, partition.first_cbegin(), partition.first_cend(), sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>( - labelMatrix, partition.cbegin(), partition.cend(), sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>( - labelMatrix, partition.first_cbegin(), partition.first_cend(), sampleSize_); - } -}; - -ExampleWiseStratifiedInstanceSamplingConfig::ExampleWiseStratifiedInstanceSamplingConfig() : sampleSize_(0.66f) {} - -float32 ExampleWiseStratifiedInstanceSamplingConfig::getSampleSize() const { - return sampleSize_; -} - -IExampleWiseStratifiedInstanceSamplingConfig& ExampleWiseStratifiedInstanceSamplingConfig::setSampleSize( - float32 sampleSize) { - assertGreater("sampleSize", sampleSize, 0); - assertLess("sampleSize", sampleSize, 1); - sampleSize_ = sampleSize; - return *this; -} - -std::unique_ptr 
ExampleWiseStratifiedInstanceSamplingConfig::createInstanceSamplingFactory() - const { - return std::make_unique(sampleSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_label_wise.cpp b/cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_label_wise.cpp deleted file mode 100644 index 0f5256ec..00000000 --- a/cpp/subprojects/common/src/common/sampling/instance_sampling_stratified_label_wise.cpp +++ /dev/null @@ -1,115 +0,0 @@ -#include "common/sampling/instance_sampling_stratified_label_wise.hpp" - -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/sampling/stratified_sampling_label_wise.hpp" -#include "common/util/validation.hpp" - -/** - * Implements stratified sampling for selecting a subset of the available training examples, such that for each label - * the proportion of relevant and irrelevant examples is maintained. - * - * @tparam LabelMatrix The type of the label matrix that provides random or row-wise access to the labels of the - * training examples - * @tparam IndexIterator The type of the iterator that provides access to the indices of the examples that are - * contained by the training set - */ -template -class LabelWiseStratifiedSampling final : public IInstanceSampling { - private: - - const float32 sampleSize_; - - BitWeightVector weightVector_; - - const LabelWiseStratification stratification_; - - public: - - /** - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides random or - * row-wise access to the labels of the training examples - * @param indicesBegin An iterator to the beginning of the indices of the examples that are contained by - * the training set - * @param indicesEnd An iterator to the end of the indices of the examples that are contained by the - * training set - * @param sampleSize The fraction of examples to be included in the sample (e.g. 
a value of 0.6 - * corresponds to 60 % of the available examples). Must be in (0, 1] - */ - LabelWiseStratifiedSampling(const LabelMatrix& labelMatrix, IndexIterator indicesBegin, - IndexIterator indicesEnd, float32 sampleSize) - : sampleSize_(sampleSize), - weightVector_(BitWeightVector(labelMatrix.getNumRows(), - (uint32) (indicesEnd - indicesBegin) < labelMatrix.getNumRows())), - stratification_( - LabelWiseStratification(labelMatrix, indicesBegin, indicesEnd)) {} - - const IWeightVector& sample(RNG& rng) override { - stratification_.sampleWeights(weightVector_, sampleSize_, rng); - return weightVector_; - } -}; - -/** - * Allows to create instances of the type `IInstanceSampling` that implement stratified sampling for selecting a subset - * of the available training examples, such that for each label the proportion of relevant and irrelevant examples is - * maintained. - */ -class LabelWiseStratifiedInstanceSamplingFactory final : public IInstanceSamplingFactory { - private: - - const float32 sampleSize_; - - public: - - /** - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 corresponds to - * 60 % of the available examples). 
Must be in (0, 1] - */ - LabelWiseStratifiedInstanceSamplingFactory(float32 sampleSize) : sampleSize_(sampleSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique< - LabelWiseStratifiedSampling>( - labelMatrix, partition.cbegin(), partition.cend(), sampleSize_); - } - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>( - labelMatrix, partition.first_cbegin(), partition.first_cend(), sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>( - labelMatrix, partition.cbegin(), partition.cend(), sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>( - labelMatrix, partition.first_cbegin(), partition.first_cend(), sampleSize_); - } -}; - -LabelWiseStratifiedInstanceSamplingConfig::LabelWiseStratifiedInstanceSamplingConfig() : sampleSize_(0.66f) {} - -float32 LabelWiseStratifiedInstanceSamplingConfig::getSampleSize() const { - return sampleSize_; -} - -ILabelWiseStratifiedInstanceSamplingConfig& LabelWiseStratifiedInstanceSamplingConfig::setSampleSize( - float32 sampleSize) { - assertGreater("sampleSize", sampleSize, 0); - assertLess("sampleSize", sampleSize, 1); - sampleSize_ = sampleSize; - return *this; -} - -std::unique_ptr LabelWiseStratifiedInstanceSamplingConfig::createInstanceSamplingFactory() - const { - return std::make_unique(sampleSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/instance_sampling_with_replacement.cpp b/cpp/subprojects/common/src/common/sampling/instance_sampling_with_replacement.cpp deleted file mode 100644 index a307f282..00000000 --- 
a/cpp/subprojects/common/src/common/sampling/instance_sampling_with_replacement.cpp +++ /dev/null @@ -1,148 +0,0 @@ -#include "common/sampling/instance_sampling_with_replacement.hpp" - -#include "common/data/arrays.hpp" -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/sampling/weight_vector_dense.hpp" -#include "common/util/validation.hpp" - -static inline void sampleInternally(const SinglePartition& partition, float32 sampleSize, - DenseWeightVector& weightVector, RNG& rng) { - uint32 numExamples = partition.getNumElements(); - uint32 numSamples = (uint32) (sampleSize * numExamples); - typename DenseWeightVector::iterator weightIterator = weightVector.begin(); - setArrayToZeros(weightIterator, numExamples); - uint32 numNonZeroWeights = 0; - - for (uint32 i = 0; i < numSamples; i++) { - // Randomly select the index of an example... - uint32 randomIndex = rng.random(0, numExamples); - - // Update weight at the selected index... - uint32 previousWeight = weightIterator[randomIndex]; - weightIterator[randomIndex] = previousWeight + 1; - - if (previousWeight == 0) { - numNonZeroWeights++; - } - } - - weightVector.setNumNonZeroWeights(numNonZeroWeights); -} - -static inline void sampleInternally(BiPartition& partition, float32 sampleSize, DenseWeightVector& weightVector, - RNG& rng) { - uint32 numExamples = partition.getNumElements(); - uint32 numTrainingExamples = partition.getNumFirst(); - uint32 numSamples = (uint32) (sampleSize * numTrainingExamples); - BiPartition::const_iterator indexIterator = partition.first_cbegin(); - typename DenseWeightVector::iterator weightIterator = weightVector.begin(); - setArrayToZeros(weightIterator, numExamples); - uint32 numNonZeroWeights = 0; - - for (uint32 i = 0; i < numSamples; i++) { - // Randomly select the index of an example... 
- uint32 randomIndex = rng.random(0, numTrainingExamples); - uint32 sampledIndex = indexIterator[randomIndex]; - - // Update weight at the selected index... - uint32 previousWeight = weightIterator[sampledIndex]; - weightIterator[sampledIndex] = previousWeight + 1; - - if (previousWeight == 0) { - numNonZeroWeights++; - } - } - - weightVector.setNumNonZeroWeights(numNonZeroWeights); -} - -/** - * Allows to select a subset of the available training examples with replacement. - * - * @tparam Partition The type of the object that provides access to the indices of the examples that are included in the - * training set - */ -template -class InstanceSamplingWithReplacement final : public IInstanceSampling { - private: - - Partition& partition_; - - const float32 sampleSize_; - - DenseWeightVector weightVector_; - - public: - - /** - * @param partition A reference to an object of template type `Partition` that provides access to the indices - * of the examples that are included in the training set - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 corresponds to - * 60 % of the available examples). Must be in (0, 1] - */ - InstanceSamplingWithReplacement(Partition& partition, float32 sampleSize) - : partition_(partition), sampleSize_(sampleSize), - weightVector_(DenseWeightVector(partition.getNumElements())) {} - - const IWeightVector& sample(RNG& rng) override { - sampleInternally(partition_, sampleSize_, weightVector_, rng); - return weightVector_; - } -}; - -/** - * Allows to create instances of the type `IInstanceSampling` that allow to select a subset of the available training - * examples with replacement. - */ -class InstanceSamplingWithReplacementFactory final : public IInstanceSamplingFactory { - private: - - const float32 sampleSize_; - - public: - - /** - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 corresponds to - * 60 % of the available examples). 
Must be in (0, 1] - */ - InstanceSamplingWithReplacementFactory(float32 sampleSize) : sampleSize_(sampleSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } -}; - -InstanceSamplingWithReplacementConfig::InstanceSamplingWithReplacementConfig() : sampleSize_(0.66f) {} - -float32 InstanceSamplingWithReplacementConfig::getSampleSize() const { - return sampleSize_; -} - -IInstanceSamplingWithReplacementConfig& InstanceSamplingWithReplacementConfig::setSampleSize(float32 sampleSize) { - assertGreater("sampleSize", sampleSize, 0); - assertLessOrEqual("sampleSize", sampleSize, 1); - sampleSize_ = sampleSize; - return *this; -} - -std::unique_ptr InstanceSamplingWithReplacementConfig::createInstanceSamplingFactory() const { - return std::make_unique(sampleSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/instance_sampling_without_replacement.cpp b/cpp/subprojects/common/src/common/sampling/instance_sampling_without_replacement.cpp deleted file mode 100644 index ec430138..00000000 --- a/cpp/subprojects/common/src/common/sampling/instance_sampling_without_replacement.cpp +++ /dev/null @@ -1,114 +0,0 @@ -#include "common/sampling/instance_sampling_without_replacement.hpp" - -#include "common/iterator/index_iterator.hpp" -#include 
"common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/sampling/weight_sampling.hpp" -#include "common/util/validation.hpp" - -static inline void sampleInternally(const SinglePartition& partition, float32 sampleSize, BitWeightVector& weightVector, - RNG& rng) { - uint32 numExamples = partition.getNumElements(); - uint32 numSamples = (uint32) (sampleSize * numExamples); - sampleWeightsWithoutReplacement(weightVector, partition.cbegin(), numExamples, - numSamples, rng); -} - -static inline void sampleInternally(BiPartition& partition, float32 sampleSize, BitWeightVector& weightVector, - RNG& rng) { - uint32 numTrainingExamples = partition.getNumFirst(); - uint32 numSamples = (uint32) (sampleSize * numTrainingExamples); - sampleWeightsWithoutReplacement(weightVector, partition.first_cbegin(), - numTrainingExamples, numSamples, rng); -} - -/** - * Allows to select a subset of the available training examples without replacement. - * - * @tparam Partition The type of the object that provides access to the indices of the examples that are included in the - * training set - */ -template -class InstanceSamplingWithoutReplacement final : public IInstanceSampling { - private: - - Partition& partition_; - - const float32 sampleSize_; - - BitWeightVector weightVector_; - - public: - - /** - * @param partition A reference to an object of template type `Partition` that provides access to the indices - * of the examples that are included in the training set - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 corresponds to - * 60 % of the available examples). 
Must be in (0, 1) - */ - InstanceSamplingWithoutReplacement(Partition& partition, float32 sampleSize) - : partition_(partition), sampleSize_(sampleSize), - weightVector_(BitWeightVector(partition.getNumElements())) {} - - const IWeightVector& sample(RNG& rng) override { - sampleInternally(partition_, sampleSize_, weightVector_, rng); - return weightVector_; - } -}; - -/** - * Allows to create instances of the type `IInstanceSampling` that allow to select a subset of the available training - * examples without replacement. - */ -class InstanceSamplingWithoutReplacementFactory final : public IInstanceSamplingFactory { - private: - - const float32 sampleSize_; - - public: - - /** - * @param sampleSize The fraction of examples to be included in the sample (e.g. a value of 0.6 corresponds to - * 60 % of the available examples). Must be in (0, 1) - */ - InstanceSamplingWithoutReplacementFactory(float32 sampleSize) : sampleSize_(sampleSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, - const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, const SinglePartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix, BiPartition& partition, - IStatistics& statistics) const override { - return std::make_unique>(partition, sampleSize_); - } -}; - -InstanceSamplingWithoutReplacementConfig::InstanceSamplingWithoutReplacementConfig() : sampleSize_(0.66f) {} - -float32 InstanceSamplingWithoutReplacementConfig::getSampleSize() const { - return sampleSize_; -} - -IInstanceSamplingWithoutReplacementConfig& 
InstanceSamplingWithoutReplacementConfig::setSampleSize(float32 sampleSize) { - assertGreater("sampleSize", sampleSize, 0); - assertLess("sampleSize", sampleSize, 1); - sampleSize_ = sampleSize; - return *this; -} - -std::unique_ptr InstanceSamplingWithoutReplacementConfig::createInstanceSamplingFactory() - const { - return std::make_unique(sampleSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/label_sampling_no.cpp b/cpp/subprojects/common/src/common/sampling/label_sampling_no.cpp deleted file mode 100644 index 359150ea..00000000 --- a/cpp/subprojects/common/src/common/sampling/label_sampling_no.cpp +++ /dev/null @@ -1,48 +0,0 @@ -#include "common/sampling/label_sampling_no.hpp" - -#include "common/indices/index_vector_complete.hpp" - -/** - * An implementation of the class `ILabelSampling` that does not perform any sampling, but includes all labels. - */ -class NoLabelSampling final : public ILabelSampling { - private: - - const CompleteIndexVector indexVector_; - - public: - - /** - * @param numLabels The total number of available labels - */ - NoLabelSampling(uint32 numLabels) : indexVector_(numLabels) {} - - const IIndexVector& sample(RNG& rng) override { - return indexVector_; - } -}; - -/** - * Allows to create objects of the class `ILabelSampling` that do not perform any sampling, but include all labels. 
- */ -class NoLabelSamplingFactory final : public ILabelSamplingFactory { - private: - - const uint32 numLabels_; - - public: - - /** - * @param numLabels The total number of available labels - */ - NoLabelSamplingFactory(uint32 numLabels) : numLabels_(numLabels) {} - - std::unique_ptr create() const override { - return std::make_unique(numLabels_); - } -}; - -std::unique_ptr NoLabelSamplingConfig::createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const { - return std::make_unique(labelMatrix.getNumCols()); -} diff --git a/cpp/subprojects/common/src/common/sampling/label_sampling_round_robin.cpp b/cpp/subprojects/common/src/common/sampling/label_sampling_round_robin.cpp deleted file mode 100644 index b656722e..00000000 --- a/cpp/subprojects/common/src/common/sampling/label_sampling_round_robin.cpp +++ /dev/null @@ -1,60 +0,0 @@ -#include "common/sampling/label_sampling_round_robin.hpp" - -#include "common/indices/index_vector_partial.hpp" - -/** - * Allows to select a sinle label in a round-robin fashion. - */ -class RoundRobinLabelSampling final : public ILabelSampling { - private: - - const uint32 numLabels_; - - PartialIndexVector indexVector_; - - uint32 nextIndex_; - - public: - - /** - * @param numLabels The total number of available labels - */ - RoundRobinLabelSampling(uint32 numLabels) - : numLabels_(numLabels), indexVector_(PartialIndexVector(1)), nextIndex_(0) {} - - const IIndexVector& sample(RNG& rng) override { - indexVector_.begin()[0] = nextIndex_; - nextIndex_++; - - if (nextIndex_ >= numLabels_) { - nextIndex_ = 0; - } - - return indexVector_; - } -}; - -/** - * Allows to create objects of type `ILabelSampling` that select a single label in a round-robin fashion. 
- */ -class RoundRobinLabelSamplingFactory final : public ILabelSamplingFactory { - private: - - const uint32 numLabels_; - - public: - - /** - * @param numLabels The total number of available labels - */ - RoundRobinLabelSamplingFactory(uint32 numLabels) : numLabels_(numLabels) {} - - std::unique_ptr create() const override { - return std::make_unique(numLabels_); - } -}; - -std::unique_ptr RoundRobinLabelSamplingConfig::createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const { - return std::make_unique(labelMatrix.getNumCols()); -} diff --git a/cpp/subprojects/common/src/common/sampling/label_sampling_without_replacement.cpp b/cpp/subprojects/common/src/common/sampling/label_sampling_without_replacement.cpp deleted file mode 100644 index 69826c1e..00000000 --- a/cpp/subprojects/common/src/common/sampling/label_sampling_without_replacement.cpp +++ /dev/null @@ -1,74 +0,0 @@ -#include "common/sampling/label_sampling_without_replacement.hpp" - -#include "common/indices/index_vector_partial.hpp" -#include "common/iterator/index_iterator.hpp" -#include "common/util/validation.hpp" -#include "index_sampling.hpp" - -/** - * Allows to select a subset of the available labels without replacement. 
- */ -class LabelSamplingWithoutReplacement final : public ILabelSampling { - private: - - const uint32 numLabels_; - - PartialIndexVector indexVector_; - - public: - - /** - * @param numLabels The total number of available labels - * @param numSamples The number of labels to be included in the sample - */ - LabelSamplingWithoutReplacement(uint32 numLabels, uint32 numSamples) - : numLabels_(numLabels), indexVector_(PartialIndexVector(numSamples)) {} - - const IIndexVector& sample(RNG& rng) override { - sampleIndicesWithoutReplacement(indexVector_.begin(), indexVector_.getNumElements(), - IndexIterator(numLabels_), numLabels_, rng); - return indexVector_; - } -}; - -/** - * Allows to create objects of type `ILabelSampling` that select a random subset of the available labels without - * replacement. - */ -class LabelSamplingWithoutReplacementFactory final : public ILabelSamplingFactory { - private: - - const uint32 numLabels_; - - const uint32 numSamples_; - - public: - - /** - * @param numLabels The total number of available labels - * @param numSamples The number of labels to be included in the sample. Must be at least 1 - */ - LabelSamplingWithoutReplacementFactory(uint32 numLabels, uint32 numSamples) - : numLabels_(numLabels), numSamples_(numSamples > numLabels ? 
numLabels : numSamples) {} - - std::unique_ptr create() const override { - return std::make_unique(numLabels_, numSamples_); - } -}; - -LabelSamplingWithoutReplacementConfig::LabelSamplingWithoutReplacementConfig() : numSamples_(1) {} - -uint32 LabelSamplingWithoutReplacementConfig::getNumSamples() const { - return numSamples_; -} - -ILabelSamplingWithoutReplacementConfig& LabelSamplingWithoutReplacementConfig::setNumSamples(uint32 numSamples) { - assertGreaterOrEqual("numSamples", numSamples, 1); - numSamples_ = numSamples; - return *this; -} - -std::unique_ptr LabelSamplingWithoutReplacementConfig::createLabelSamplingFactory( - const ILabelMatrix& labelMatrix) const { - return std::make_unique(labelMatrix.getNumCols(), numSamples_); -} diff --git a/cpp/subprojects/common/src/common/sampling/partition_bi.cpp b/cpp/subprojects/common/src/common/sampling/partition_bi.cpp deleted file mode 100644 index ea7b26b9..00000000 --- a/cpp/subprojects/common/src/common/sampling/partition_bi.cpp +++ /dev/null @@ -1,103 +0,0 @@ -#include "common/sampling/partition_bi.hpp" - -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/rule_refinement/prediction.hpp" -#include "common/sampling/instance_sampling.hpp" -#include "common/stopping/stopping_criterion.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -#include - -BiPartition::BiPartition(uint32 numFirst, uint32 numSecond) - : vector_(DenseVector(numFirst + numSecond)), numFirst_(numFirst), firstSorted_(false), - secondSorted_(false) {} - -BiPartition::iterator BiPartition::first_begin() { - return vector_.begin(); -} - -BiPartition::iterator BiPartition::first_end() { - return &vector_.begin()[numFirst_]; -} - -BiPartition::const_iterator BiPartition::first_cbegin() const { - return vector_.cbegin(); -} - -BiPartition::const_iterator BiPartition::first_cend() const { - return &vector_.cbegin()[numFirst_]; -} - -BiPartition::iterator BiPartition::second_begin() { - return 
&vector_.begin()[numFirst_]; -} - -BiPartition::iterator BiPartition::second_end() { - return vector_.end(); -} - -BiPartition::const_iterator BiPartition::second_cbegin() const { - return &vector_.cbegin()[numFirst_]; -} - -BiPartition::const_iterator BiPartition::second_cend() const { - return vector_.cend(); -} - -uint32 BiPartition::getNumFirst() const { - return numFirst_; -} - -uint32 BiPartition::getNumSecond() const { - return vector_.getNumElements() - numFirst_; -} - -uint32 BiPartition::getNumElements() const { - return vector_.getNumElements(); -} - -void BiPartition::sortFirst() { - if (!firstSorted_) { - std::sort(this->first_begin(), this->first_end(), std::less()); - firstSorted_ = true; - } -} - -void BiPartition::sortSecond() { - if (!secondSorted_) { - std::sort(this->second_begin(), this->second_end(), std::less()); - secondSorted_ = true; - } -} - -std::unique_ptr BiPartition::createStoppingCriterion(const IStoppingCriterionFactory& factory) { - return factory.create(*this); -} - -std::unique_ptr BiPartition::createInstanceSampling(const IInstanceSamplingFactory& factory, - const IRowWiseLabelMatrix& labelMatrix, - IStatistics& statistics) { - return labelMatrix.createInstanceSampling(factory, *this, statistics); -} - -Quality BiPartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, - const AbstractPrediction& head) { - return coverageState.evaluateOutOfSample(thresholdsSubset, *this, head); -} - -void BiPartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const ICoverageState& coverageState, - AbstractPrediction& head) { - coverageState.recalculatePrediction(thresholdsSubset, *this, head); -} - -std::unique_ptr BiPartition::fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) { - return 
labelMatrix.fitMarginalProbabilityCalibrationModel(probabilityCalibrator, *this, statistics); -} - -std::unique_ptr BiPartition::fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) { - return labelMatrix.fitJointProbabilityCalibrationModel(probabilityCalibrator, *this, statistics); -} diff --git a/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_random.cpp b/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_random.cpp deleted file mode 100644 index b1858488..00000000 --- a/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_random.cpp +++ /dev/null @@ -1,90 +0,0 @@ -#include "common/sampling/partition_sampling_bi_random.hpp" - -#include "common/sampling/partition_bi.hpp" -#include "common/util/validation.hpp" -#include "index_sampling.hpp" - -/** - * Allows to randomly split the training examples into two mutually exclusive sets that may be used as a training set - * and a holdout set. 
- */ -class RandomBiPartitionSampling final : public IPartitionSampling { - private: - - BiPartition partition_; - - public: - - /** - * @param numTraining The number of examples to be included in the training set - * @param numHoldout The number of examples to be included in the holdout set - */ - RandomBiPartitionSampling(uint32 numTraining, uint32 numHoldout) - : partition_(BiPartition(numTraining, numHoldout)) {} - - IPartition& partition(RNG& rng) override { - uint32 numTraining = partition_.getNumFirst(); - uint32 numHoldout = partition_.getNumSecond(); - BiPartition::iterator trainingIterator = partition_.first_begin(); - setArrayToIncreasingValues(trainingIterator, numTraining, 0, 1); - BiPartition::iterator holdoutIterator = partition_.second_begin(); - - for (uint32 i = 0; i < numHoldout; i++) { - holdoutIterator[i] = numTraining + i; - } - - uint32 numTotal = partition_.getNumElements(); - randomPermutation(trainingIterator, holdoutIterator, - numTraining, numTotal, numTraining, rng); - return partition_; - } -}; - -/** - * Allows to create objects of the type `IPartitionSampling` that randomly split the training examples into two mutually - * exclusive sets that may be used as a training set and a holdout set. - */ -class RandomBiPartitionSamplingFactory final : public IPartitionSamplingFactory { - private: - - const float32 holdoutSetSize_; - - public: - - /** - * @param holdoutSetSize The fraction of examples to be included in the holdout set (e.g. a value of 0.6 - * corresponds to 60 % of the available examples). 
Must be in (0, 1) - */ - RandomBiPartitionSamplingFactory(float32 holdoutSetSize) : holdoutSetSize_(holdoutSetSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix) const override { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numHoldout = (uint32) (holdoutSetSize_ * numExamples); - uint32 numTraining = numExamples - numHoldout; - return std::make_unique(numTraining, numHoldout); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix) const override { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numHoldout = (uint32) (holdoutSetSize_ * numExamples); - uint32 numTraining = numExamples - numHoldout; - return std::make_unique(numTraining, numHoldout); - } -}; - -RandomBiPartitionSamplingConfig::RandomBiPartitionSamplingConfig() : holdoutSetSize_(0.33f) {} - -float32 RandomBiPartitionSamplingConfig::getHoldoutSetSize() const { - return holdoutSetSize_; -} - -IRandomBiPartitionSamplingConfig& RandomBiPartitionSamplingConfig::setHoldoutSetSize(float32 holdoutSetSize) { - assertGreater("holdoutSetSize", holdoutSetSize, 0); - assertLess("holdoutSetSize", holdoutSetSize, 1); - holdoutSetSize_ = holdoutSetSize; - return *this; -} - -std::unique_ptr RandomBiPartitionSamplingConfig::createPartitionSamplingFactory() const { - return std::make_unique(holdoutSetSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_example_wise.cpp b/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_example_wise.cpp deleted file mode 100644 index ae6365a0..00000000 --- a/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_example_wise.cpp +++ /dev/null @@ -1,94 +0,0 @@ -#include "common/sampling/partition_sampling_bi_stratified_example_wise.hpp" - -#include "common/iterator/index_iterator.hpp" -#include "common/sampling/stratified_sampling_example_wise.hpp" -#include "common/util/validation.hpp" - -/** - * Allows to use stratified sampling, 
where distinct label vectors are treated as individual classes, to split the - * training examples into two mutually exclusive sets that may be used as a training set and a holdout set. - * - * @tparam LabelMatrix The type of the label matrix that provides random or row-wise access to the labels of the - * training examples - */ -template -class ExampleWiseStratifiedBiPartitionSampling final : public IPartitionSampling { - private: - - BiPartition partition_; - - const ExampleWiseStratification stratification_; - - public: - - /** - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides random or row-wise - * access to the labels of the training examples - * @param numTraining The number of examples to be included in the training set - * @param numHoldout The number of examples to be included in the holdout set - */ - ExampleWiseStratifiedBiPartitionSampling(const LabelMatrix& labelMatrix, uint32 numTraining, uint32 numHoldout) - : partition_(BiPartition(numTraining, numHoldout)), - stratification_(ExampleWiseStratification( - labelMatrix, IndexIterator(), IndexIterator(labelMatrix.getNumRows()))) {} - - IPartition& partition(RNG& rng) override { - stratification_.sampleBiPartition(partition_, rng); - return partition_; - } -}; - -/** - * Allows to create objects of the type `IPartitionSampling` that use stratified sampling, where distinct label vectors - * are treated as individual classes, to split the training examples into two mutually exclusive sets that may be used - * as a training set and a holdout set. - */ -class ExampleWiseStratifiedBiPartitionSamplingFactory final : public IPartitionSamplingFactory { - private: - - const float32 holdoutSetSize_; - - public: - - /** - * @param holdoutSetSize The fraction of examples to be included in the holdout set (e.g. a value of 0.6 - * corresponds to 60 % of the available examples). 
Must be in (0, 1) - */ - ExampleWiseStratifiedBiPartitionSamplingFactory(float32 holdoutSetSize) : holdoutSetSize_(holdoutSetSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix) const override { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numHoldout = (uint32) (holdoutSetSize_ * numExamples); - uint32 numTraining = numExamples - numHoldout; - return std::make_unique>( - labelMatrix, numTraining, numHoldout); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix) const override { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numHoldout = (uint32) (holdoutSetSize_ * numExamples); - uint32 numTraining = numExamples - numHoldout; - return std::make_unique>(labelMatrix, numTraining, - numHoldout); - } -}; - -ExampleWiseStratifiedBiPartitionSamplingConfig::ExampleWiseStratifiedBiPartitionSamplingConfig() - : holdoutSetSize_(0.33f) {} - -float32 ExampleWiseStratifiedBiPartitionSamplingConfig::getHoldoutSetSize() const { - return holdoutSetSize_; -} - -IExampleWiseStratifiedBiPartitionSamplingConfig& ExampleWiseStratifiedBiPartitionSamplingConfig::setHoldoutSetSize( - float32 holdoutSetSize) { - assertGreater("holdoutSetSize", holdoutSetSize, 0); - assertLess("holdoutSetSize", holdoutSetSize, 1); - holdoutSetSize_ = holdoutSetSize; - return *this; -} - -std::unique_ptr - ExampleWiseStratifiedBiPartitionSamplingConfig::createPartitionSamplingFactory() const { - return std::make_unique(holdoutSetSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_label_wise.cpp b/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_label_wise.cpp deleted file mode 100644 index 3aff871d..00000000 --- a/cpp/subprojects/common/src/common/sampling/partition_sampling_bi_stratified_label_wise.cpp +++ /dev/null @@ -1,94 +0,0 @@ -#include "common/sampling/partition_sampling_bi_stratified_label_wise.hpp" - -#include "common/iterator/index_iterator.hpp" -#include 
"common/sampling/stratified_sampling_label_wise.hpp" -#include "common/util/validation.hpp" - -/** - * Allows to use stratified sampling to split the training examples into two mutually exclusive sets that may be used as - * a training set and a holdout set, such that for each label the proportion of relevant and irrelevant examples is - * maintained. - * - * @tparam LabelMatrix The type of the label matrix that provides random or row-wise access to the labels of the - * training examples - */ -template -class LabelWiseStratifiedBiPartitionSampling final : public IPartitionSampling { - private: - - BiPartition partition_; - - const LabelWiseStratification stratification_; - - public: - - /** - * @param labelMatrix A reference to an object of template type `LabelMatrix` that provides random or row-wise - * access to the labels of the training examples - * @param numTraining The number of examples to be included in the training set - * @param numHoldout The number of examples to be included in the holdout set - */ - LabelWiseStratifiedBiPartitionSampling(const LabelMatrix& labelMatrix, uint32 numTraining, uint32 numHoldout) - : partition_(BiPartition(numTraining, numHoldout)), - stratification_(LabelWiseStratification( - labelMatrix, IndexIterator(), IndexIterator(labelMatrix.getNumRows()))) {} - - IPartition& partition(RNG& rng) override { - stratification_.sampleBiPartition(partition_, rng); - return partition_; - } -}; - -/** - * Allows to create objects of the type `IPartitionSampling` that use stratified sampling to split the training examples - * into two mutually exclusive sets that may be used as a training set and a holdout set, such that for each label the - * proportion of relevant and irrelevant examples is maintained. 
- */ -class LabelWiseStratifiedBiPartitionSamplingFactory final : public IPartitionSamplingFactory { - private: - - const float32 holdoutSetSize_; - - public: - - /** - * @param holdoutSetSize The fraction of examples to be included in the holdout set (e.g. a value of 0.6 - * corresponds to 60 % of the available examples). Must be in (0, 1) - */ - LabelWiseStratifiedBiPartitionSamplingFactory(float32 holdoutSetSize) : holdoutSetSize_(holdoutSetSize) {} - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix) const override { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numHoldout = (uint32) (holdoutSetSize_ * numExamples); - uint32 numTraining = numExamples - numHoldout; - return std::make_unique>( - labelMatrix, numTraining, numHoldout); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix) const override { - uint32 numExamples = labelMatrix.getNumRows(); - uint32 numHoldout = (uint32) (holdoutSetSize_ * numExamples); - uint32 numTraining = numExamples - numHoldout; - return std::make_unique>(labelMatrix, numTraining, - numHoldout); - } -}; - -LabelWiseStratifiedBiPartitionSamplingConfig::LabelWiseStratifiedBiPartitionSamplingConfig() : holdoutSetSize_(0.33f) {} - -float32 LabelWiseStratifiedBiPartitionSamplingConfig::getHoldoutSetSize() const { - return holdoutSetSize_; -} - -ILabelWiseStratifiedBiPartitionSamplingConfig& LabelWiseStratifiedBiPartitionSamplingConfig::setHoldoutSetSize( - float32 holdoutSetSize) { - assertGreater("holdoutSetSize", holdoutSetSize, 0); - assertLess("holdoutSetSize", holdoutSetSize, 1); - holdoutSetSize_ = holdoutSetSize; - return *this; -} - -std::unique_ptr - LabelWiseStratifiedBiPartitionSamplingConfig::createPartitionSamplingFactory() const { - return std::make_unique(holdoutSetSize_); -} diff --git a/cpp/subprojects/common/src/common/sampling/partition_sampling_no.cpp b/cpp/subprojects/common/src/common/sampling/partition_sampling_no.cpp deleted file mode 100644 index 3521af34..00000000 --- 
a/cpp/subprojects/common/src/common/sampling/partition_sampling_no.cpp +++ /dev/null @@ -1,44 +0,0 @@ -#include "common/sampling/partition_sampling_no.hpp" - -#include "common/sampling/partition_single.hpp" - -/** - * An implementation of the class `IPartitionSampling` that does not split the training examples, but includes all of - * them in the training set. - */ -class NoPartitionSampling final : public IPartitionSampling { - private: - - SinglePartition partition_; - - public: - - /** - * @param numExamples The total number of available training examples - */ - NoPartitionSampling(uint32 numExamples) : partition_(SinglePartition(numExamples)) {} - - IPartition& partition(RNG& rng) override { - return partition_; - } -}; - -/** - * Allows to create objects of the type `IPartitionSampling` that do not split the training examples, but include all of - * them in the training set. - */ -class NoPartitionSamplingFactory final : public IPartitionSamplingFactory { - public: - - std::unique_ptr create(const CContiguousLabelMatrix& labelMatrix) const override { - return std::make_unique(labelMatrix.getNumRows()); - } - - std::unique_ptr create(const CsrLabelMatrix& labelMatrix) const override { - return std::make_unique(labelMatrix.getNumRows()); - } -}; - -std::unique_ptr NoPartitionSamplingConfig::createPartitionSamplingFactory() const { - return std::make_unique(); -} diff --git a/cpp/subprojects/common/src/common/sampling/partition_single.cpp b/cpp/subprojects/common/src/common/sampling/partition_single.cpp deleted file mode 100644 index bd089414..00000000 --- a/cpp/subprojects/common/src/common/sampling/partition_single.cpp +++ /dev/null @@ -1,53 +0,0 @@ -#include "common/sampling/partition_single.hpp" - -#include "common/prediction/probability_calibration_joint.hpp" -#include "common/rule_refinement/prediction.hpp" -#include "common/sampling/instance_sampling.hpp" -#include "common/stopping/stopping_criterion.hpp" -#include "common/thresholds/thresholds_subset.hpp" 
- -SinglePartition::SinglePartition(uint32 numElements) : numElements_(numElements) {} - -SinglePartition::const_iterator SinglePartition::cbegin() const { - return IndexIterator(); -} - -SinglePartition::const_iterator SinglePartition::cend() const { - return IndexIterator(numElements_); -} - -uint32 SinglePartition::getNumElements() const { - return numElements_; -} - -std::unique_ptr SinglePartition::createStoppingCriterion(const IStoppingCriterionFactory& factory) { - return factory.create(*this); -} - -std::unique_ptr SinglePartition::createInstanceSampling(const IInstanceSamplingFactory& factory, - const IRowWiseLabelMatrix& labelMatrix, - IStatistics& statistics) { - return labelMatrix.createInstanceSampling(factory, *this, statistics); -} - -Quality SinglePartition::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, const AbstractPrediction& head) { - return coverageState.evaluateOutOfSample(thresholdsSubset, *this, head); -} - -void SinglePartition::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, - const ICoverageState& coverageState, AbstractPrediction& head) { - coverageState.recalculatePrediction(thresholdsSubset, *this, head); -} - -std::unique_ptr SinglePartition::fitMarginalProbabilityCalibrationModel( - const IMarginalProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) { - return labelMatrix.fitMarginalProbabilityCalibrationModel(probabilityCalibrator, *this, statistics); -} - -std::unique_ptr SinglePartition::fitJointProbabilityCalibrationModel( - const IJointProbabilityCalibrator& probabilityCalibrator, const IRowWiseLabelMatrix& labelMatrix, - const IStatistics& statistics) { - return labelMatrix.fitJointProbabilityCalibrationModel(probabilityCalibrator, *this, statistics); -} diff --git a/cpp/subprojects/common/src/common/sampling/random.cpp b/cpp/subprojects/common/src/common/sampling/random.cpp deleted file 
mode 100644 index 9262c9f4..00000000 --- a/cpp/subprojects/common/src/common/sampling/random.cpp +++ /dev/null @@ -1,20 +0,0 @@ -#include "common/sampling/random.hpp" - -const uint32 MAX_RANDOM = 0x7FFFFFFF; - -RNG::RNG(uint32 randomState) : randomState_(randomState) {} - -uint32 RNG::random(uint32 min, uint32 max) { - uint32* randomState = &randomState_; - - if (randomState[0] == 0) { - randomState[0] = 1; - } - - randomState[0] ^= (uint32) (randomState[0] << 13); - randomState[0] ^= (uint32) (randomState[0] >> 17); - randomState[0] ^= (uint32) (randomState[0] << 5); - - uint32 randomNumber = randomState[0] % (MAX_RANDOM + 1); - return min + (randomNumber % (max - min)); -} diff --git a/cpp/subprojects/common/src/common/sampling/stratified_sampling_common.hpp b/cpp/subprojects/common/src/common/sampling/stratified_sampling_common.hpp deleted file mode 100644 index 2c9c5cd7..00000000 --- a/cpp/subprojects/common/src/common/sampling/stratified_sampling_common.hpp +++ /dev/null @@ -1,16 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/sampling/random.hpp" - -static inline bool tiebreak(uint32 numDesiredSamples, uint32 numDesiredOutOfSamples, RNG& rng) { - if (numDesiredSamples > numDesiredOutOfSamples) { - return true; - } else if (numDesiredSamples < numDesiredOutOfSamples) { - return false; - } else { - return rng.random(0, 2) != 0; - } -} diff --git a/cpp/subprojects/common/src/common/sampling/stratified_sampling_example_wise.cpp b/cpp/subprojects/common/src/common/sampling/stratified_sampling_example_wise.cpp deleted file mode 100644 index c876dcb5..00000000 --- a/cpp/subprojects/common/src/common/sampling/stratified_sampling_example_wise.cpp +++ /dev/null @@ -1,126 +0,0 @@ -#include "common/sampling/stratified_sampling_example_wise.hpp" - -#include "common/sampling/partition_single.hpp" -#include "stratified_sampling_common.hpp" - -#include - -template 
-ExampleWiseStratification::ExampleWiseStratification(const LabelMatrix& labelMatrix, - IndexIterator indicesBegin, - IndexIterator indicesEnd) - : numTotal_(indicesEnd - indicesBegin) { - // Create a map that stores the indices of the examples that are associated with each unique label vector... - for (uint32 i = 0; i < numTotal_; i++) { - uint32 exampleIndex = indicesBegin[i]; - std::vector& exampleIndices = map_[labelMatrix.createView(exampleIndex)]; - exampleIndices.push_back(exampleIndex); - } - - // Sort the label vectors by their frequency... - order_.reserve(map_.size()); - - for (auto it = map_.begin(); it != map_.end(); it++) { - auto& entry = *it; - std::vector& exampleIndices = entry.second; - order_.push_back(exampleIndices); - } - - std::sort(order_.begin(), order_.end(), [=](const std::vector& a, const std::vector& b) { - return a.size() < b.size(); - }); -} - -template -void ExampleWiseStratification::sampleWeights(BitWeightVector& weightVector, - float32 sampleSize, RNG& rng) const { - uint32 numTotalSamples = (uint32) std::round(sampleSize * numTotal_); - uint32 numTotalOutOfSamples = numTotal_ - numTotalSamples; - uint32 numNonZeroWeights = 0; - uint32 numZeroWeights = 0; - - for (auto it = order_.begin(); it != order_.end(); it++) { - std::vector& exampleIndices = *it; - std::vector::iterator indexIterator = exampleIndices.begin(); - uint32 numExamples = exampleIndices.size(); - float32 numSamplesDecimal = sampleSize * numExamples; - uint32 numDesiredSamples = numTotalSamples - numNonZeroWeights; - uint32 numDesiredOutOfSamples = numTotalOutOfSamples - numZeroWeights; - uint32 numSamples = - (uint32) (tiebreak(numDesiredSamples, numDesiredOutOfSamples, rng) ? std::ceil(numSamplesDecimal) - : std::floor(numSamplesDecimal)); - numNonZeroWeights += numSamples; - numZeroWeights += (numExamples - numSamples); - - // Use the Fisher-Yates shuffle to randomly draw `numSamples` examples and set their weight to 1... 
- uint32 i; - - for (i = 0; i < numSamples; i++) { - uint32 randomIndex = rng.random(i, numExamples); - uint32 exampleIndex = indexIterator[randomIndex]; - indexIterator[randomIndex] = indexIterator[i]; - indexIterator[i] = exampleIndex; - weightVector.set(exampleIndex, true); - } - - // Set the weights of the remaining examples to 0... - for (; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - weightVector.set(exampleIndex, false); - } - } - - weightVector.setNumNonZeroWeights(numNonZeroWeights); -} - -template -void ExampleWiseStratification::sampleBiPartition(BiPartition& partition, RNG& rng) const { - BiPartition::iterator firstIterator = partition.first_begin(); - BiPartition::iterator secondIterator = partition.second_begin(); - uint32 numFirst = partition.getNumFirst(); - uint32 numSecond = partition.getNumSecond(); - - for (auto it = order_.begin(); it != order_.end(); it++) { - std::vector& exampleIndices = *it; - std::vector::iterator indexIterator = exampleIndices.begin(); - uint32 numExamples = exampleIndices.size(); - float32 sampleSize = (float32) numFirst / (float32) (numFirst + numSecond); - float32 numSamplesDecimal = sampleSize * numExamples; - uint32 numSamples = - (uint32) (tiebreak(numFirst, numSecond, rng) ? std::ceil(numSamplesDecimal) : std::floor(numSamplesDecimal)); - - // Ensure that we do not add too many examples to the first or second partition... - if (numSamples > numFirst) { - numSamples = numFirst; - } else if (numExamples - numSamples > numSecond) { - numSamples = numExamples - numSecond; - } - - numFirst -= numSamples; - numSecond -= (numExamples - numSamples); - - // Use the Fisher-Yates shuffle to randomly draw `numSamples` examples and add them to the first set... 
- uint32 i; - - for (i = 0; i < numSamples; i++) { - uint32 randomIndex = rng.random(i, numExamples); - uint32 exampleIndex = indexIterator[randomIndex]; - indexIterator[randomIndex] = indexIterator[i]; - indexIterator[i] = exampleIndex; - *firstIterator = exampleIndex; - firstIterator++; - } - - // Add the remaining examples to the second set... - for (; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - *secondIterator = exampleIndex; - secondIterator++; - } - } -} - -template class ExampleWiseStratification; -template class ExampleWiseStratification; -template class ExampleWiseStratification; -template class ExampleWiseStratification; diff --git a/cpp/subprojects/common/src/common/sampling/stratified_sampling_label_wise.cpp b/cpp/subprojects/common/src/common/sampling/stratified_sampling_label_wise.cpp deleted file mode 100644 index 8de72a54..00000000 --- a/cpp/subprojects/common/src/common/sampling/stratified_sampling_label_wise.cpp +++ /dev/null @@ -1,293 +0,0 @@ -#include "common/sampling/stratified_sampling_label_wise.hpp" - -#include "common/data/indexed_value.hpp" -#include "common/input/label_matrix_c_contiguous.hpp" -#include "common/input/label_matrix_csc.hpp" -#include "common/input/label_matrix_csr.hpp" -#include "common/sampling/partition_single.hpp" -#include "stratified_sampling_common.hpp" - -#include -#include - -/** - * Allows to compare two objects of type `IndexedValue` according to the following strict weak ordering: If the value of - * the first object is smaller, it goes before the second one. If the values of both objects are equal and the index of - * the first object is smaller, it goes before the second one. Otherwise, the first object goes after the second one. - */ -struct CompareIndexedValue final { - public: - - /** - * Returns whether the a given object of type `IndexedValue` should go before a second one. 
- * - * @param lhs A reference to a first object of type `IndexedValue` - * @param rhs A reference to a second object of type `IndexedValue` - * @return True, if the first object should go before the second one, false otherwise - */ - inline bool operator()(const IndexedValue& lhs, const IndexedValue& rhs) const { - return lhs.value < rhs.value || (lhs.value == rhs.value && lhs.index < rhs.index); - } -}; - -static inline void updateNumExamplesPerLabel(const CContiguousLabelMatrix& labelMatrix, uint32 exampleIndex, - uint32* numExamplesPerLabel, - std::unordered_map& affectedLabelIndices) { - CContiguousLabelMatrix::value_const_iterator labelIterator = labelMatrix.values_cbegin(exampleIndex); - uint32 numLabels = labelMatrix.getNumCols(); - - for (uint32 i = 0; i < numLabels; i++) { - if (labelIterator[i]) { - uint32 numRemaining = numExamplesPerLabel[i]; - numExamplesPerLabel[i] = numRemaining - 1; - affectedLabelIndices.emplace(i, numRemaining); - } - } -} - -static inline void updateNumExamplesPerLabel(const CsrLabelMatrix& labelMatrix, uint32 exampleIndex, - uint32* numExamplesPerLabel, - std::unordered_map& affectedLabelIndices) { - CsrLabelMatrix::index_const_iterator indexIterator = labelMatrix.indices_cbegin(exampleIndex); - uint32 numLabels = labelMatrix.indices_cend(exampleIndex) - indexIterator; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 labelIndex = indexIterator[i]; - uint32 numRemaining = numExamplesPerLabel[labelIndex]; - numExamplesPerLabel[labelIndex] = numRemaining - 1; - affectedLabelIndices.emplace(labelIndex, numRemaining); - } -} - -template -LabelWiseStratification::LabelWiseStratification(const LabelMatrix& labelMatrix, - IndexIterator indicesBegin, - IndexIterator indicesEnd) - : numRows_(indicesEnd - indicesBegin) { - // Convert the given label matrix into the CSC format... 
- const CscLabelMatrix cscLabelMatrix(labelMatrix, indicesBegin, indicesEnd); - - // Create an array that stores for each label the number of examples that are associated with the label, as well as - // a sorted map that stores all label indices in increasing order of the number of associated examples... - uint32 numLabels = cscLabelMatrix.getNumCols(); - uint32* numExamplesPerLabel = new uint32[numLabels]; - typedef std::set, CompareIndexedValue> SortedSet; - SortedSet sortedLabelIndices; - - for (uint32 i = 0; i < numLabels; i++) { - uint32 numExamples = cscLabelMatrix.indices_cend(i) - cscLabelMatrix.indices_cbegin(i); - numExamplesPerLabel[i] = numExamples; - - if (numExamples > 0) { - sortedLabelIndices.emplace(i, numExamples); - } - } - - // Allocate arrays for storing the row and column indices of the labels to be processed by the sampling method in - // the CSC format... - rowIndices_ = (uint32*) malloc(cscLabelMatrix.getNumNonZeroElements() * sizeof(uint32)); - colIndices_ = (uint32*) malloc((sortedLabelIndices.size() + 1) * sizeof(uint32)); - uint32 numNonZeroElements = 0; - uint32 numCols = 0; - - // Create a boolean array that stores whether individual examples remain to be processed (1) or not (0)... - uint32 numTotalExamples = labelMatrix.getNumRows(); - BitVector mask(numTotalExamples, true); - - for (uint32 i = 0; i < numRows_; i++) { - uint32 exampleIndex = indicesBegin[i]; - mask.set(exampleIndex, true); - } - - // As long as there are labels that have not been processed yet, proceed with the label that has the smallest number - // of associated examples... - std::unordered_map affectedLabelIndices; - SortedSet::iterator firstEntry; - - while ((firstEntry = sortedLabelIndices.begin()) != sortedLabelIndices.end()) { - const IndexedValue& entry = *firstEntry; - uint32 labelIndex = entry.index; - - // Remove the label from the sorted map... 
- sortedLabelIndices.erase(firstEntry); - - // Add the number of non-zero labels that have been processed so far to the array of column indices... - colIndices_[numCols] = numNonZeroElements; - numCols++; - - // Iterate the examples that are associated with the current label, if no weight has been set yet... - CscLabelMatrix::index_const_iterator indexIterator = cscLabelMatrix.indices_cbegin(labelIndex); - uint32 numExamples = cscLabelMatrix.indices_cend(labelIndex) - indexIterator; - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - - // If the example has not been processed yet... - if (mask[exampleIndex]) { - mask.set(exampleIndex, false); - - // Add the example's index to the array of row indices... - rowIndices_[numNonZeroElements] = exampleIndex; - numNonZeroElements++; - - // For each label that is associated with the example, decrement the number of associated examples by - // one... - updateNumExamplesPerLabel(labelMatrix, exampleIndex, &numExamplesPerLabel[0], affectedLabelIndices); - } - } - - // Remove each label, for which the number of associated examples have been changed previously, from the sorted - // map and add it again to update the order... - for (auto it = affectedLabelIndices.cbegin(); it != affectedLabelIndices.cend(); it++) { - uint32 key = it->first; - - if (key != labelIndex) { - uint32 value = it->second; - SortedSet::iterator it2 = sortedLabelIndices.find(IndexedValue(key, value)); - uint32 numRemaining = numExamplesPerLabel[key]; - - if (numRemaining > 0) { - sortedLabelIndices.emplace_hint(it2, key, numRemaining); - } - - sortedLabelIndices.erase(it2); - } - } - - affectedLabelIndices.clear(); - } - - // If there are examples that are not associated with any labels, we handle them separately.. - uint32 numRemaining = numRows_ - numNonZeroElements; - - if (numRemaining > 0) { - // Adjust the size of the arrays that are used to store row and column indices... 
- rowIndices_ = (uint32*) realloc(rowIndices_, (numNonZeroElements + numRemaining) * sizeof(uint32)); - colIndices_ = (uint32*) realloc(colIndices_, (numCols + 2) * sizeof(uint32)); - - // Add the number of non-zero labels that have been processed so far to the array of column indices... - colIndices_[numCols] = numNonZeroElements; - numCols++; - - // Iterate the weights of all examples to find those whose weight has not been set yet... - for (uint32 i = 0; i < numTotalExamples; i++) { - if (mask[i]) { - // Add the example's index to the array of row indices... - rowIndices_[numNonZeroElements] = i; - numNonZeroElements++; - } - } - } else { - // Adjust the size of the arrays that are used to store row and column indices... - rowIndices_ = (uint32*) realloc(rowIndices_, numNonZeroElements * sizeof(uint32)); - colIndices_ = (uint32*) realloc(colIndices_, (numCols + 1) * sizeof(uint32)); - } - - colIndices_[numCols] = numNonZeroElements; - numCols_ = numCols; - - delete[] numExamplesPerLabel; -} - -template -LabelWiseStratification::~LabelWiseStratification() { - free(rowIndices_); - free(colIndices_); -} - -template -void LabelWiseStratification::sampleWeights(BitWeightVector& weightVector, - float32 sampleSize, RNG& rng) const { - uint32 numTotalSamples = (uint32) std::round(sampleSize * numRows_); - uint32 numTotalOutOfSamples = numRows_ - numTotalSamples; - uint32 numNonZeroWeights = 0; - uint32 numZeroWeights = 0; - - // For each column, assign a weight to the corresponding examples... - for (uint32 i = 0; i < numCols_; i++) { - uint32 start = colIndices_[i]; - uint32* exampleIndices = &rowIndices_[start]; - uint32 end = colIndices_[i + 1]; - uint32 numExamples = end - start; - float32 numSamplesDecimal = sampleSize * numExamples; - uint32 numDesiredSamples = numTotalSamples - numNonZeroWeights; - uint32 numDesiredOutOfSamples = numTotalOutOfSamples - numZeroWeights; - uint32 numSamples = - (uint32) (tiebreak(numDesiredSamples, numDesiredOutOfSamples, rng) ? 
std::ceil(numSamplesDecimal) - : std::floor(numSamplesDecimal)); - numNonZeroWeights += numSamples; - numZeroWeights += (numExamples - numSamples); - uint32 j; - - // Use the Fisher-Yates shuffle to randomly draw `numSamples` examples and set their weights to 1... - for (j = 0; j < numSamples; j++) { - uint32 randomIndex = rng.random(j, numExamples); - uint32 exampleIndex = exampleIndices[randomIndex]; - exampleIndices[randomIndex] = exampleIndices[j]; - exampleIndices[j] = exampleIndex; - weightVector.set(exampleIndex, true); - } - - // Set the weights of the remaining examples to 0... - for (; j < numExamples; j++) { - uint32 exampleIndex = exampleIndices[j]; - weightVector.set(exampleIndex, false); - } - } - - weightVector.setNumNonZeroWeights(numNonZeroWeights); -} - -template -void LabelWiseStratification::sampleBiPartition(BiPartition& partition, RNG& rng) const { - BiPartition::iterator firstIterator = partition.first_begin(); - BiPartition::iterator secondIterator = partition.second_begin(); - uint32 numFirst = partition.getNumFirst(); - uint32 numSecond = partition.getNumSecond(); - - for (uint32 i = 0; i < numCols_; i++) { - uint32 start = colIndices_[i]; - uint32* exampleIndices = &rowIndices_[start]; - uint32 end = colIndices_[i + 1]; - uint32 numExamples = end - start; - - float32 sampleSize = (float32) numFirst / (float32) (numFirst + numSecond); - float32 numSamplesDecimal = sampleSize * numExamples; - uint32 numSamples = - (uint32) (tiebreak(numFirst, numSecond, rng) ? std::ceil(numSamplesDecimal) : std::floor(numSamplesDecimal)); - - // Ensure that we do not add too many examples to the first or second partition... - if (numSamples > numFirst) { - numSamples = numFirst; - } else if (numExamples - numSamples > numSecond) { - numSamples = numExamples - numSecond; - } - - numFirst -= numSamples; - numSecond -= (numExamples - numSamples); - - // Use the Fisher-Yates shuffle to randomly draw `numSamples` examples and add them to the first set... 
- uint32 j; - - for (j = 0; j < numSamples; j++) { - uint32 randomIndex = rng.random(j, numExamples); - uint32 exampleIndex = exampleIndices[randomIndex]; - exampleIndices[randomIndex] = exampleIndices[j]; - exampleIndices[j] = exampleIndex; - *firstIterator = exampleIndex; - firstIterator++; - } - - // Add the remaining examples to the second set... - for (; j < numExamples; j++) { - uint32 exampleIndex = exampleIndices[j]; - *secondIterator = exampleIndex; - secondIterator++; - } - } -} - -template class LabelWiseStratification; -template class LabelWiseStratification; -template class LabelWiseStratification; -template class LabelWiseStratification; diff --git a/cpp/subprojects/common/src/common/sampling/weight_vector_bit.cpp b/cpp/subprojects/common/src/common/sampling/weight_vector_bit.cpp deleted file mode 100644 index bd3fe70d..00000000 --- a/cpp/subprojects/common/src/common/sampling/weight_vector_bit.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "common/sampling/weight_vector_bit.hpp" - -#include "common/thresholds/thresholds.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -BitWeightVector::BitWeightVector(uint32 numElements) : BitWeightVector(numElements, false) {} - -BitWeightVector::BitWeightVector(uint32 numElements, bool init) - : vector_(BitVector(numElements, init)), numNonZeroWeights_(0) {} - -uint32 BitWeightVector::getNumElements() const { - return vector_.getNumElements(); -} - -uint32 BitWeightVector::getNumNonZeroWeights() const { - return numNonZeroWeights_; -} - -void BitWeightVector::setNumNonZeroWeights(uint32 numNonZeroWeights) { - numNonZeroWeights_ = numNonZeroWeights; -} - -bool BitWeightVector::hasZeroWeights() const { - return numNonZeroWeights_ < vector_.getNumElements(); -} - -bool BitWeightVector::operator[](uint32 pos) const { - return vector_[pos]; -} - -void BitWeightVector::set(uint32 pos, bool weight) { - vector_.set(pos, weight); -} - -void BitWeightVector::clear() { - vector_.clear(); -} - -std::unique_ptr 
BitWeightVector::createThresholdsSubset(IThresholds& thresholds) const { - return thresholds.createSubset(*this); -} diff --git a/cpp/subprojects/common/src/common/sampling/weight_vector_dense.cpp b/cpp/subprojects/common/src/common/sampling/weight_vector_dense.cpp deleted file mode 100644 index 4cea2a82..00000000 --- a/cpp/subprojects/common/src/common/sampling/weight_vector_dense.cpp +++ /dev/null @@ -1,68 +0,0 @@ -#include "common/sampling/weight_vector_dense.hpp" - -#include "common/thresholds/thresholds.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -template -DenseWeightVector::DenseWeightVector(uint32 numElements) : DenseWeightVector(numElements, false) {} - -template -DenseWeightVector::DenseWeightVector(uint32 numElements, bool init) - : vector_(DenseVector(numElements, init)), numNonZeroWeights_(0) {} - -template -typename DenseWeightVector::iterator DenseWeightVector::begin() { - return vector_.begin(); -} - -template -typename DenseWeightVector::iterator DenseWeightVector::end() { - return vector_.end(); -} - -template -typename DenseWeightVector::const_iterator DenseWeightVector::cbegin() const { - return vector_.cbegin(); -} - -template -typename DenseWeightVector::const_iterator DenseWeightVector::cend() const { - return vector_.cend(); -} - -template -uint32 DenseWeightVector::getNumElements() const { - return vector_.getNumElements(); -} - -template -const T& DenseWeightVector::operator[](uint32 pos) const { - return vector_[pos]; -} - -template -T& DenseWeightVector::operator[](uint32 pos) { - return vector_[pos]; -} - -template -uint32 DenseWeightVector::getNumNonZeroWeights() const { - return numNonZeroWeights_; -} - -template -void DenseWeightVector::setNumNonZeroWeights(uint32 numNonZeroWeights) { - numNonZeroWeights_ = numNonZeroWeights; -} - -template -bool DenseWeightVector::hasZeroWeights() const { - return numNonZeroWeights_ < vector_.getNumElements(); -} - -template -std::unique_ptr 
DenseWeightVector::createThresholdsSubset(IThresholds& thresholds) const { - return thresholds.createSubset(*this); -} - -template class DenseWeightVector; diff --git a/cpp/subprojects/common/src/common/sampling/weight_vector_equal.cpp b/cpp/subprojects/common/src/common/sampling/weight_vector_equal.cpp deleted file mode 100644 index a9218822..00000000 --- a/cpp/subprojects/common/src/common/sampling/weight_vector_equal.cpp +++ /dev/null @@ -1,26 +0,0 @@ -#include "common/sampling/weight_vector_equal.hpp" - -#include "common/thresholds/thresholds.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -EqualWeightVector::EqualWeightVector(uint32 numElements) : numElements_(numElements) {} - -uint32 EqualWeightVector::getNumElements() const { - return numElements_; -} - -uint32 EqualWeightVector::operator[](uint32 pos) const { - return 1; -} - -uint32 EqualWeightVector::getNumNonZeroWeights() const { - return numElements_; -} - -bool EqualWeightVector::hasZeroWeights() const { - return false; -} - -std::unique_ptr EqualWeightVector::createThresholdsSubset(IThresholds& thresholds) const { - return thresholds.createSubset(*this); -} diff --git a/cpp/subprojects/common/src/common/sampling/weight_vector_out_of_sample.cpp b/cpp/subprojects/common/src/common/sampling/weight_vector_out_of_sample.cpp deleted file mode 100644 index 854913ec..00000000 --- a/cpp/subprojects/common/src/common/sampling/weight_vector_out_of_sample.cpp +++ /dev/null @@ -1,22 +0,0 @@ -#include "common/sampling/weight_vector_out_of_sample.hpp" - -#include "common/sampling/weight_vector_bit.hpp" -#include "common/sampling/weight_vector_dense.hpp" -#include "common/sampling/weight_vector_equal.hpp" - -template -OutOfSampleWeightVector::OutOfSampleWeightVector(const WeightVector& vector) : vector_(vector) {} - -template -uint32 OutOfSampleWeightVector::getNumElements() const { - return vector_.getNumElements(); -} - -template -bool OutOfSampleWeightVector::operator[](uint32 pos) const { - return 
vector_[pos] == 0; -} - -template class OutOfSampleWeightVector; -template class OutOfSampleWeightVector; -template class OutOfSampleWeightVector>; diff --git a/cpp/subprojects/common/src/common/stopping/aggregation_function_common.hpp b/cpp/subprojects/common/src/common/stopping/aggregation_function_common.hpp deleted file mode 100644 index 1dae7531..00000000 --- a/cpp/subprojects/common/src/common/stopping/aggregation_function_common.hpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/data/ring_buffer.hpp" -#include "common/math/math.hpp" - -#include - -/** - * Defines an interface for all classes that allow to aggregate the values that are stored in a buffer. - */ -class IAggregationFunction { - public: - - virtual ~IAggregationFunction() {}; - - /** - * Aggregates the values that are stored in a buffer. - * - * @param begin An iterator to the beginning of the buffer - * @param end An iterator to the end of the buffer - * @return The aggregated value - */ - virtual float64 aggregate(RingBuffer::const_iterator begin, - RingBuffer::const_iterator end) const = 0; -}; - -/** - * An implementation of the type `IAggregationFunction` that aggregates the values that are stored in a buffer by - * finding the minimum value. - */ -class MinAggregationFunction final : public IAggregationFunction { - public: - - float64 aggregate(RingBuffer::const_iterator begin, - RingBuffer::const_iterator end) const override { - uint32 numElements = end - begin; - float64 min = begin[0]; - - for (uint32 i = 1; i < numElements; i++) { - float64 value = begin[i]; - - if (value < min) { - min = value; - } - } - - return min; - } -}; - -/** - * An implementation of the type `IAggregationFunction` that aggregates the values that are stored in a buffer by - * finding the maximum value. 
- */ -class MaxAggregationFunction final : public IAggregationFunction { - public: - - float64 aggregate(RingBuffer::const_iterator begin, - RingBuffer::const_iterator end) const override { - uint32 numElements = end - begin; - float64 max = begin[0]; - - for (uint32 i = 1; i < numElements; i++) { - float64 value = begin[i]; - - if (value > max) { - max = value; - } - } - - return max; - } -}; - -/** - * An implementation of the type `IAggregationFunction` that aggregates the values that are stored in a buffer by - * calculating the arithmetic mean. - */ -class ArithmeticMeanAggregationFunction final : public IAggregationFunction { - public: - - float64 aggregate(RingBuffer::const_iterator begin, - RingBuffer::const_iterator end) const override { - uint32 numElements = end - begin; - float64 mean = 0; - - for (uint32 i = 0; i < numElements; i++) { - float64 value = begin[i]; - mean = iterativeArithmeticMean(i + 1, value, mean); - } - - return mean; - } -}; - -/** - * Defines an interface for all factories that allow to create instances of the type `IAggregationFunction`. - */ -class IAggregationFunctionFactory { - public: - - virtual ~IAggregationFunctionFactory() {}; - - /** - * Creates and returns a new object of type `IAggregationFunction`. - * - * @return An unique pointer to an object of type `IAggregationFunction` that has been created - */ - virtual std::unique_ptr create() const = 0; -}; - -/** - * Allows to create instances of the type `IAggregationFunction` that aggregate the values that are stored in a buffer - * by finding the minimum value. - */ -class MinAggregationFunctionFactory final : public IAggregationFunctionFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; - -/** - * Allows to create instances of the type `IAggregationFunction` that aggregate the values that are stored in a buffer - * by finding the maximum value. 
- */ -class MaxAggregationFunctionFactory final : public IAggregationFunctionFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; - -/** - * Allows to create instances of the type `IAggregationFunction` that aggregate the values that are stored in a buffer - * by calculating the arithmetic mean. - */ -class ArithmeticMeanAggregationFunctionFactory final : public IAggregationFunctionFactory { - public: - - std::unique_ptr create() const override { - return std::make_unique(); - } -}; - -/** - * Creates and returns a new object of type `IAggregationFunctionFactory` according to a given `AggregationFunction`. - * - * @param aggregationFunction A value of the enum `AggregationFunction` - * @return An unique pointer to an object of type `IAggregationFunctionFactory` that has been - * created - */ -std::unique_ptr createAggregationFunctionFactory(AggregationFunction aggregationFunction) { - switch (aggregationFunction) { - case AggregationFunction::MIN: - return std::make_unique(); - case AggregationFunction::MAX: - return std::make_unique(); - default: - return std::make_unique(); - } -} diff --git a/cpp/subprojects/common/src/common/stopping/global_pruning_common.hpp b/cpp/subprojects/common/src/common/stopping/global_pruning_common.hpp deleted file mode 100644 index 585b8abf..00000000 --- a/cpp/subprojects/common/src/common/stopping/global_pruning_common.hpp +++ /dev/null @@ -1,70 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/math/math.hpp" -#include "common/sampling/partition_bi.hpp" -#include "common/sampling/partition_single.hpp" -#include "common/statistics/statistics.hpp" - -/** - * Calculates and returns a numerical score that assesses the quality of a model's predictions for the examples in a - * training set. 
- * - * @param partition A reference to an object of type `SinglePartition` that provides access to the indices of the - * examples that are included in the training set - * @param useHoldoutSet True, if the quality of the predictions should be measured on the holdout set, if available, - * false, if the training set should be used instead - * @param statistics A reference to an object of type `IStatistics` that should be used to calculate the quality of - * the predictions - * @return The numerical score that has been calculated - */ -static inline float64 evaluate(const SinglePartition& partition, bool useHoldoutSet, const IStatistics& statistics) { - uint32 numExamples = partition.getNumElements(); - SinglePartition::const_iterator iterator = partition.cbegin(); - float64 mean = 0; - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = iterator[i]; - float64 score = statistics.evaluatePrediction(exampleIndex); - mean = iterativeArithmeticMean(i + 1, score, mean); - } - - return mean; -} - -/** - * Calculates and returns a numerical score that assesses the quality of a model's predictions for the examples in a - * training or holdout set, respectively. 
- * - * @param partition A reference to an object of type `BiPartition` that provides access to the indices of the - * examples that are included in the training and holdout set, respectively - * @param useHoldoutSet True, if the quality of the predictions should be measured on the holdout set, if available, - * false, if the training set should be used instead - * @param statistics A reference to an object of type `IStatistics` that should be used to calculate the quality of - * the predictions - * @return The numerical score that has been calculated - */ -static inline float64 evaluate(const BiPartition& partition, bool useHoldoutSet, const IStatistics& statistics) { - uint32 numExamples; - BiPartition::const_iterator iterator; - - if (useHoldoutSet) { - numExamples = partition.getNumSecond(); - iterator = partition.second_cbegin(); - } else { - numExamples = partition.getNumFirst(); - iterator = partition.first_cbegin(); - } - - float64 mean = 0; - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = iterator[i]; - float64 score = statistics.evaluatePrediction(exampleIndex); - mean = iterativeArithmeticMean(i + 1, score, mean); - } - - return mean; -} diff --git a/cpp/subprojects/common/src/common/stopping/global_pruning_post.cpp b/cpp/subprojects/common/src/common/stopping/global_pruning_post.cpp deleted file mode 100644 index 86b5034e..00000000 --- a/cpp/subprojects/common/src/common/stopping/global_pruning_post.cpp +++ /dev/null @@ -1,147 +0,0 @@ -#include "common/stopping/global_pruning_post.hpp" - -#include "common/util/validation.hpp" -#include "global_pruning_common.hpp" - -/** - * An implementation of the type `IStoppingCriterion` that that keeps track of the number of rules in a model that - * perform best with respect to the examples in the training or holdout set according to a certain measure. 
- * - * @tparam Partition The type of the object that provides access to the indices of the examples that are included in the - * training and holdout set, respectively - */ -template -class PostPruning final : public IStoppingCriterion { - private: - - const Partition& partition_; - - const bool useHoldoutSet_; - - const uint32 minRules_; - - const uint32 interval_; - - float64 bestScore_; - - uint32 bestNumRules_; - - public: - - /** - * @param partition A reference to an object of template type `Partition` that provides access to the - * indices of the examples that are included in the training and holdout set, respectively - * @param useHoldoutSet True, if the quality of the current model's predictions should be measured on the - * holdout set, if available, false otherwise - * @param minRules The minimum number of rules that must be included in a model. Must be at least 1 - * @param interval The interval to be used to check whether the current model is the best one evaluated so - * far, e.g., a value of 10 means that the best model may contain 10, 20, ... 
rules - */ - PostPruning(const Partition& partition, bool useHoldoutSet, uint32 minRules, uint32 interval) - : partition_(partition), useHoldoutSet_(useHoldoutSet), minRules_(minRules), interval_(interval), - bestScore_(std::numeric_limits::infinity()), bestNumRules_(minRules) {} - - Result test(const IStatistics& statistics, uint32 numRules) override { - Result result; - - if (numRules >= minRules_ && numRules % interval_ == 0) { - float64 currentScore = evaluate(partition_, useHoldoutSet_, statistics); - - if (currentScore < bestScore_) { - bestScore_ = currentScore; - bestNumRules_ = numRules; - result.numUsedRules = numRules; - } - } - - return result; - } -}; - -/** - * Allows to create implementations of the type `IStoppingCriterion` that stop the induction of rules as soon as the - * quality of a model's predictions for the examples in the training or holdout set do not improve according a certain - * measure. - */ -class PostPruningFactory final : public IStoppingCriterionFactory { - private: - - const bool useHoldoutSet_; - - const uint32 minRules_; - - const uint32 interval_; - - public: - - /** - * @param useHoldoutSet True, if the quality of the current model's predictions should be measured on the - * holdout set, if available, false otherwise - * @param minRules The minimum number of rules that must be included in a model. Must be at least 1 - * @param interval The interval to be used to check whether the current model is the best one evaluated so - * far, e.g., a value of 10 means that the best model may contain 10, 20, ... 
rules - */ - PostPruningFactory(bool useHoldoutSet, uint32 minRules, uint32 interval) - : useHoldoutSet_(useHoldoutSet), minRules_(minRules), interval_(interval) {} - - std::unique_ptr create(const SinglePartition& partition) const override { - return std::make_unique>(partition, useHoldoutSet_, minRules_, - interval_); - } - - std::unique_ptr create(BiPartition& partition) const override { - return std::make_unique>(partition, useHoldoutSet_, minRules_, interval_); - } -}; - -PostPruningConfig::PostPruningConfig() : useHoldoutSet_(true), removeUnusedRules_(true), minRules_(100), interval_(1) {} - -bool PostPruningConfig::isHoldoutSetUsed() const { - return useHoldoutSet_; -} - -IPostPruningConfig& PostPruningConfig::setUseHoldoutSet(bool useHoldoutSet) { - useHoldoutSet_ = useHoldoutSet; - return *this; -} - -bool PostPruningConfig::isRemoveUnusedRules() const { - return removeUnusedRules_; -} - -IPostPruningConfig& PostPruningConfig::setRemoveUnusedRules(bool removeUnusedRules) { - removeUnusedRules_ = removeUnusedRules; - return *this; -} - -uint32 PostPruningConfig::getMinRules() const { - return minRules_; -} - -IPostPruningConfig& PostPruningConfig::setMinRules(uint32 minRules) { - assertGreaterOrEqual("minRules", minRules, 1); - minRules_ = minRules; - return *this; -} - -uint32 PostPruningConfig::getInterval() const { - return interval_; -} - -IPostPruningConfig& PostPruningConfig::setInterval(uint32 interval) { - assertGreaterOrEqual("interval", interval, 1); - interval_ = interval; - return *this; -} - -std::unique_ptr PostPruningConfig::createStoppingCriterionFactory() const { - return std::make_unique(useHoldoutSet_, minRules_, interval_); -} - -bool PostPruningConfig::shouldUseHoldoutSet() const { - return useHoldoutSet_; -} - -bool PostPruningConfig::shouldRemoveUnusedRules() const { - return removeUnusedRules_; -} diff --git a/cpp/subprojects/common/src/common/stopping/global_pruning_pre.cpp 
b/cpp/subprojects/common/src/common/stopping/global_pruning_pre.cpp deleted file mode 100644 index 6a500632..00000000 --- a/cpp/subprojects/common/src/common/stopping/global_pruning_pre.cpp +++ /dev/null @@ -1,301 +0,0 @@ -#include "common/stopping/global_pruning_pre.hpp" - -#include "aggregation_function_common.hpp" -#include "common/util/validation.hpp" -#include "global_pruning_common.hpp" - -/** - * An implementation of the type `IStoppingCriterion` that stops the induction of rules as soon as the quality of a - * model's predictions for the examples in the training or holdout set do not improve according a certain measure. - * - * @tparam Partition The type of the object that provides access to the indices of the examples that are included in the - * training and holdout set, respectively - */ -template -class PrePruning final : public IStoppingCriterion { - private: - - const Partition& partition_; - - const std::unique_ptr aggregationFunctionPtr_; - - const bool useHoldoutSet_; - - const bool removeUnusedRules_; - - const uint32 updateInterval_; - - const uint32 stopInterval_; - - const float64 minImprovement_; - - RingBuffer pastBuffer_; - - RingBuffer recentBuffer_; - - uint32 offset_; - - float64 bestScore_; - - uint32 bestNumRules_; - - bool stopped_; - - public: - - /** - * @param partition A reference to an object of template type `Partition` that provides access - * to the indices of the examples that are included in the training and holdout - * set, respectively - * @param aggregationFunctionPtr An unique pointer to an object of type `IAggregationFunctionFactory` that - * allows to create implementations of the aggregation function that should be - * used to aggregate the scores in the buffer - * @param useHoldoutSet True, if the quality of the current model's predictions should be measured - * on the holdout set, if available, false otherwise - * @param removeUnusedRules True, if rules that have been induced, but are not used, should be removed - * 
from a model, false otherwise - * @param minRules The minimum number of rules that must have been learned until the induction - * of rules might be stopped. Must be at least 1 - * @param updateInterval The interval to be used to update the quality of the current model, e.g., a - * value of 5 means that the model quality is assessed every 5 rules. Must be - * at least 1 - * @param stopInterval The interval to be used to decide whether the induction of rules should be - * stopped, e.g., a value of 10 means that the rule induction might be stopped - * after 10, 20, ... rules. Must be a multiple of `updateInterval` - * @param numPast The number of past iterations to be stored in a buffer. Must be at least 1 - * @param numCurrent The number of the most recent iterations to be stored in a buffer. Must be - * at least 1 - * @param minImprovement The minimum improvement in percent that must be reached for the rule - * induction to be continued. Must be in [0, 1] - */ - PrePruning(const Partition& partition, std::unique_ptr aggregationFunctionPtr, - bool useHoldoutSet, bool removeUnusedRules, uint32 minRules, uint32 updateInterval, - uint32 stopInterval, uint32 numPast, uint32 numCurrent, float64 minImprovement) - : partition_(partition), aggregationFunctionPtr_(std::move(aggregationFunctionPtr)), - useHoldoutSet_(useHoldoutSet), removeUnusedRules_(removeUnusedRules), updateInterval_(updateInterval), - stopInterval_(stopInterval), minImprovement_(minImprovement), pastBuffer_(RingBuffer(numPast)), - recentBuffer_(RingBuffer(numCurrent)), bestScore_(std::numeric_limits::infinity()), - stopped_(false) { - uint32 bufferInterval = (numPast * updateInterval) + (numCurrent * updateInterval); - offset_ = bufferInterval < minRules ? 
minRules - bufferInterval : 0; - } - - Result test(const IStatistics& statistics, uint32 numRules) override { - Result result; - - if (!stopped_ && numRules > offset_ && numRules % updateInterval_ == 0) { - float64 currentScore = evaluate(partition_, useHoldoutSet_, statistics); - - if (pastBuffer_.isFull()) { - if (currentScore < bestScore_) { - bestScore_ = currentScore; - bestNumRules_ = numRules; - } - - if (numRules % stopInterval_ == 0) { - float64 aggregatedScorePast = - aggregationFunctionPtr_->aggregate(pastBuffer_.cbegin(), pastBuffer_.cend()); - float64 aggregatedScoreRecent = - aggregationFunctionPtr_->aggregate(recentBuffer_.cbegin(), recentBuffer_.cend()); - float64 percentageImprovement = - (aggregatedScorePast - aggregatedScoreRecent) / aggregatedScoreRecent; - - if (percentageImprovement <= minImprovement_) { - result.stop = removeUnusedRules_; - result.numUsedRules = bestNumRules_; - stopped_ = true; - } - } - } - - std::pair pair = recentBuffer_.push(currentScore); - - if (pair.first) { - pastBuffer_.push(pair.second); - } - } - - return result; - } -}; - -/** - * Allows to create implementations of the type `IStoppingCriterion` that stop the induction of rules as soon as the - * quality of a model's predictions for the examples in the training or holdout set do not improve according a certain - * measure. 
- */ -class PrePruningFactory final : public IStoppingCriterionFactory { - private: - - std::unique_ptr aggregationFunctionFactoryPtr_; - - bool useHoldoutSet_; - - bool removeUnusedRules_; - - uint32 minRules_; - - uint32 updateInterval_; - - uint32 stopInterval_; - - uint32 numPast_; - - uint32 numCurrent_; - - float64 minImprovement_; - - public: - - /** - * @param aggregationFunctionFactoryPtr An unique pointer to an object of type `IAggregationFunctionFactory` - * that allows to create implementations of the aggregation function that - * should be used to aggregate the scores in the buffer - * @param useHoldoutSet True, if the quality of the current model's predictions should be - * measured on the holdout set, if available, false otherwise - * @param removeUnusedRules True, if rules that have been induced, but are not used, should be - * removed from a model, false otherwise - * @param minRules The minimum number of rules that must have been learned until the - * induction of rules might be stopped. Must be at least 1 - * @param updateInterval The interval to be used to update the quality of the current model, - * e.g., a value of 5 means that the model quality is assessed every 5 - * rules. Must be at least 1 - * @param stopInterval The interval to be used to decide whether the induction of rules should - * be stopped, e.g., a value of 10 means that the rule induction might be - * stopped after 10, 20, ... rules. Must be a multiple of `updateInterval` - * @param numPast The number of past iterations to be stored in a buffer. Must be at least - * 1 - * @param numCurrent The number of the most recent iterations to be stored in a buffer. Must - * be at least 1 - * @param minImprovement The minimum improvement in percent that must be reached for the rule - * induction to be continued. 
Must be in [0, 1] - */ - PrePruningFactory(std::unique_ptr aggregationFunctionFactoryPtr, - bool useHoldoutSet, bool removeUnusedRules, uint32 minRules, uint32 updateInterval, - uint32 stopInterval, uint32 numPast, uint32 numCurrent, float64 minImprovement) - : aggregationFunctionFactoryPtr_(std::move(aggregationFunctionFactoryPtr)), useHoldoutSet_(useHoldoutSet), - minRules_(minRules), updateInterval_(updateInterval), stopInterval_(stopInterval), numPast_(numPast), - numCurrent_(numCurrent), minImprovement_(minImprovement) {} - - std::unique_ptr create(const SinglePartition& partition) const override { - std::unique_ptr aggregationFunctionPtr = aggregationFunctionFactoryPtr_->create(); - return std::make_unique>( - partition, std::move(aggregationFunctionPtr), useHoldoutSet_, removeUnusedRules_, minRules_, - updateInterval_, stopInterval_, numPast_, numCurrent_, minImprovement_); - } - - std::unique_ptr create(BiPartition& partition) const override { - std::unique_ptr aggregationFunctionPtr = aggregationFunctionFactoryPtr_->create(); - return std::make_unique>( - partition, std::move(aggregationFunctionPtr), useHoldoutSet_, removeUnusedRules_, minRules_, - updateInterval_, stopInterval_, numPast_, numCurrent_, minImprovement_); - } -}; - -PrePruningConfig::PrePruningConfig() - : aggregationFunction_(AggregationFunction::ARITHMETIC_MEAN), useHoldoutSet_(true), removeUnusedRules_(true), - minRules_(100), updateInterval_(1), stopInterval_(1), numPast_(50), numCurrent_(50), minImprovement_(0.005) {} - -AggregationFunction PrePruningConfig::getAggregationFunction() const { - return aggregationFunction_; -} - -IPrePruningConfig& PrePruningConfig::setAggregationFunction(AggregationFunction aggregationFunction) { - aggregationFunction_ = aggregationFunction; - return *this; -} - -bool PrePruningConfig::isHoldoutSetUsed() const { - return useHoldoutSet_; -} - -IPrePruningConfig& PrePruningConfig::setUseHoldoutSet(bool useHoldoutSet) { - useHoldoutSet_ = useHoldoutSet; - 
return *this; -} - -bool PrePruningConfig::isRemoveUnusedRules() const { - return removeUnusedRules_; -} - -IPrePruningConfig& PrePruningConfig::setRemoveUnusedRules(bool removeUnusedRules) { - removeUnusedRules_ = removeUnusedRules; - return *this; -} - -uint32 PrePruningConfig::getMinRules() const { - return minRules_; -} - -IPrePruningConfig& PrePruningConfig::setMinRules(uint32 minRules) { - assertGreaterOrEqual("minRules", minRules, 1); - minRules_ = minRules; - return *this; -} - -uint32 PrePruningConfig::getUpdateInterval() const { - return updateInterval_; -} - -IPrePruningConfig& PrePruningConfig::setUpdateInterval(uint32 updateInterval) { - assertGreaterOrEqual("updateInterval", updateInterval, 1); - updateInterval_ = updateInterval; - return *this; -} - -uint32 PrePruningConfig::getStopInterval() const { - return stopInterval_; -} - -IPrePruningConfig& PrePruningConfig::setStopInterval(uint32 stopInterval) { - assertMultiple("stopInterval", stopInterval, updateInterval_); - stopInterval_ = stopInterval; - return *this; -} - -uint32 PrePruningConfig::getNumPast() const { - return numPast_; -} - -IPrePruningConfig& PrePruningConfig::setNumPast(uint32 numPast) { - assertGreaterOrEqual("numPast", numPast, 1); - numPast_ = numPast; - return *this; -} - -uint32 PrePruningConfig::getNumCurrent() const { - return numCurrent_; -} - -IPrePruningConfig& PrePruningConfig::setNumCurrent(uint32 numCurrent) { - assertGreaterOrEqual("numCurrent", numCurrent, 1); - numCurrent_ = numCurrent; - return *this; -} - -float64 PrePruningConfig::getMinImprovement() const { - return minImprovement_; -} - -IPrePruningConfig& PrePruningConfig::setMinImprovement(float64 minImprovement) { - assertGreaterOrEqual("minImprovement", minImprovement, 0); - assertLessOrEqual("minImprovement", minImprovement, 1); - minImprovement_ = minImprovement; - return *this; -} - -std::unique_ptr PrePruningConfig::createStoppingCriterionFactory() const { - std::unique_ptr aggregationFunctionFactoryPtr 
= - createAggregationFunctionFactory(aggregationFunction_); - return std::make_unique(std::move(aggregationFunctionFactoryPtr), useHoldoutSet_, - removeUnusedRules_, minRules_, updateInterval_, stopInterval_, numPast_, - numCurrent_, minImprovement_); -} - -bool PrePruningConfig::shouldUseHoldoutSet() const { - return useHoldoutSet_; -} - -bool PrePruningConfig::shouldRemoveUnusedRules() const { - return removeUnusedRules_; -} diff --git a/cpp/subprojects/common/src/common/stopping/stopping_criterion_list.cpp b/cpp/subprojects/common/src/common/stopping/stopping_criterion_list.cpp deleted file mode 100644 index bb10e618..00000000 --- a/cpp/subprojects/common/src/common/stopping/stopping_criterion_list.cpp +++ /dev/null @@ -1,85 +0,0 @@ -#include "common/stopping/stopping_criterion_list.hpp" - -/** - * An implementation of the type `IStoppingCriterion` that tests multiple stopping criteria. - * - * - * @tparam Partition The type of the object that provides access to the indices of the examples that are included in the - * holdout set - */ -template -class StoppingCriterionList final : public IStoppingCriterion { - private: - - std::vector> stoppingCriteria_; - - public: - - /** - * @param partition A reference to an object of template type `Partition` that provides - * access to the indices of the examples that are included in the holdout - * set - * @param stoppingCriterionFactories A reference to a vector that stores the factories that allow to create - * instances of the stopping criteria to be tested - */ - StoppingCriterionList( - Partition& partition, - const std::vector>& stoppingCriterionFactories) { - stoppingCriteria_.reserve(stoppingCriterionFactories.size()); - - for (auto it = stoppingCriterionFactories.cbegin(); it != stoppingCriterionFactories.cend(); it++) { - const std::unique_ptr& stoppingCriterionFactoryPtr = *it; - stoppingCriteria_.push_back(std::move(stoppingCriterionFactoryPtr->create(partition))); - } - } - - Result test(const IStatistics& 
statistics, uint32 numRules) override { - Result result; - - for (auto it = stoppingCriteria_.begin(); it != stoppingCriteria_.end(); it++) { - std::unique_ptr& stoppingCriterionPtr = *it; - Result stoppingCriterionResult = stoppingCriterionPtr->test(statistics, numRules); - result.stop |= stoppingCriterionResult.stop; - uint32 numUsedRules = stoppingCriterionResult.numUsedRules; - - if (numUsedRules != 0) { - result.numUsedRules = numUsedRules; - } - } - - return result; - } -}; - -/** - * An implementation of the type `IStoppingCriterion` that does not test for any stopping criteria. - */ -class NoStoppingCriterion final : public IStoppingCriterion { - public: - - Result test(const IStatistics& statistics, uint32 numRules) override { - Result result; - return result; - } -}; - -void StoppingCriterionListFactory::addStoppingCriterionFactory( - std::unique_ptr stoppingCriterionFactoryPtr) { - stoppingCriterionFactories_.push_back(std::move(stoppingCriterionFactoryPtr)); -} - -std::unique_ptr StoppingCriterionListFactory::create(const SinglePartition& partition) const { - if (stoppingCriterionFactories_.empty()) { - return std::make_unique(); - } else { - return std::make_unique>(partition, stoppingCriterionFactories_); - } -} - -std::unique_ptr StoppingCriterionListFactory::create(BiPartition& partition) const { - if (stoppingCriterionFactories_.empty()) { - return std::make_unique(); - } else { - return std::make_unique>(partition, stoppingCriterionFactories_); - } -} diff --git a/cpp/subprojects/common/src/common/stopping/stopping_criterion_size.cpp b/cpp/subprojects/common/src/common/stopping/stopping_criterion_size.cpp deleted file mode 100644 index 68a99e70..00000000 --- a/cpp/subprojects/common/src/common/stopping/stopping_criterion_size.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "common/stopping/stopping_criterion_size.hpp" - -#include "common/util/validation.hpp" - -/** - * An implementation of the type `IStoppingCriterion` that ensures that the number of 
induced rules does not exceed a - * certain maximum. - */ -class SizeStoppingCriterion final : public IStoppingCriterion { - private: - - const uint32 maxRules_; - - public: - - /** - * @param maxRules The maximum number of rules. Must be at least 1 - */ - SizeStoppingCriterion(uint32 maxRules) : maxRules_(maxRules) {} - - Result test(const IStatistics& statistics, uint32 numRules) override { - Result result; - - if (numRules >= maxRules_) { - result.stop = true; - } - - return result; - } -}; - -/** - * Allows to create instances of the type `IStoppingCriterion` that ensure that the number of induced rules does not - * exceed a certain maximum. - */ -class SizeStoppingCriterionFactory final : public IStoppingCriterionFactory { - private: - - const uint32 maxRules_; - - public: - - /** - * @param maxRules The maximum number of rules. Must be at least 1 - */ - SizeStoppingCriterionFactory(uint32 maxRules) : maxRules_(maxRules) {} - - std::unique_ptr create(const SinglePartition& partition) const override { - return std::make_unique(maxRules_); - } - - std::unique_ptr create(BiPartition& partition) const override { - return std::make_unique(maxRules_); - } -}; - -SizeStoppingCriterionConfig::SizeStoppingCriterionConfig() : maxRules_(10) {} - -uint32 SizeStoppingCriterionConfig::getMaxRules() const { - return maxRules_; -} - -ISizeStoppingCriterionConfig& SizeStoppingCriterionConfig::setMaxRules(uint32 maxRules) { - assertGreaterOrEqual("maxRules", maxRules, 1); - maxRules_ = maxRules; - return *this; -} - -std::unique_ptr SizeStoppingCriterionConfig::createStoppingCriterionFactory() const { - return std::make_unique(maxRules_); -} diff --git a/cpp/subprojects/common/src/common/stopping/stopping_criterion_time.cpp b/cpp/subprojects/common/src/common/stopping/stopping_criterion_time.cpp deleted file mode 100644 index c51a4fdc..00000000 --- a/cpp/subprojects/common/src/common/stopping/stopping_criterion_time.cpp +++ /dev/null @@ -1,89 +0,0 @@ -#include 
"common/stopping/stopping_criterion_time.hpp" - -#include "common/util/validation.hpp" - -#include - -/** - * An implementation of the type `IStoppingCriterion` that ensures that a certain time limit is not exceeded. - */ -class TimeStoppingCriterion final : public IStoppingCriterion { - private: - - typedef std::chrono::steady_clock timer; - - typedef std::chrono::seconds timer_unit; - - const timer_unit timeLimit_; - - std::chrono::time_point startTime_; - - bool timerStarted_; - - public: - - /** - * @param timeLimit The time limit in seconds. Must be at least 1 - */ - TimeStoppingCriterion(uint32 timeLimit) - : timeLimit_(std::chrono::duration_cast(std::chrono::seconds(timeLimit))), - startTime_(timer::now()), timerStarted_(false) {} - - Result test(const IStatistics& statistics, uint32 numRules) override { - Result result; - - if (timerStarted_) { - auto currentTime = timer::now(); - auto duration = std::chrono::duration_cast(currentTime - startTime_); - - if (duration >= timeLimit_) { - result.stop = true; - } - } else { - startTime_ = timer::now(); - timerStarted_ = true; - } - - return result; - } -}; - -/** - * Allows to create instances of the type `IStoppingCriterion` that ensure that a certain time limit is not exceeded. - */ -class TimeStoppingCriterionFactory final : public IStoppingCriterionFactory { - private: - - const uint32 timeLimit_; - - public: - - /** - * @param timeLimit The time limit in seconds. 
Must be at least 1 - */ - TimeStoppingCriterionFactory(uint32 timeLimit) : timeLimit_(timeLimit) {} - - std::unique_ptr create(const SinglePartition& partition) const override { - return std::make_unique(timeLimit_); - } - - std::unique_ptr create(BiPartition& partition) const override { - return std::make_unique(timeLimit_); - } -}; - -TimeStoppingCriterionConfig::TimeStoppingCriterionConfig() : timeLimit_(3600) {} - -uint32 TimeStoppingCriterionConfig::getTimeLimit() const { - return timeLimit_; -} - -ITimeStoppingCriterionConfig& TimeStoppingCriterionConfig::setTimeLimit(uint32 timeLimit) { - assertGreaterOrEqual("timeLimit", timeLimit, 1); - timeLimit_ = timeLimit; - return *this; -} - -std::unique_ptr TimeStoppingCriterionConfig::createStoppingCriterionFactory() const { - return std::make_unique(timeLimit_); -} diff --git a/cpp/subprojects/common/src/common/thresholds/coverage_mask.cpp b/cpp/subprojects/common/src/common/thresholds/coverage_mask.cpp deleted file mode 100644 index 29066f18..00000000 --- a/cpp/subprojects/common/src/common/thresholds/coverage_mask.cpp +++ /dev/null @@ -1,79 +0,0 @@ -#include "common/thresholds/coverage_mask.hpp" - -#include "common/data/arrays.hpp" -#include "common/rule_refinement/prediction.hpp" -#include "common/thresholds/thresholds_subset.hpp" - -CoverageMask::CoverageMask(uint32 numElements) - : array_(new uint32[numElements] {0}), numElements_(numElements), indicatorValue_(0) {} - -CoverageMask::CoverageMask(const CoverageMask& coverageMask) - : array_(new uint32[coverageMask.numElements_]), numElements_(coverageMask.numElements_), - indicatorValue_(coverageMask.indicatorValue_) { - copyArray(coverageMask.array_, array_, numElements_); -} - -CoverageMask::~CoverageMask() { - delete[] array_; -} - -CoverageMask::iterator CoverageMask::begin() { - return array_; -} - -CoverageMask::iterator CoverageMask::end() { - return &array_[numElements_]; -} - -CoverageMask::const_iterator CoverageMask::cbegin() const { - return 
array_; -} - -CoverageMask::const_iterator CoverageMask::cend() const { - return &array_[numElements_]; -} - -uint32 CoverageMask::getNumElements() const { - return numElements_; -} - -uint32 CoverageMask::getIndicatorValue() const { - return indicatorValue_; -} - -void CoverageMask::setIndicatorValue(uint32 indicatorValue) { - indicatorValue_ = indicatorValue; -} - -void CoverageMask::reset() { - indicatorValue_ = 0; - setArrayToZeros(array_, numElements_); -} - -bool CoverageMask::isCovered(uint32 pos) const { - return array_[pos] == indicatorValue_; -} - -std::unique_ptr CoverageMask::copy() const { - return std::make_unique(*this); -} - -Quality CoverageMask::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const AbstractPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -Quality CoverageMask::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const AbstractPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -void CoverageMask::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - AbstractPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} - -void CoverageMask::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - AbstractPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} diff --git a/cpp/subprojects/common/src/common/thresholds/coverage_set.cpp b/cpp/subprojects/common/src/common/thresholds/coverage_set.cpp deleted file mode 100644 index dec9faad..00000000 --- a/cpp/subprojects/common/src/common/thresholds/coverage_set.cpp +++ /dev/null @@ -1,77 +0,0 @@ -#include "common/thresholds/coverage_set.hpp" - -#include "common/data/arrays.hpp" -#include "common/rule_refinement/prediction.hpp" -#include 
"common/thresholds/thresholds_subset.hpp" - -CoverageSet::CoverageSet(uint32 numElements) - : array_(new uint32[numElements]), numElements_(numElements), numCovered_(numElements) { - setArrayToIncreasingValues(array_, numElements, 0, 1); -} - -CoverageSet::CoverageSet(const CoverageSet& coverageSet) - : array_(new uint32[coverageSet.numElements_]), numElements_(coverageSet.numElements_), - numCovered_(coverageSet.numCovered_) { - copyArray(coverageSet.array_, array_, numCovered_); -} - -CoverageSet::~CoverageSet() { - delete[] array_; -} - -CoverageSet::iterator CoverageSet::begin() { - return array_; -} - -CoverageSet::iterator CoverageSet::end() { - return &array_[numCovered_]; -} - -CoverageSet::const_iterator CoverageSet::cbegin() const { - return array_; -} - -CoverageSet::const_iterator CoverageSet::cend() const { - return &array_[numCovered_]; -} - -uint32 CoverageSet::getNumElements() const { - return numElements_; -} - -uint32 CoverageSet::getNumCovered() const { - return numCovered_; -} - -void CoverageSet::setNumCovered(uint32 numCovered) { - numCovered_ = numCovered; -} - -void CoverageSet::reset() { - numCovered_ = numElements_; - setArrayToIncreasingValues(array_, numElements_, 0, 1); -} - -std::unique_ptr CoverageSet::copy() const { - return std::make_unique(*this); -} - -Quality CoverageSet::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - const AbstractPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -Quality CoverageSet::evaluateOutOfSample(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - const AbstractPrediction& head) const { - return thresholdsSubset.evaluateOutOfSample(partition, *this, head); -} - -void CoverageSet::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, const SinglePartition& partition, - AbstractPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} - 
-void CoverageSet::recalculatePrediction(const IThresholdsSubset& thresholdsSubset, BiPartition& partition, - AbstractPrediction& head) const { - thresholdsSubset.recalculatePrediction(partition, *this, head); -} diff --git a/cpp/subprojects/common/src/common/thresholds/thresholds_approximate.cpp b/cpp/subprojects/common/src/common/thresholds/thresholds_approximate.cpp deleted file mode 100644 index 96642ba2..00000000 --- a/cpp/subprojects/common/src/common/thresholds/thresholds_approximate.cpp +++ /dev/null @@ -1,416 +0,0 @@ -#include "common/thresholds/thresholds_approximate.hpp" - -#include "common/rule_refinement/rule_refinement_approximate.hpp" -#include "thresholds_common.hpp" - -#include - -/** - * Updates a given `CoverageSet` after a new condition has been added, such that only the examples that are covered by - * the new rule are marked es covered. - * - * @param thresholdVector A reference to an object of type `ThresholdVector` that stores the thresholds that result - * from the boundaries of the bins - * @param binIndices A reference to an object of type `IBinIndexVector` that stores the indices of the bins, - * individual examples belong to - * @param conditionStart The first bin (inclusive) that is covered by the new condition - * @param conditionEnd The last bin (exclusive) that is covered by the new condition - * @param covered True, if the bins in range [conditionStart, conditionEnd) are covered by the new condition - * and the remaining ones are not, false, if the elements in said range are not covered, but - * the remaining ones are - * @param coverageSet A reference to an object of type `CoverageSet` that is used to keep track of the examples - * that are covered by the previous rule. 
It will be updated by this function - * @param statistics A reference to an object of type `IWeightedStatistics` to be notified about the statistics - * that must be considered when searching for the next refinement, i.e., the statistics that - * are covered by the new rule - */ -static inline void updateCoveredExamples(const ThresholdVector& thresholdVector, const IBinIndexVector& binIndices, - int64 conditionStart, int64 conditionEnd, bool covered, - CoverageSet& coverageSet, IWeightedStatistics& statistics) { - int64 start, end; - - if (conditionEnd < conditionStart) { - start = conditionEnd + 1; - end = conditionStart + 1; - } else { - start = conditionStart; - end = conditionEnd; - } - - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::iterator coverageSetIterator = coverageSet.begin(); - statistics.resetCoveredStatistics(); - uint32 n = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - - if (!thresholdVector.isMissing(exampleIndex)) { - uint32 binIndex = binIndices.getBinIndex(exampleIndex); - - if (binIndex == IBinIndexVector::BIN_INDEX_SPARSE) { - binIndex = thresholdVector.getSparseBinIndex(); - } - - if ((binIndex >= start && binIndex < end) == covered) { - statistics.addCoveredStatistic(exampleIndex); - coverageSetIterator[n] = exampleIndex; - n++; - } - } - } - - coverageSet.setNumCovered(n); -} - -/** - * Rebuilds a given histogram. 
- * - * @param thresholdVector A reference to an object of type `ThresholdVector` that stores the thresholds that result - * from the boundaries of the bins - * @param histogram A reference to an object of type `IHistogram` that should be rebuild - * @param coverageSet A reference to an object of type `CoverageSet` that is used to keep track of the examples - * that are currently covered - */ -static inline void rebuildHistogram(const ThresholdVector& thresholdVector, IHistogram& histogram, - const CoverageSet& coverageSet) { - // Reset all statistics in the histogram to zero... - histogram.clear(); - - // Iterate the covered examples and add their statistics to the corresponding bin... - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - - if (!thresholdVector.isMissing(exampleIndex)) { - histogram.addToBin(exampleIndex); - } - } -} - -/** - * Provides access to the thresholds that result from applying a binning method to the feature values of the training - * examples. - */ -class ApproximateThresholds final : public AbstractThresholds { - private: - - /** - * Provides access to a subset of the thresholds that are stored by an instance of the class - * `ApproximateThresholds`. - * - * @tparam WeightVector The type of the vector that provides access to the weights of individual training - * examples - */ - template - class ThresholdsSubset final : public IThresholdsSubset { - private: - - /** - * A callback that allows to retrieve bins and corresponding statistics. If available, the bins and - * statistics are retrieved from the cache. Otherwise, they are computed by fetching the feature values - * from the feature matrix and applying a binning method. 
- */ - class Callback final : public IRuleRefinementCallback { - private: - - ThresholdsSubset& thresholdsSubset_; - - const uint32 featureIndex_; - - const bool nominal_; - - public: - - /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` that caches the - * bins - * @param featureIndex The index of the feature for which the bins should be retrieved - * @param nominal True, if the feature at index `featureIndex` is nominal, false - * otherwise - */ - Callback(ThresholdsSubset& thresholdsSubset, uint32 featureIndex, bool nominal) - : thresholdsSubset_(thresholdsSubset), featureIndex_(featureIndex), nominal_(nominal) {} - - Result get() override { - auto cacheIterator = thresholdsSubset_.thresholds_.cache_.find(featureIndex_); - IFeatureBinning::Result& cacheEntry = cacheIterator->second; - ThresholdVector* thresholdVector = cacheEntry.thresholdVectorPtr.get(); - IBinIndexVector* binIndices = cacheEntry.binIndicesPtr.get(); - - if (!thresholdVector) { - // Fetch feature vector... - std::unique_ptr featureVectorPtr; - const IColumnWiseFeatureMatrix& featureMatrix = - thresholdsSubset_.thresholds_.featureMatrix_; - uint32 numExamples = featureMatrix.getNumRows(); - featureMatrix.fetchFeatureVector(featureIndex_, featureVectorPtr); - - // Apply binning method... - const IFeatureBinning& binning = - nominal_ ? *thresholdsSubset_.thresholds_.nominalFeatureBinningPtr_ - : *thresholdsSubset_.thresholds_.numericalFeatureBinningPtr_; - IFeatureBinning::Result result = binning.createBins(*featureVectorPtr, numExamples); - cacheEntry.thresholdVectorPtr = std::move(result.thresholdVectorPtr); - thresholdVector = cacheEntry.thresholdVectorPtr.get(); - cacheEntry.binIndicesPtr = std::move(result.binIndicesPtr); - binIndices = cacheEntry.binIndicesPtr.get(); - } - - auto cacheHistogramIterator = thresholdsSubset_.cacheHistogram_.find(featureIndex_); - - if (!cacheHistogramIterator->second) { - // Create histogram and weight vector... 
- uint32 numBins = thresholdVector->getNumElements(); - cacheHistogramIterator->second = - binIndices->createHistogram(*thresholdsSubset_.weightedStatisticsPtr_, numBins); - } - - // Rebuild histogram... - IHistogram& histogram = *cacheHistogramIterator->second; - rebuildHistogram(*thresholdVector, histogram, thresholdsSubset_.coverageSet_); - - return Result(histogram, *thresholdVector); - } - }; - - ApproximateThresholds& thresholds_; - - std::unique_ptr weightedStatisticsPtr_; - - const WeightVector& weights_; - - CoverageSet coverageSet_; - - std::unordered_map> cacheHistogram_; - - template - std::unique_ptr createApproximateRuleRefinement(const IndexVector& labelIndices, - uint32 featureIndex) { - // Retrieve `unique_ptr` from the cache, or insert an empty one if it does not already exist... - auto cacheHistogramIterator = - cacheHistogram_.emplace(featureIndex, std::unique_ptr()).first; - - // If the `unique_ptr` in the cache does not refer to an `IHistogram`, add an empty - // `IFeatureBinning::Result` to the cache... 
- if (!cacheHistogramIterator->second) { - thresholds_.cache_.emplace(featureIndex, IFeatureBinning::Result()); - } - - std::unique_ptr featureTypePtr = - thresholds_.featureInfo_.createFeatureType(featureIndex); - bool nominal = featureTypePtr->isNominal(); - std::unique_ptr callbackPtr = std::make_unique(*this, featureIndex, nominal); - return std::make_unique>( - labelIndices, coverageSet_.getNumCovered(), featureIndex, nominal, std::move(callbackPtr)); - } - - public: - - /** - * @param thresholds A reference to an object of type `ApproximateThresholds` that stores the - * thresholds - * @param weightedStatisticsPtr An unique pointer to an object of type `IWeightedStatistics` that - * provides access to the statistics - * @param weights A reference to an object of template type `WeightWeight` that provides - * access to the weights of individual training examples - */ - ThresholdsSubset(ApproximateThresholds& thresholds, - std::unique_ptr weightedStatisticsPtr, - const WeightVector& weights) - : thresholds_(thresholds), weightedStatisticsPtr_(std::move(weightedStatisticsPtr)), - weights_(weights), coverageSet_(CoverageSet(thresholds.featureMatrix_.getNumRows())) {} - - /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` to be copied - */ - ThresholdsSubset(const ThresholdsSubset& thresholdsSubset) - : thresholds_(thresholdsSubset.thresholds_), - weightedStatisticsPtr_(thresholdsSubset.weightedStatisticsPtr_->copy()), - weights_(thresholdsSubset.weights_), coverageSet_(CoverageSet(thresholdsSubset.coverageSet_)) {} - - std::unique_ptr copy() const override { - return std::make_unique>(*this); - } - - std::unique_ptr createRuleRefinement(const CompleteIndexVector& labelIndices, - uint32 featureIndex) override { - return createApproximateRuleRefinement(labelIndices, featureIndex); - } - - std::unique_ptr createRuleRefinement(const PartialIndexVector& labelIndices, - uint32 featureIndex) override { - return 
createApproximateRuleRefinement(labelIndices, featureIndex); - } - - void filterThresholds(const Condition& condition) override { - uint32 featureIndex = condition.featureIndex; - auto cacheIterator = thresholds_.cache_.find(featureIndex); - IFeatureBinning::Result& cacheEntry = cacheIterator->second; - const ThresholdVector& thresholdVector = *cacheEntry.thresholdVectorPtr; - const IBinIndexVector& binIndices = *cacheEntry.binIndicesPtr; - updateCoveredExamples(thresholdVector, binIndices, condition.start, condition.end, - condition.covered, coverageSet_, *weightedStatisticsPtr_); - } - - void resetThresholds() override { - coverageSet_.reset(); - } - - const ICoverageState& getCoverageState() const override { - return coverageSet_; - } - - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally( - partition.cbegin(), partition.getNumElements(), weights_, coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally( - partition.first_cbegin(), partition.getNumFirst(), weights_, coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageSet& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, thresholds_.statisticsProvider_.get(), - head); - } - - Quality evaluateOutOfSample(BiPartition& partition, const CoverageSet& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, partition, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const SinglePartition& partition, const 
CoverageMask& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally( - partition.cbegin(), partition.getNumElements(), coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally( - partition.first_cbegin(), partition.getNumFirst(), coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const SinglePartition& partition, const CoverageSet& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally(coverageState, thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(BiPartition& partition, const CoverageSet& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally(coverageState, partition, thresholds_.statisticsProvider_.get(), - head); - } - - void applyPrediction(const AbstractPrediction& prediction) override { - uint32 numCovered = coverageSet_.getNumCovered(); - CoverageSet::const_iterator iterator = coverageSet_.cbegin(); - const AbstractPrediction* predictionPtr = &prediction; - IStatistics* statisticsPtr = &thresholds_.statisticsProvider_.get(); - uint32 numThreads = thresholds_.numThreads_; - -#pragma omp parallel for firstprivate(numCovered) firstprivate(iterator) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numCovered; i++) { - uint32 exampleIndex = iterator[i]; - predictionPtr->apply(*statisticsPtr, exampleIndex); - } - } - - void revertPrediction(const AbstractPrediction& prediction) override { - uint32 numCovered = coverageSet_.getNumCovered(); - CoverageSet::const_iterator iterator = coverageSet_.cbegin(); - const AbstractPrediction* predictionPtr = &prediction; - IStatistics* statisticsPtr = 
&thresholds_.statisticsProvider_.get(); - uint32 numThreads = thresholds_.numThreads_; - -#pragma omp parallel for firstprivate(numCovered) firstprivate(iterator) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numCovered; i++) { - uint32 exampleIndex = iterator[i]; - predictionPtr->revert(*statisticsPtr, exampleIndex); - } - } - }; - - const std::unique_ptr numericalFeatureBinningPtr_; - - const std::unique_ptr nominalFeatureBinningPtr_; - - const uint32 numThreads_; - - std::unordered_map cache_; - - public: - - /** - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that - * provides column-wise access to the feature values of individual training - * examples - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides - * information about the types of individual features - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides - * access to statistics about the labels of the training examples - * @param numericalFeatureBinningPtr An unique pointer to an object of type `IFeatureBinning` that should be - * used to assign numerical feature values to bins - * @param nominalFeatureBinningPtr An unique pointer to an object of type `IFeatureBinning` that should be - * used to assign nominal feature values to bins - * @param numThreads The number of CPU threads to be used to update statistics in parallel - */ - ApproximateThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider, - std::unique_ptr numericalFeatureBinningPtr, - std::unique_ptr nominalFeatureBinningPtr, uint32 numThreads) - : AbstractThresholds(featureMatrix, featureInfo, statisticsProvider), - numericalFeatureBinningPtr_(std::move(numericalFeatureBinningPtr)), - nominalFeatureBinningPtr_(std::move(nominalFeatureBinningPtr)), numThreads_(numThreads) {} - - 
std::unique_ptr createSubset(const EqualWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( - *this, std::move(weightedStatisticsPtr), weights); - } - - std::unique_ptr createSubset(const BitWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( - *this, std::move(weightedStatisticsPtr), weights); - } - - std::unique_ptr createSubset(const DenseWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>>( - *this, std::move(weightedStatisticsPtr), weights); - } -}; - -ApproximateThresholdsFactory::ApproximateThresholdsFactory( - std::unique_ptr numericalFeatureBinningFactoryPtr, - std::unique_ptr nominalFeatureBinningFactoryPtr, uint32 numThreads) - : numericalFeatureBinningFactoryPtr_(std::move(numericalFeatureBinningFactoryPtr)), - nominalFeatureBinningFactoryPtr_(std::move(nominalFeatureBinningFactoryPtr)), numThreads_(numThreads) {} - -std::unique_ptr ApproximateThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const { - std::unique_ptr numericalFeatureBinningPtr = numericalFeatureBinningFactoryPtr_->create(); - std::unique_ptr nominalFeatureBinningPtr = nominalFeatureBinningFactoryPtr_->create(); - return std::make_unique(featureMatrix, featureInfo, statisticsProvider, - std::move(numericalFeatureBinningPtr), - std::move(nominalFeatureBinningPtr), numThreads_); -} diff --git a/cpp/subprojects/common/src/common/thresholds/thresholds_common.hpp b/cpp/subprojects/common/src/common/thresholds/thresholds_common.hpp deleted file 
mode 100644 index 9ef27380..00000000 --- a/cpp/subprojects/common/src/common/thresholds/thresholds_common.hpp +++ /dev/null @@ -1,186 +0,0 @@ -/* - * @author Michael Rapp (michael.rapp.ml@gmail.com) - */ -#pragma once - -#include "common/input/feature_info.hpp" -#include "common/input/feature_matrix.hpp" -#include "common/iterator/binary_forward_iterator.hpp" -#include "common/thresholds/thresholds.hpp" -#include "omp.h" - -template -static inline Quality evaluateOutOfSampleInternally(IndexIterator indexIterator, uint32 numExamples, - const WeightVector& weights, const CoverageMask& coverageMask, - const IStatistics& statistics, - const AbstractPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - - if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex) && coverageMask.isCovered(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - return statisticsSubsetPtr->calculateScores(); -} - -template -static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, const CoverageSet& coverageSet, - const IStatistics& statistics, - const AbstractPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - - if (statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - return statisticsSubsetPtr->calculateScores(); -} - -template -static inline Quality evaluateOutOfSampleInternally(const WeightVector& weights, 
const CoverageSet& coverageSet, - BiPartition& partition, const IStatistics& statistics, - const AbstractPrediction& prediction) { - OutOfSampleWeightVector outOfSampleWeights(weights); - std::unique_ptr statisticsSubsetPtr = - prediction.createStatisticsSubset(statistics, outOfSampleWeights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - partition.sortSecond(); - auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); - uint32 previousExampleIndex = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); - - if (*holdoutSetIterator && statisticsSubsetPtr->hasNonZeroWeight(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - previousExampleIndex = exampleIndex; - } - - return statisticsSubsetPtr->calculateScores(); -} - -template -static inline void recalculatePredictionInternally(IndexIterator indexIterator, uint32 numExamples, - const CoverageMask& coverageMask, const IStatistics& statistics, - AbstractPrediction& prediction) { - EqualWeightVector weights(numExamples); - std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - - for (uint32 i = 0; i < numExamples; i++) { - uint32 exampleIndex = indexIterator[i]; - - if (coverageMask.isCovered(exampleIndex)) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, const IStatistics& statistics, - AbstractPrediction& prediction) { - uint32 numStatistics = statistics.getNumStatistics(); - EqualWeightVector weights(numStatistics); - std::unique_ptr statisticsSubsetPtr = 
prediction.createStatisticsSubset(statistics, weights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -static inline void recalculatePredictionInternally(const CoverageSet& coverageSet, BiPartition& partition, - const IStatistics& statistics, AbstractPrediction& prediction) { - uint32 numStatistics = statistics.getNumStatistics(); - EqualWeightVector weights(numStatistics); - std::unique_ptr statisticsSubsetPtr = prediction.createStatisticsSubset(statistics, weights); - uint32 numCovered = coverageSet.getNumCovered(); - CoverageSet::const_iterator coverageSetIterator = coverageSet.cbegin(); - partition.sortSecond(); - auto holdoutSetIterator = make_binary_forward_iterator(partition.second_cbegin(), partition.second_cend()); - uint32 previousExampleIndex = 0; - - for (uint32 i = 0; i < numCovered; i++) { - uint32 exampleIndex = coverageSetIterator[i]; - std::advance(holdoutSetIterator, exampleIndex - previousExampleIndex); - - if (*holdoutSetIterator) { - statisticsSubsetPtr->addToSubset(exampleIndex); - } - - previousExampleIndex = exampleIndex; - } - - const IScoreVector& scoreVector = statisticsSubsetPtr->calculateScores(); - scoreVector.updatePrediction(prediction); -} - -/** - * An abstract base class for all classes that provide access to thresholds that may be used by the first condition of a - * rule that currently has an empty body and therefore covers the entire instance space. - */ -class AbstractThresholds : public IThresholds { - protected: - - /** - * A reference to an object of type `IColumnWiseFeatureMatrix` that provides column-wise access to the feature - * values of the training examples. 
- */ - const IColumnWiseFeatureMatrix& featureMatrix_; - - /** - * A reference to an object of type `IFeatureInfo` that provides information about the types of individual - * features. - */ - const IFeatureInfo& featureInfo_; - - /** - * A reference to an object of type `IStatisticsProvider` that provides access to statistics about the labels of - * the training examples. - */ - IStatisticsProvider& statisticsProvider_; - - public: - - /** - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that provides - * column-wise access to the feature values of individual training examples - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides information about - * the types of individual features - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to - * statistics about the labels of the training examples - */ - AbstractThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) - : featureMatrix_(featureMatrix), featureInfo_(featureInfo), statisticsProvider_(statisticsProvider) {} - - virtual ~AbstractThresholds() override {}; - - IStatisticsProvider& getStatisticsProvider() const override final { - return statisticsProvider_; - } -}; diff --git a/cpp/subprojects/common/src/common/thresholds/thresholds_exact.cpp b/cpp/subprojects/common/src/common/thresholds/thresholds_exact.cpp deleted file mode 100644 index 141772c6..00000000 --- a/cpp/subprojects/common/src/common/thresholds/thresholds_exact.cpp +++ /dev/null @@ -1,536 +0,0 @@ -#include "common/thresholds/thresholds_exact.hpp" - -#include "common/rule_refinement/rule_refinement_exact.hpp" -#include "thresholds_common.hpp" - -#include - -/** - * An entry that is stored in a cache and contains an unique pointer to a feature vector. 
The field `numConditions` - * specifies how many conditions the rule contained when the vector was updated for the last time. It may be used to - * check if the vector is still valid or must be updated. - */ -struct FilteredCacheEntry final { - public: - - FilteredCacheEntry() : numConditions(0) {}; - - /** - * An unique pointer to an object of type `FeatureVector` that stores feature values. - */ - std::unique_ptr vectorPtr; - - /** - * The number of conditions that were contained by the rule when the cache was updated for the last time. - */ - uint32 numConditions; -}; - -/** - * Filters a given feature vector, which contains the elements for a certain feature that are covered by the previous - * rule, after a new condition that corresponds to said feature has been added, such that the filtered vector does only - * contain the elements that are covered by the new rule. The filtered vector is stored in a given struct of type - * `FilteredCacheEntry` and the given statistics are updated accordingly. 
- * - * @param vector A reference to an object of type `FeatureVector` that should be filtered - * @param cacheEntry A reference to a struct of type `FilteredCacheEntry` that should be used to store the - * filtered feature vector - * @param conditionStart The element in `vector` that corresponds to the first statistic (inclusive) that is - * covered by the new condition - * @param conditionEnd The element in `vector` that corresponds to the last statistic (exclusive) that is - * covered by the new condition - * @param conditionComparator The type of the operator that is used by the new condition - * @param covered True, if the elements in range [conditionStart, conditionEnd) are covered by the new - * condition and the remaining ones are not, false, if the elements in said range are not - * covered, but the remaining ones are - * @param numConditions The total number of conditions in the rule's body (including the new one) - * @param coverageMask A reference to an object of type `CoverageMask` that is used to keep track of the - * elements that are covered by the previous rule. It will be updated by this function - * @param statistics A reference to an object of type `IWeightedStatistics` to be notified about the - * statistics that must be considered when searching for the next refinement, i.e., the - * statistics that are covered by the new rule - */ -static inline void filterCurrentVector(const FeatureVector& vector, FilteredCacheEntry& cacheEntry, - int64 conditionStart, int64 conditionEnd, Comparator conditionComparator, - bool covered, uint32 numConditions, CoverageMask& coverageMask, - IWeightedStatistics& statistics) { - // Determine the number of elements in the filtered vector... - uint32 numTotalElements = vector.getNumElements(); - uint32 distance = std::abs(conditionStart - conditionEnd); - uint32 numElements = covered ? distance : (numTotalElements > distance ? 
numTotalElements - distance : 0); - - // Create a new vector that will contain the filtered elements, if necessary... - FeatureVector* filteredVector = cacheEntry.vectorPtr.get(); - - if (!filteredVector) { - cacheEntry.vectorPtr = std::make_unique(numElements); - filteredVector = cacheEntry.vectorPtr.get(); - } - - typename FeatureVector::const_iterator iterator = vector.cbegin(); - FeatureVector::iterator filteredIterator = filteredVector->begin(); - CoverageMask::iterator coverageMaskIterator = coverageMask.begin(); - - bool descending = conditionEnd < conditionStart; - int64 start, end; - - if (descending) { - start = conditionEnd + 1; - end = conditionStart + 1; - } else { - start = conditionStart; - end = conditionEnd; - } - - if (covered) { - coverageMask.setIndicatorValue(numConditions); - statistics.resetCoveredStatistics(); - uint32 i = 0; - - // Retain the indices at positions [start, end) and set the corresponding values in the given `coverageMask` to - // `numConditions` to mark them as covered... - for (int64 r = start; r < end; r++) { - uint32 index = iterator[r].index; - coverageMaskIterator[index] = numConditions; - filteredIterator[i].index = index; - filteredIterator[i].value = iterator[r].value; - statistics.addCoveredStatistic(index); - i++; - } - } else { - // Discard the indices at positions [start, end) and set the corresponding values in `coverageMask` to - // `numConditions`, which marks them as uncovered... - for (int64 r = start; r < end; r++) { - uint32 index = iterator[r].index; - coverageMaskIterator[index] = numConditions; - statistics.removeCoveredStatistic(index); - } - - if (conditionComparator == NEQ) { - // Retain the indices at positions [currentStart, currentEnd), while leaving the corresponding values in - // `coverageMask` untouched, such that all previously covered examples in said range are still marked - // as covered, while previously uncovered examples are still marked as uncovered... 
- int64 currentStart, currentEnd; - uint32 i; - - if (descending) { - currentStart = end; - currentEnd = numTotalElements; - i = start; - } else { - currentStart = 0; - currentEnd = start; - i = 0; - } - - for (int64 r = currentStart; r < currentEnd; r++) { - filteredIterator[i].index = iterator[r].index; - filteredIterator[i].value = iterator[r].value; - i++; - } - } - - // Retain the indices at positions [currentStart, currentEnd), while leaving the corresponding values in - // `coverageMask` untouched, such that all previously covered examples in said range are still marked as - // covered, while previously uncovered examples are still marked as uncovered... - int64 currentStart, currentEnd; - uint32 i; - - if (descending) { - currentStart = 0; - currentEnd = start; - i = 0; - } else { - currentStart = end; - currentEnd = numTotalElements; - i = start; - } - - for (int64 r = currentStart; r < currentEnd; r++) { - filteredIterator[i].index = iterator[r].index; - filteredIterator[i].value = iterator[r].value; - i++; - } - - // Iterate the indices of examples with missing feature values and set the corresponding values in - // `coverageMask` to `numConditions`, which marks them as uncovered... - for (auto it = vector.missing_indices_cbegin(); it != vector.missing_indices_cend(); it++) { - uint32 index = *it; - coverageMaskIterator[index] = numConditions; - statistics.removeCoveredStatistic(index); - } - } - - filteredVector->setNumElements(numElements, true); - cacheEntry.numConditions = numConditions; -} - -/** - * Filters a given feature vector, such that the filtered vector does only contain the elements that are covered by the - * current rule. The filtered vector is stored in a given struct of type `FilteredCacheEntry`. 
- * - * @param vector A reference to an object of type `FeatureVector` that should be filtered - * @param cacheEntry A reference to a struct of type `FilteredCacheEntry` that should be used to store the filtered - * vector - * @param numConditions The total number of conditions in the current rule's body - * @param coverageMask A reference to an object of type `CoverageMask` that is used to keep track of the elements that - * are covered by the current rule - */ -static inline void filterAnyVector(const FeatureVector& vector, FilteredCacheEntry& cacheEntry, uint32 numConditions, - const CoverageMask& coverageMask) { - uint32 maxElements = vector.getNumElements(); - FeatureVector* filteredVector = cacheEntry.vectorPtr.get(); - - if (filteredVector) { - filteredVector->clearMissingIndices(); - } else { - cacheEntry.vectorPtr = std::make_unique(maxElements); - filteredVector = cacheEntry.vectorPtr.get(); - } - - // Filter the missing indices... - for (auto it = vector.missing_indices_cbegin(); it != vector.missing_indices_cend(); it++) { - uint32 index = *it; - - if (coverageMask.isCovered(index)) { - filteredVector->addMissingIndex(index); - } - } - - // Filter the feature values... - typename FeatureVector::const_iterator iterator = vector.cbegin(); - typename FeatureVector::iterator filteredIterator = filteredVector->begin(); - uint32 i = 0; - - for (uint32 r = 0; r < maxElements; r++) { - uint32 index = iterator[r].index; - - if (coverageMask.isCovered(index)) { - filteredIterator[i].index = index; - filteredIterator[i].value = iterator[r].value; - i++; - } - } - - filteredVector->setNumElements(i, true); - cacheEntry.numConditions = numConditions; -} - -/** - * Provides access to all thresholds that result from the feature values of the training examples. - */ -class ExactThresholds final : public AbstractThresholds { - private: - - /** - * Provides access to a subset of the thresholds that are stored by an instance of the class `ExactThresholds`. 
- * - * @tparam WeightVector The type of the vector that provides access to the weights of individual training - * examples - */ - template - class ThresholdsSubset final : public IThresholdsSubset { - private: - - /** - * A callback that allows to retrieve feature vectors. If available, the feature vectors are retrieved - * from the cache. Otherwise, they are fetched from the feature matrix. - */ - class Callback final : public IRuleRefinementCallback { - private: - - ThresholdsSubset& thresholdsSubset_; - - const uint32 featureIndex_; - - public: - - /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` that caches the - * feature vectors - * @param featureIndex The index of the feature for which the feature vector should be - * retrieved - */ - Callback(ThresholdsSubset& thresholdsSubset, uint32 featureIndex) - : thresholdsSubset_(thresholdsSubset), featureIndex_(featureIndex) {} - - Result get() override { - auto cacheFilteredIterator = thresholdsSubset_.cacheFiltered_.find(featureIndex_); - FilteredCacheEntry& cacheEntry = cacheFilteredIterator->second; - FeatureVector* featureVector = cacheEntry.vectorPtr.get(); - - if (!featureVector) { - auto cacheIterator = thresholdsSubset_.thresholds_.cache_.find(featureIndex_); - featureVector = cacheIterator->second.get(); - - if (!featureVector) { - thresholdsSubset_.thresholds_.featureMatrix_.fetchFeatureVector( - featureIndex_, cacheIterator->second); - cacheIterator->second->sortByValues(); - featureVector = cacheIterator->second.get(); - } - } - - // Filter feature vector, if only a subset of its elements are covered by the current - // rule... 
- uint32 numConditions = thresholdsSubset_.numModifications_; - - if (numConditions > cacheEntry.numConditions) { - filterAnyVector(*featureVector, cacheEntry, numConditions, - thresholdsSubset_.coverageMask_); - featureVector = cacheEntry.vectorPtr.get(); - } - - return Result(*thresholdsSubset_.weightedStatisticsPtr_, *featureVector); - } - }; - - ExactThresholds& thresholds_; - - std::unique_ptr weightedStatisticsPtr_; - - const WeightVector& weights_; - - uint32 numCoveredExamples_; - - CoverageMask coverageMask_; - - uint32 numModifications_; - - std::unordered_map cacheFiltered_; - - template - std::unique_ptr createExactRuleRefinement(const IndexVector& labelIndices, - uint32 featureIndex) { - // Retrieve the `FilteredCacheEntry` from the cache, or insert a new one if it does not already - // exist... - auto cacheFilteredIterator = cacheFiltered_.emplace(featureIndex, FilteredCacheEntry()).first; - FeatureVector* featureVector = cacheFilteredIterator->second.vectorPtr.get(); - - // If the `FilteredCacheEntry` in the cache does not refer to a `FeatureVector`, add an empty - // `unique_ptr` to the cache... 
- if (!featureVector) { - thresholds_.cache_.emplace(featureIndex, std::unique_ptr()); - } - - std::unique_ptr featureTypePtr = - thresholds_.featureInfo_.createFeatureType(featureIndex); - bool nominal = featureTypePtr->isNominal(); - std::unique_ptr callbackPtr = std::make_unique(*this, featureIndex); - return std::make_unique>( - labelIndices, numCoveredExamples_, featureIndex, nominal, weights_.hasZeroWeights(), - std::move(callbackPtr)); - } - - public: - - /** - * @param thresholds A reference to an object of type `ExactThresholds` that stores the - * thresholds - * @param weightedStatisticsPtr An unique pointer to an object of type `IWeightedStatistics` that - * provides access to the statistics - * @param weights A reference to an object of template type `WeightVector` that provides - * access to the weights of individual training examples - */ - ThresholdsSubset(ExactThresholds& thresholds, - std::unique_ptr weightedStatisticsPtr, - const WeightVector& weights) - : thresholds_(thresholds), weightedStatisticsPtr_(std::move(weightedStatisticsPtr)), - weights_(weights), numCoveredExamples_(weights.getNumNonZeroWeights()), - coverageMask_(CoverageMask(thresholds.featureMatrix_.getNumRows())), numModifications_(0) {} - - /** - * @param thresholdsSubset A reference to an object of type `ThresholdsSubset` to be copied - */ - ThresholdsSubset(const ThresholdsSubset& thresholdsSubset) - : thresholds_(thresholdsSubset.thresholds_), - weightedStatisticsPtr_(thresholdsSubset.weightedStatisticsPtr_->copy()), - weights_(thresholdsSubset.weights_), numCoveredExamples_(thresholdsSubset.numCoveredExamples_), - coverageMask_(CoverageMask(thresholdsSubset.coverageMask_)), - numModifications_(thresholdsSubset.numModifications_) {} - - std::unique_ptr copy() const override { - return std::make_unique>(*this); - } - - std::unique_ptr createRuleRefinement(const CompleteIndexVector& labelIndices, - uint32 featureIndex) override { - return createExactRuleRefinement(labelIndices, 
featureIndex); - } - - std::unique_ptr createRuleRefinement(const PartialIndexVector& labelIndices, - uint32 featureIndex) override { - return createExactRuleRefinement(labelIndices, featureIndex); - } - - void filterThresholds(const Condition& condition) override { - numModifications_++; - numCoveredExamples_ = condition.numCovered; - - uint32 featureIndex = condition.featureIndex; - auto cacheFilteredIterator = cacheFiltered_.emplace(featureIndex, FilteredCacheEntry()).first; - FilteredCacheEntry& cacheEntry = cacheFilteredIterator->second; - FeatureVector* featureVector = cacheEntry.vectorPtr.get(); - - if (!featureVector) { - auto cacheIterator = - thresholds_.cache_.emplace(featureIndex, std::unique_ptr()).first; - featureVector = cacheIterator->second.get(); - } - - // Identify the examples that are covered by the condition... - if (numModifications_ > cacheEntry.numConditions) { - filterAnyVector(*featureVector, cacheEntry, numModifications_, coverageMask_); - featureVector = cacheEntry.vectorPtr.get(); - } - - filterCurrentVector(*featureVector, cacheEntry, condition.start, condition.end, - condition.comparator, condition.covered, numModifications_, coverageMask_, - *weightedStatisticsPtr_); - } - - void resetThresholds() override { - numModifications_ = 0; - numCoveredExamples_ = weights_.getNumNonZeroWeights(); - cacheFiltered_.clear(); - coverageMask_.reset(); - } - - const ICoverageState& getCoverageState() const override { - return coverageMask_; - } - - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageMask& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally( - partition.cbegin(), partition.getNumElements(), weights_, coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - Quality evaluateOutOfSample(const BiPartition& partition, const CoverageMask& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally( - 
partition.first_cbegin(), partition.getNumFirst(), weights_, coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - Quality evaluateOutOfSample(const SinglePartition& partition, const CoverageSet& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, thresholds_.statisticsProvider_.get(), - head); - } - - Quality evaluateOutOfSample(BiPartition& partition, const CoverageSet& coverageState, - const AbstractPrediction& head) const override { - return evaluateOutOfSampleInternally(weights_, coverageState, partition, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const SinglePartition& partition, const CoverageMask& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally( - partition.cbegin(), partition.getNumElements(), coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const BiPartition& partition, const CoverageMask& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally( - partition.first_cbegin(), partition.getNumFirst(), coverageState, - thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(const SinglePartition& partition, const CoverageSet& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally(coverageState, thresholds_.statisticsProvider_.get(), head); - } - - void recalculatePrediction(BiPartition& partition, const CoverageSet& coverageState, - AbstractPrediction& head) const override { - recalculatePredictionInternally(coverageState, partition, thresholds_.statisticsProvider_.get(), - head); - } - - void applyPrediction(const AbstractPrediction& prediction) override { - IStatistics& statistics = thresholds_.statisticsProvider_.get(); - uint32 numStatistics = statistics.getNumStatistics(); - const CoverageMask* coverageMaskPtr = &coverageMask_; - 
const AbstractPrediction* predictionPtr = &prediction; - IStatistics* statisticsPtr = &statistics; - uint32 numThreads = thresholds_.numThreads_; - -#pragma omp parallel for firstprivate(numStatistics) firstprivate(coverageMaskPtr) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numStatistics; i++) { - if (coverageMaskPtr->isCovered(i)) { - predictionPtr->apply(*statisticsPtr, i); - } - } - } - - void revertPrediction(const AbstractPrediction& prediction) override { - IStatistics& statistics = thresholds_.statisticsProvider_.get(); - uint32 numStatistics = statistics.getNumStatistics(); - const CoverageMask* coverageMaskPtr = &coverageMask_; - const AbstractPrediction* predictionPtr = &prediction; - IStatistics* statisticsPtr = &statistics; - uint32 numThreads = thresholds_.numThreads_; - -#pragma omp parallel for firstprivate(numStatistics) firstprivate(coverageMaskPtr) firstprivate(predictionPtr) \ - firstprivate(statisticsPtr) schedule(dynamic) num_threads(numThreads) - for (int64 i = 0; i < numStatistics; i++) { - if (coverageMaskPtr->isCovered(i)) { - predictionPtr->revert(*statisticsPtr, i); - } - } - } - }; - - const uint32 numThreads_; - - std::unordered_map> cache_; - - public: - - /** - * @param featureMatrix A reference to an object of type `IColumnWiseFeatureMatrix` that provides - * column-wise access to the feature values of individual training examples - * @param featureInfo A reference to an object of type `IFeatureInfo` that provides information about - * the types of individual features - * @param statisticsProvider A reference to an object of type `IStatisticsProvider` that provides access to - * statistics about the labels of the training examples - * @param numThreads The number of CPU threads to be used to update statistics in parallel - */ - ExactThresholds(const IColumnWiseFeatureMatrix& featureMatrix, const IFeatureInfo& featureInfo, - IStatisticsProvider& 
statisticsProvider, uint32 numThreads) - : AbstractThresholds(featureMatrix, featureInfo, statisticsProvider), numThreads_(numThreads) {} - - std::unique_ptr createSubset(const EqualWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( - *this, std::move(weightedStatisticsPtr), weights); - } - - std::unique_ptr createSubset(const BitWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>( - *this, std::move(weightedStatisticsPtr), weights); - } - - std::unique_ptr createSubset(const DenseWeightVector& weights) override { - IStatistics& statistics = statisticsProvider_.get(); - std::unique_ptr weightedStatisticsPtr = statistics.createWeightedStatistics(weights); - return std::make_unique>>( - *this, std::move(weightedStatisticsPtr), weights); - } -}; - -ExactThresholdsFactory::ExactThresholdsFactory(uint32 numThreads) : numThreads_(numThreads) {} - -std::unique_ptr ExactThresholdsFactory::create(const IColumnWiseFeatureMatrix& featureMatrix, - const IFeatureInfo& featureInfo, - IStatisticsProvider& statisticsProvider) const { - return std::make_unique(featureMatrix, featureInfo, statisticsProvider, numThreads_); -} diff --git a/doc/Doxyfile_boosting b/doc/Doxyfile_boosting deleted file mode 100644 index 9aac1eb0..00000000 --- a/doc/Doxyfile_boosting +++ /dev/null @@ -1,2770 +0,0 @@ -# Doxyfile 1.9.7 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project. -# -# All text after a double hash (##) is considered a comment and is placed in -# front of the TAG it is preceding. -# -# All text after a single hash (#) is considered a comment and will be ignored. -# The format is: -# TAG = value [value, ...] 
-# For lists, items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (\" \"). -# -# Note: -# -# Use doxygen to compare the used configuration file with the template -# configuration file: -# doxygen -x [configFile] -# Use doxygen to compare the used configuration file with the template -# configuration file without replacing the environment variables or CMake type -# replacement variables: -# doxygen -x_noenv [configFile] - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the configuration -# file that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# https://www.gnu.org/software/libiconv/ for the list of possible encodings. -# The default value is: UTF-8. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by -# double-quotes, unless you are using Doxywizard) that should identify the -# project for which the documentation is generated. This name is used in the -# title of most generated pages and in a few other places. -# The default value is: My Project. - -PROJECT_NAME = mlrlboosting - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. This -# could be handy for archiving the generated documentation or if some version -# control system is used. - -PROJECT_NUMBER = $(PROJECT_NUMBER) - -# Using the PROJECT_BRIEF tag one can provide an optional one line description -# for a project that appears at the top of each page and should give viewer a -# quick idea about the purpose of the project. Keep the description short. 
- -PROJECT_BRIEF = "An Algorithm for Learning Gradient Boosted Multi-label Classification Rules" - -# With the PROJECT_LOGO tag one can specify a logo or an icon that is included -# in the documentation. The maximum height of the logo should not exceed 55 -# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy -# the logo to the output directory. - -PROJECT_LOGO = - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path -# into which the generated documentation will be written. If a relative path is -# entered, it will be relative to the location where doxygen was started. If -# left blank the current directory will be used. - -OUTPUT_DIRECTORY = apidoc/api/cpp/boosting/ - -# If the CREATE_SUBDIRS tag is set to YES then doxygen will create up to 4096 -# sub-directories (in 2 levels) under the output directory of each output format -# and will distribute the generated files over these directories. Enabling this -# option can be useful when feeding doxygen a huge amount of source files, where -# putting all generated files in the same directory would otherwise cause -# performance problems for the file system. Adapt CREATE_SUBDIRS_LEVEL to -# control the number of sub-directories. -# The default value is: NO. - -CREATE_SUBDIRS = NO - -# Controls the number of sub-directories that will be created when -# CREATE_SUBDIRS tag is set to YES. Level 0 represents 16 directories, and every -# level increment doubles the number of directories, resulting in 4096 -# directories at level 8 which is the default and also the maximum value. The -# sub-directories are organized in 2 levels, the first level always has a fixed -# number of 16 directories. -# Minimum value: 0, maximum value: 8, default value: 8. -# This tag requires that the tag CREATE_SUBDIRS is set to YES. - -CREATE_SUBDIRS_LEVEL = 8 - -# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII -# characters to appear in the names of generated files.
If set to NO, non-ASCII -# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode -# U+3044. -# The default value is: NO. - -ALLOW_UNICODE_NAMES = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Bulgarian, -# Catalan, Chinese, Chinese-Traditional, Croatian, Czech, Danish, Dutch, English -# (United States), Esperanto, Farsi (Persian), Finnish, French, German, Greek, -# Hindi, Hungarian, Indonesian, Italian, Japanese, Japanese-en (Japanese with -# English messages), Korean, Korean-en (Korean with English messages), Latvian, -# Lithuanian, Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, -# Romanian, Russian, Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, -# Swedish, Turkish, Ukrainian and Vietnamese. -# The default value is: English. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member -# descriptions after the members that are listed in the file and class -# documentation (similar to Javadoc). Set to NO to disable this. -# The default value is: YES. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief -# description of a member or function before the detailed description -# -# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. -# The default value is: YES. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator that is -# used to form the text in various listings. Each string in this list, if found -# as the leading text of the brief description, will be stripped from the text -# and the result, after processing the whole list, is used as the annotated -# text. 
Otherwise, the brief description is used as-is. If left blank, the -# following values are used ($name is automatically replaced with the name of -# the entity):The $name class, The $name widget, The $name file, is, provides, -# specifies, contains, represents, a, an and the. - -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# doxygen will generate a detailed section even if there is only a brief -# description. -# The default value is: NO. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. -# The default value is: NO. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path -# before files name in the file list and in the header files. If set to NO the -# shortest path that makes the file name unique will be used -# The default value is: YES. - -FULL_PATH_NAMES = NO - -# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path. -# Stripping is only done if one of the specified strings matches the left-hand -# part of the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the path to -# strip. -# -# Note that you can specify absolute paths here, but also relative paths, which -# will be relative from the directory where doxygen is started. -# This tag requires that the tag FULL_PATH_NAMES is set to YES. 
- -STRIP_FROM_PATH = - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the -# path mentioned in the documentation of a class, which tells the reader which -# header file to include in order to use a class. If left blank only the name of -# the header file containing the class definition is used. Otherwise one should -# specify the list of include paths that are normally passed to the compiler -# using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but -# less readable) file names. This can be useful if your file system doesn't -# support long names like on DOS, Mac, or CD-ROM. -# The default value is: NO. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the -# first line (until the first dot) of a Javadoc-style comment as the brief -# description. If set to NO, the Javadoc-style will behave just like regular Qt- -# style comments (thus requiring an explicit @brief command for a brief -# description.) -# The default value is: NO. - -JAVADOC_AUTOBRIEF = NO - -# If the JAVADOC_BANNER tag is set to YES then doxygen will interpret a line -# such as -# /*************** -# as being the beginning of a Javadoc-style comment "banner". If set to NO, the -# Javadoc-style will behave just like regular comments and it will not be -# interpreted by doxygen. -# The default value is: NO. - -JAVADOC_BANNER = NO - -# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first -# line (until the first dot) of a Qt-style comment as the brief description. If -# set to NO, the Qt-style will behave just like regular Qt-style comments (thus -# requiring an explicit \brief command for a brief description.) -# The default value is: NO. - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a -# multi-line C++ special comment block (i.e. a block of //!
or /// comments) as -# a brief description. This used to be the default behavior. The new default is -# to treat a multi-line C++ comment block as a detailed description. Set this -# tag to YES if you prefer the old behavior instead. -# -# Note that setting this tag to YES also means that rational rose comments are -# not recognized any more. -# The default value is: NO. - -MULTILINE_CPP_IS_BRIEF = NO - -# By default Python docstrings are displayed as preformatted text and doxygen's -# special commands cannot be used. By setting PYTHON_DOCSTRING to NO the -# doxygen's special commands can be used and the contents of the docstring -# documentation blocks is shown as doxygen documentation. -# The default value is: YES. - -PYTHON_DOCSTRING = YES - -# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the -# documentation from any documented member that it re-implements. -# The default value is: YES. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new -# page for each member. If set to NO, the documentation of a member will be part -# of the file/class/namespace that contains it. -# The default value is: NO. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen -# uses this value to replace tabs by spaces in code fragments. -# Minimum value: 1, maximum value: 16, default value: 4. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that act as commands in -# the documentation. An alias has the form: -# name=value -# For example adding -# "sideeffect=@par Side Effects:^^" -# will allow you to put the command \sideeffect (or @sideeffect) in the -# documentation, which will result in a user-defined paragraph with heading -# "Side Effects:". Note that you cannot put \n's in the value part of an alias -# to insert newlines (in the resulting output). 
You can put ^^ in the value part -# of an alias to insert a newline as if a physical newline was in the original -# file. When you need a literal { or } or , in the value part of an alias you -# have to escape them by means of a backslash (\), this can lead to conflicts -# with the commands \{ and \} for these it is advised to use the version @{ and -# @} or use a double escape (\\{ and \\}) - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources -# only. Doxygen will then generate output that is more tailored for C. For -# instance, some of the names that are used will be different. The list of all -# members will be omitted, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or -# Python sources only. Doxygen will then generate output that is more tailored -# for that language. For instance, namespaces will be presented as packages, -# qualified scopes will look different, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources. Doxygen will then generate output that is tailored for Fortran. -# The default value is: NO. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. Doxygen will then generate output that is tailored for VHDL. -# The default value is: NO. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Set the OPTIMIZE_OUTPUT_SLICE tag to YES if your project consists of Slice -# sources only. Doxygen will then generate output that is more tailored for that -# language. For instance, namespaces will be presented as modules, types will be -# separated into more groups, etc. -# The default value is: NO. - -OPTIMIZE_OUTPUT_SLICE = NO - -# Doxygen selects the parser to use depending on the extension of the files it -# parses. 
With this tag you can assign which parser to use for a given -# extension. Doxygen has a built-in mapping, but you can override or extend it -# using this tag. The format is ext=language, where ext is a file extension, and -# language is one of the parsers supported by doxygen: IDL, Java, JavaScript, -# Csharp (C#), C, C++, Lex, D, PHP, md (Markdown), Objective-C, Python, Slice, -# VHDL, Fortran (fixed format Fortran: FortranFixed, free formatted Fortran: -# FortranFree, unknown formatted Fortran: Fortran. In the latter case the parser -# tries to guess whether the code is fixed or free formatted code, this is the -# default for Fortran type files). For instance to make doxygen treat .inc files -# as Fortran files (default is PHP), and .f files as C (default is Fortran), -# use: inc=Fortran f=C. -# -# Note: For files without extension you can use no_extension as a placeholder. -# -# Note that for custom extensions you also need to set FILE_PATTERNS otherwise -# the files are not read by doxygen. When specifying no_extension you should add -# * to the FILE_PATTERNS. -# -# Note see also the list of default file extension mappings. - -EXTENSION_MAPPING = - -# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments -# according to the Markdown format, which allows for more readable -# documentation. See https://daringfireball.net/projects/markdown/ for details. -# The output of markdown processing is further processed by doxygen, so you can -# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in -# case of backward compatibility issues. -# The default value is: YES. - -MARKDOWN_SUPPORT = YES - -# When the TOC_INCLUDE_HEADINGS tag is set to a non-zero value, all headings up -# to that level are automatically included in the table of contents, even if -# they do not have an id attribute. -# Note: This feature currently applies only to Markdown headings. -# Minimum value: 0, maximum value: 99, default value: 5.
-# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -TOC_INCLUDE_HEADINGS = 5 - -# The MARKDOWN_ID_STYLE tag can be used to specify the algorithm used to -# generate identifiers for the Markdown headings. Note: Every identifier is -# unique. -# Possible values are: DOXYGEN Use a fixed 'autotoc_md' string followed by a -# sequence number starting at 0. and GITHUB Use the lower case version of title -# with any whitespace replaced by '-' and punctuation characters removed. -# The default value is: DOXYGEN. -# This tag requires that the tag MARKDOWN_SUPPORT is set to YES. - -MARKDOWN_ID_STYLE = DOXYGEN - -# When enabled doxygen tries to link words that correspond to documented -# classes, or namespaces to their corresponding documentation. Such a link can -# be prevented in individual cases by putting a % sign in front of the word or -# globally by setting AUTOLINK_SUPPORT to NO. -# The default value is: YES. - -AUTOLINK_SUPPORT = YES - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should set this -# tag to YES in order to let doxygen match function declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); -# versus func(std::string) {}). This also makes the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. -# The default value is: NO. - -BUILTIN_STL_SUPPORT = NO - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. -# The default value is: NO. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip (see: -# https://www.riverbankcomputing.com/software/sip/intro) sources only. Doxygen -# will parse them like normal C++ but will assume all classes use public instead -# of private inheritance when no explicit protection keyword is present. -# The default value is: NO.
- -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate -# getter and setter methods for a property. Setting this option to YES will make -# doxygen to replace the get and set methods by a property in the documentation. -# This will only work if the methods are indeed getting or setting a simple -# type. If this is not the case, or you want to show the methods anyway, you -# should set this option to NO. -# The default value is: YES. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. -# The default value is: NO. - -DISTRIBUTE_GROUP_DOC = NO - -# If one adds a struct or class to a group and this option is enabled, then also -# any nested class or struct is added to the same group. By default this option -# is disabled and one has to add nested compounds explicitly via \ingroup. -# The default value is: NO. - -GROUP_NESTED_COMPOUNDS = NO - -# Set the SUBGROUPING tag to YES to allow class member groups of the same type -# (for instance a group of public functions) to be put as a subgroup of that -# type (e.g. under the Public Functions section). Set it to NO to prevent -# subgrouping. Alternatively, this can be done per class using the -# \nosubgrouping command. -# The default value is: YES. - -SUBGROUPING = YES - -# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions -# are shown inside the group in which they are included (e.g. using \ingroup) -# instead of on a separate page (for HTML and Man pages) or section (for LaTeX -# and RTF). -# -# Note that this feature does not work in combination with -# SEPARATE_MEMBER_PAGES. -# The default value is: NO. 
- -INLINE_GROUPED_CLASSES = NO - -# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions -# with only public data fields or simple typedef fields will be shown inline in -# the documentation of the scope in which they are defined (i.e. file, -# namespace, or group documentation), provided this scope is documented. If set -# to NO, structs, classes, and unions are shown on a separate page (for HTML and -# Man pages) or section (for LaTeX and RTF). -# The default value is: NO. - -INLINE_SIMPLE_STRUCTS = NO - -# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or -# enum is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically be -# useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. -# The default value is: NO. - -TYPEDEF_HIDES_STRUCT = NO - -# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This -# cache is used to resolve symbols given their name and scope. Since this can be -# an expensive process and often the same symbol appears multiple times in the -# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small -# doxygen will become slower. If the cache is too large, memory is wasted. The -# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range -# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536 -# symbols. At the end of a run doxygen will report the cache usage and suggest -# the optimal cache size from a speed point of view. -# Minimum value: 0, maximum value: 9, default value: 0. 
- -LOOKUP_CACHE_SIZE = 0 - -# The NUM_PROC_THREADS specifies the number of threads doxygen is allowed to use -# during processing. When set to 0 doxygen will base this on the number of -# cores available in the system. You can set it explicitly to a value larger -# than 0 to get more control over the balance between CPU load and processing -# speed. At this moment only the input processing can be done using multiple -# threads. Since this is still an experimental feature the default is set to 1, -# which effectively disables parallel processing. Please report any issues you -# encounter. Generating dot graphs in parallel is controlled by the -# DOT_NUM_THREADS setting. -# Minimum value: 0, maximum value: 32, default value: 1. - -NUM_PROC_THREADS = 1 - -# If the TIMESTAMP tag is set different from NO then each generated page will -# contain the date or date and time when the page was generated. Setting this to -# NO can help when comparing the output of multiple runs. -# Possible values are: YES, NO, DATETIME and DATE. -# The default value is: NO. - -TIMESTAMP = NO - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in -# documentation are documented, even if no documentation was available. Private -# class members and static file members will be hidden unless the -# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES. -# Note: This will also disable the warnings about undocumented members that are -# normally produced when WARNINGS is set to YES. -# The default value is: NO. - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will -# be included in the documentation. -# The default value is: NO.
- -EXTRACT_PRIVATE = NO - -# If the EXTRACT_PRIV_VIRTUAL tag is set to YES, documented private virtual -# methods of a class will be included in the documentation. -# The default value is: NO. - -EXTRACT_PRIV_VIRTUAL = NO - -# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal -# scope will be included in the documentation. -# The default value is: NO. - -EXTRACT_PACKAGE = NO - -# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be -# included in the documentation. -# The default value is: NO. - -EXTRACT_STATIC = NO - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined -# locally in source files will be included in the documentation. If set to NO, -# only classes defined in header files are included. Does not have any effect -# for Java sources. -# The default value is: YES. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. If set to YES, local methods, -# which are defined in the implementation section but not in the interface are -# included in the documentation. If set to NO, only methods in the interface are -# included. -# The default value is: NO. - -EXTRACT_LOCAL_METHODS = NO - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base name of -# the file that contains the anonymous namespace. By default anonymous namespaces -# are hidden. -# The default value is: NO. - -EXTRACT_ANON_NSPACES = NO - -# If this flag is set to YES, the name of an unnamed parameter in a declaration -# will be determined by the corresponding definition. By default unnamed -# parameters remain unnamed in the output. -# The default value is: YES. - -RESOLVE_UNNAMED_PARAMS = YES - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all -# undocumented members inside documented classes or files.
If set to NO these -# members will be included in the various overviews, but no documentation -# section is generated. This option has no effect if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. If set -# to NO, these classes will be included in the various overviews. This option -# will also hide undocumented C++ concepts if enabled. This option has no effect -# if EXTRACT_ALL is enabled. -# The default value is: NO. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend -# declarations. If set to NO, these declarations will be included in the -# documentation. -# The default value is: NO. - -HIDE_FRIEND_COMPOUNDS = NO - -# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any -# documentation blocks found inside the body of a function. If set to NO, these -# blocks will be appended to the function's detailed documentation block. -# The default value is: NO. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation that is typed after a -# \internal command is included. If the tag is set to NO then the documentation -# will be excluded. Set it to YES to include the internal documentation. -# The default value is: NO. - -INTERNAL_DOCS = NO - -# With the correct setting of option CASE_SENSE_NAMES doxygen will better be -# able to match the capabilities of the underlying filesystem. In case the -# filesystem is case sensitive (i.e. it supports files in the same directory -# whose names only differ in casing), the option must be set to YES to properly -# deal with such files in case they appear in the input. 
For filesystems that -# are not case sensitive the option should be set to NO to properly deal with -# output files written for symbols that only differ in casing, such as for two -# classes, one named CLASS and the other named Class, and to also support -# references to files without having to specify the exact matching casing. On -# Windows (including Cygwin) and MacOS, users should typically set this option -# to NO, whereas on Linux or other Unix flavors it should typically be set to -# YES. -# Possible values are: SYSTEM, NO and YES. -# The default value is: SYSTEM. - -CASE_SENSE_NAMES = YES - -# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with -# their full class and namespace scopes in the documentation. If set to YES, the -# scope will be hidden. -# The default value is: NO. - -HIDE_SCOPE_NAMES = NO - -# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will -# append additional text to a page's title, such as Class Reference. If set to -# YES the compound reference will be hidden. -# The default value is: NO. - -HIDE_COMPOUND_REFERENCE= NO - -# If the SHOW_HEADERFILE tag is set to YES then the documentation for a class -# will show which file needs to be included to use the class. -# The default value is: YES. - -SHOW_HEADERFILE = YES - -# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of -# the files that are included by a file in the documentation of that file. -# The default value is: YES. - -SHOW_INCLUDE_FILES = YES - -# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each -# grouped member an include statement to the documentation, telling the reader -# which file to include in order to use the member. -# The default value is: NO. - -SHOW_GROUPED_MEMB_INC = NO - -# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include -# files with double quotes in the documentation rather than with sharp brackets. -# The default value is: NO. 
- -FORCE_LOCAL_INCLUDES = NO - -# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the -# documentation for inline members. -# The default value is: YES. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the -# (detailed) documentation of file and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. -# The default value is: YES. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief -# descriptions of file, namespace and class members alphabetically by member -# name. If set to NO, the members will appear in declaration order. Note that -# this will also influence the order of the classes in the class list. -# The default value is: NO. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the -# (brief and detailed) documentation of class members so that constructors and -# destructors are listed first. If set to NO the constructors will appear in the -# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS. -# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief -# member documentation. -# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting -# detailed member documentation. -# The default value is: NO. - -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy -# of group names into alphabetical order. If set to NO the group names will -# appear in their defined order. -# The default value is: NO. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by -# fully-qualified names, including namespaces. If set to NO, the class list will -# be sorted only by class name, not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. 
-# Note: This option applies only to the class list, not to the alphabetical -# list. -# The default value is: NO. - -SORT_BY_SCOPE_NAME = NO - -# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper -# type resolution of all parameters of a function it will reject a match between -# the prototype and the implementation of a member function even if there is -# only one candidate or it is obvious which candidate to choose by doing a -# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still -# accept a match between prototype and implementation in such cases. -# The default value is: NO. - -STRICT_PROTO_MATCHING = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo -# list. This list is created by putting \todo commands in the documentation. -# The default value is: YES. - -GENERATE_TODOLIST = YES - -# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test -# list. This list is created by putting \test commands in the documentation. -# The default value is: YES. - -GENERATE_TESTLIST = YES - -# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug -# list. This list is created by putting \bug commands in the documentation. -# The default value is: YES. - -GENERATE_BUGLIST = YES - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO) -# the deprecated list. This list is created by putting \deprecated commands in -# the documentation. -# The default value is: YES. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional documentation -# sections, marked by \if ... \endif and \cond -# ... \endcond blocks. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines that the -# initial value of a variable or macro / define can have for it to appear in the -# documentation. 
If the initializer consists of more lines than specified here -# it will be hidden. Use a value of 0 to hide initializers completely. The -# appearance of the value of individual variables and macros / defines can be -# controlled using \showinitializer or \hideinitializer command in the -# documentation regardless of this setting. -# Minimum value: 0, maximum value: 10000, default value: 30. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated at -# the bottom of the documentation of classes and structs. If set to YES, the -# list will mention the files that were used to generate the documentation. -# The default value is: YES. - -SHOW_USED_FILES = YES - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. This -# will remove the Files entry from the Quick Index and from the Folder Tree View -# (if specified). -# The default value is: YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the Namespaces -# page. This will remove the Namespaces entry from the Quick Index and from the -# Folder Tree View (if specified). -# The default value is: YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command command input-file, where command is the value of the -# FILE_VERSION_FILTER tag, and input-file is the name of an input file provided -# by doxygen. Whatever the program writes to standard output is used as the file -# version. For an example see the documentation. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed -# by doxygen. The layout file controls the global structure of the generated -# output files in an output format independent way. 
To create the layout file -# that represents doxygen's defaults, run doxygen with the -l option. You can -# optionally specify a file name after the option, if omitted DoxygenLayout.xml -# will be used as the name of the layout file. See also section "Changing the -# layout of pages" for information. -# -# Note that if you run doxygen from a directory containing a file called -# DoxygenLayout.xml, doxygen will parse it automatically even if the LAYOUT_FILE -# tag is left empty. - -LAYOUT_FILE = - -# The CITE_BIB_FILES tag can be used to specify one or more bib files containing -# the reference definitions. This must be a list of .bib files. The .bib -# extension is automatically appended if omitted. This requires the bibtex tool -# to be installed. See also https://en.wikipedia.org/wiki/BibTeX for more info. -# For LaTeX the style of the bibliography can be controlled using -# LATEX_BIB_STYLE. To use this feature you need bibtex and perl available in the -# search path. See also \cite for info how to create references. - -CITE_BIB_FILES = - -#--------------------------------------------------------------------------- -# Configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated to -# standard output by doxygen. If QUIET is set to YES this implies that the -# messages are off. -# The default value is: NO. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated to standard error (stderr) by doxygen. If WARNINGS is set to YES -# this implies that the warnings are on. -# -# Tip: Turn warnings on while writing the documentation. -# The default value is: YES. - -WARNINGS = YES - -# If the WARN_IF_UNDOCUMENTED tag is set to YES then doxygen will generate -# warnings for undocumented members. 
If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: YES. - -WARN_IF_UNDOCUMENTED = YES - -# If the WARN_IF_DOC_ERROR tag is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as documenting some parameters in -# a documented function twice, or documenting parameters that don't exist or -# using markup commands wrongly. -# The default value is: YES. - -WARN_IF_DOC_ERROR = YES - -# If WARN_IF_INCOMPLETE_DOC is set to YES, doxygen will warn about incomplete -# function parameter documentation. If set to NO, doxygen will accept that some -# parameters have no documentation without warning. -# The default value is: YES. - -WARN_IF_INCOMPLETE_DOC = YES - -# This WARN_NO_PARAMDOC option can be enabled to get warnings for functions that -# are documented, but have no documentation for their parameters or return -# value. If set to NO, doxygen will only warn about wrong parameter -# documentation, but not about the absence of documentation. If EXTRACT_ALL is -# set to YES then this flag will automatically be disabled. See also -# WARN_IF_INCOMPLETE_DOC -# The default value is: NO. - -WARN_NO_PARAMDOC = NO - -# If WARN_IF_UNDOC_ENUM_VAL option is set to YES, doxygen will warn about -# undocumented enumeration values. If set to NO, doxygen will accept -# undocumented enumeration values. If EXTRACT_ALL is set to YES then this flag -# will automatically be disabled. -# The default value is: NO. - -WARN_IF_UNDOC_ENUM_VAL = NO - -# If the WARN_AS_ERROR tag is set to YES then doxygen will immediately stop when -# a warning is encountered. If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS -# then doxygen will continue running as if WARN_AS_ERROR tag is set to NO, but -# at the end of the doxygen process doxygen will return with a non-zero status. 
-# If the WARN_AS_ERROR tag is set to FAIL_ON_WARNINGS_PRINT then doxygen behaves -# like FAIL_ON_WARNINGS but in case no WARN_LOGFILE is defined doxygen will not -# write the warning messages in between other messages but write them at the end -# of a run, in case a WARN_LOGFILE is defined the warning messages will be -# besides being in the defined file also be shown at the end of a run, unless -# the WARN_LOGFILE is defined as - i.e. standard output (stdout) in that case -# the behavior will remain as with the setting FAIL_ON_WARNINGS. -# Possible values are: NO, YES, FAIL_ON_WARNINGS and FAIL_ON_WARNINGS_PRINT. -# The default value is: NO. - -WARN_AS_ERROR = YES - -# The WARN_FORMAT tag determines the format of the warning messages that doxygen -# can produce. The string should contain the $file, $line, and $text tags, which -# will be replaced by the file and line number from which the warning originated -# and the warning text. Optionally the format may contain $version, which will -# be replaced by the version of the file (if it could be obtained via -# FILE_VERSION_FILTER) -# See also: WARN_LINE_FORMAT -# The default value is: $file:$line: $text. - -WARN_FORMAT = "$file:$line: $text" - -# In the $text part of the WARN_FORMAT command it is possible that a reference -# to a more specific place is given. To make it easier to jump to this place -# (outside of doxygen) the user can define a custom "cut" / "paste" string. -# Example: -# WARN_LINE_FORMAT = "'vi $file +$line'" -# See also: WARN_FORMAT -# The default value is: at line $line of file $file. - -WARN_LINE_FORMAT = "at line $line of file $file" - -# The WARN_LOGFILE tag can be used to specify a file to which warning and error -# messages should be written. If left blank the output is written to standard -# error (stderr). In case the file specified cannot be opened for writing the -# warning and error messages are written to standard error. 
When as file - is -# specified the warning and error messages are written to standard output -# (stdout). - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# Configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag is used to specify the files and/or directories that contain -# documented source files. You may enter file names like myfile.cpp or -# directories like /usr/src/myproject. Separate the files or directories with -# spaces. See also FILE_PATTERNS and EXTENSION_MAPPING -# Note: If this tag is empty the current directory is searched. - -INPUT = ../cpp/subprojects/boosting/ - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding. Doxygen uses -# libiconv (or the iconv built into libc) for the transcoding. See the libiconv -# documentation (see: -# https://www.gnu.org/software/libiconv/) for the list of possible encodings. -# See also: INPUT_FILE_ENCODING -# The default value is: UTF-8. - -INPUT_ENCODING = UTF-8 - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses The INPUT_FILE_ENCODING tag can be used to specify -# character encoding on a per file pattern basis. Doxygen will compare the file -# name with each pattern and apply the encoding instead of the default -# INPUT_ENCODING) if there is a match. The character encodings are a list of the -# form: pattern=encoding (like *.php=ISO-8859-1). See cfg_input_encoding -# "INPUT_ENCODING" for further information on supported encodings. - -INPUT_FILE_ENCODING = - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard patterns (like *.cpp and -# *.h) to filter out the source-files in the directories. 
-# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# read by doxygen. -# -# Note the list of default checked file patterns might differ from the list of -# default file extension mappings. -# -# If left blank the following patterns are tested:*.c, *.cc, *.cxx, *.cpp, -# *.c++, *.java, *.ii, *.ixx, *.ipp, *.i++, *.inl, *.idl, *.ddl, *.odl, *.h, -# *.hh, *.hxx, *.hpp, *.h++, *.l, *.cs, *.d, *.php, *.php4, *.php5, *.phtml, -# *.inc, *.m, *.markdown, *.md, *.mm, *.dox (to be provided as doxygen C -# comment), *.py, *.pyw, *.f90, *.f95, *.f03, *.f08, *.f18, *.f, *.for, *.vhd, -# *.vhdl, *.ucf, *.qsf and *.ice. - -FILE_PATTERNS = *.c \ - *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.idl \ - *.ddl \ - *.odl \ - *.h \ - *.hh \ - *.hxx \ - *.hpp \ - *.h++ \ - *.cs \ - *.d \ - *.php \ - *.php4 \ - *.php5 \ - *.phtml \ - *.inc \ - *.m \ - *.markdown \ - *.md \ - *.mm \ - *.dox \ - *.py \ - *.pyw \ - *.f90 \ - *.f95 \ - *.f03 \ - *.f08 \ - *.f18 \ - *.f \ - *.for \ - *.vhd \ - *.vhdl \ - *.ucf \ - *.qsf \ - *.ice - -# The RECURSIVE tag can be used to specify whether or not subdirectories should -# be searched for input files as well. -# The default value is: NO. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should be -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. -# -# Note that relative paths are relative to the directory from which doxygen is -# run. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used to select whether or not files or -# directories that are symbolic links (a Unix file system feature) are excluded -# from the input. -# The default value is: NO. 
- -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. -# -# Note that the wildcards are matched against the file with absolute path, so to -# exclude all test directories for example use the pattern */test/* - -EXCLUDE_PATTERNS = */seco/* - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. Examples: ANamespace, AClass, -# ANamespace::AClass, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or directories -# that contain example code fragments that are included (see the \include -# command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp and -# *.h) to filter out the source-files in the directories. If left blank all -# files are included. - -EXAMPLE_PATTERNS = * - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude commands -# irrespective of the value of the RECURSIVE tag. -# The default value is: NO. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or directories -# that contain images that are to be included in the documentation (see the -# \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command: -# -# -# -# where is the value of the INPUT_FILTER tag, and is the -# name of an input file. 
Doxygen will then use the output that the filter -# program writes to standard output. If FILTER_PATTERNS is specified, this tag -# will be ignored. -# -# Note that the filter must not add or remove lines; it is applied before the -# code is scanned, but not when the output code is generated. If lines are added -# or removed, the anchors will not be placed correctly. -# -# Note that doxygen will use the data processed and written to standard output -# for further processing, therefore nothing else, like debug statements or used -# commands (so in case of a Windows batch file always use @echo OFF), should be -# written to standard output. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: pattern=filter -# (like *.cpp=my_cpp_filter). See INPUT_FILTER for further information on how -# filters are used. If the FILTER_PATTERNS tag is empty or if none of the -# patterns match the file name, INPUT_FILTER is applied. -# -# Note that for custom extensions or not directly supported extensions you also -# need to set EXTENSION_MAPPING for the extension otherwise the files are not -# properly processed by doxygen. - -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will also be used to filter the input files that are used for -# producing the source files to browse (i.e. when SOURCE_BROWSER is set to YES). -# The default value is: NO. - -FILTER_SOURCE_FILES = NO - -# The FILTER_SOURCE_PATTERNS tag can be used to specify source filters per file -# pattern. 
A pattern will override the setting for FILTER_PATTERN (if any) and -# it is also possible to disable source filtering for a specific pattern using -# *.ext= (so without naming a filter). -# This tag requires that the tag FILTER_SOURCE_FILES is set to YES. - -FILTER_SOURCE_PATTERNS = - -# If the USE_MDFILE_AS_MAINPAGE tag refers to the name of a markdown file that -# is part of the input, its contents will be placed on the main page -# (index.html). This can be useful if you have a project on for instance GitHub -# and want to reuse the introduction page also for the doxygen output. - -USE_MDFILE_AS_MAINPAGE = - -# The Fortran standard specifies that for fixed formatted Fortran code all -# characters from position 72 are to be considered as comment. A common -# extension is to allow longer lines before the automatic comment starts. The -# setting FORTRAN_COMMENT_AFTER will also make it possible that longer lines can -# be processed before the automatic comment starts. -# Minimum value: 7, maximum value: 10000, default value: 72. - -FORTRAN_COMMENT_AFTER = 72 - -#--------------------------------------------------------------------------- -# Configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will be -# generated. Documented entities will be cross-referenced with these sources. -# -# Note: To get rid of all source code in the generated output, make sure that -# also VERBATIM_HEADERS is set to NO. -# The default value is: NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body of functions, -# classes and enums directly into the documentation. -# The default value is: NO. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES will instruct doxygen to hide any -# special comment blocks from generated source code fragments. 
Normal C, C++ and -# Fortran comments will always remain visible. -# The default value is: YES. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES then for each documented -# entity all documented functions referencing it will be listed. -# The default value is: NO. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES then for each documented function -# all documented entities called/used by that function will be listed. -# The default value is: NO. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES and SOURCE_BROWSER tag is set -# to YES then the hyperlinks from functions in REFERENCES_RELATION and -# REFERENCED_BY_RELATION lists will link to the source code. Otherwise they will -# link to the documentation. -# The default value is: YES. - -REFERENCES_LINK_SOURCE = YES - -# If SOURCE_TOOLTIPS is enabled (the default) then hovering a hyperlink in the -# source code will show a tooltip with additional information such as prototype, -# brief description and links to the definition and documentation. Since this -# will make the HTML file larger and loading of large files a bit slower, you -# can opt to disable this feature. -# The default value is: YES. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -SOURCE_TOOLTIPS = YES - -# If the USE_HTAGS tag is set to YES then the references to source code will -# point to the HTML generated by the htags(1) tool instead of doxygen built-in -# source browser. The htags tool is part of GNU's global source tagging system -# (see https://www.gnu.org/software/global/global.html). You will need version -# 4.8.6 or higher. 
-# -# To use it do the following: -# - Install the latest version of global -# - Enable SOURCE_BROWSER and USE_HTAGS in the configuration file -# - Make sure the INPUT points to the root of the source tree -# - Run doxygen as normal -# -# Doxygen will invoke htags (and that will in turn invoke gtags), so these -# tools must be available from the command line (i.e. in the search path). -# -# The result: instead of the source browser generated by doxygen, the links to -# source code will now point to the output of htags. -# The default value is: NO. -# This tag requires that the tag SOURCE_BROWSER is set to YES. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set the YES then doxygen will generate a -# verbatim copy of the header file for each class for which an include is -# specified. Set to NO to disable this. -# See also: Section \class. -# The default value is: YES. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# Configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index of all -# compounds will be generated. Enable this if the project contains a lot of -# classes, structs, unions or interfaces. -# The default value is: YES. - -ALPHABETICAL_INDEX = YES - -# The IGNORE_PREFIX tag can be used to specify a prefix (or a list of prefixes) -# that should be ignored while generating the index headers. The IGNORE_PREFIX -# tag works for classes, function and member names. The entity will be placed in -# the alphabetical list under the first letter of the entity name that remains -# after removing the prefix. -# This tag requires that the tag ALPHABETICAL_INDEX is set to YES. 
- -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES, doxygen will generate HTML output -# The default value is: YES. - -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. If a -# relative path is entered the value of OUTPUT_DIRECTORY will be put in front of -# it. -# The default directory is: html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for each -# generated HTML page (for example: .htm, .php, .asp). -# The default value is: .html. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a user-defined HTML header file for -# each generated HTML page. If the tag is left blank doxygen will generate a -# standard header. -# -# To get valid HTML the header file that includes any scripts and style sheets -# that doxygen needs, which is dependent on the configuration options used (e.g. -# the setting GENERATE_TREEVIEW). It is highly recommended to start with a -# default header using -# doxygen -w html new_header.html new_footer.html new_stylesheet.css -# YourConfigFile -# and then modify the file new_header.html. See also section "Doxygen usage" -# for information on how to generate the default header that doxygen normally -# uses. -# Note: The header is subject to change so you typically have to regenerate the -# default header when upgrading to a newer version of doxygen. For a description -# of the possible markers and block names see the documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a user-defined HTML footer for each -# generated HTML page. If the tag is left blank doxygen will generate a standard -# footer. See HTML_HEADER for more information on how to generate a default -# footer and what special commands can be used inside the footer. See also -# section "Doxygen usage" for information on how to generate the default footer -# that doxygen normally uses. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading style -# sheet that is used by each HTML page. It can be used to fine-tune the look of -# the HTML output. If left blank doxygen will generate a default style sheet. -# See also section "Doxygen usage" for information on how to generate the style -# sheet that doxygen normally uses. -# Note: It is recommended to use HTML_EXTRA_STYLESHEET instead of this tag, as -# it is more robust and this tag (HTML_STYLESHEET) will in the future become -# obsolete. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_STYLESHEET = - -# The HTML_EXTRA_STYLESHEET tag can be used to specify additional user-defined -# cascading style sheets that are included after the standard style sheets -# created by doxygen. Using this option one can overrule certain style aspects. -# This is preferred over using HTML_STYLESHEET since it does not replace the -# standard style sheet and is therefore more robust against future updates. -# Doxygen will copy the style sheet files to the output directory. -# Note: The order of the extra style sheet files is of importance (e.g. the last -# style sheet in the list overrules the setting of the previous ones in the -# list). -# Note: Since the styling of scrollbars can currently not be overruled in -# Webkit/Chromium, the styling will be left out of the default doxygen.css if -# one or more extra stylesheets have been specified. 
So if scrollbar -# customization is desired it has to be added explicitly. For an example see the -# documentation. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_STYLESHEET = - -# The HTML_EXTRA_FILES tag can be used to specify one or more extra images or -# other source files which should be copied to the HTML output directory. Note -# that these files will be copied to the base HTML output directory. Use the -# $relpath^ marker in the HTML_HEADER and/or HTML_FOOTER files to load these -# files. In the HTML_STYLESHEET file, use the file name only. Also note that the -# files will be copied as-is; there are no commands or markers available. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_EXTRA_FILES = - -# The HTML_COLORSTYLE tag can be used to specify if the generated HTML output -# should be rendered with a dark or light theme. -# Possible values are: LIGHT always generate light mode output, DARK always -# generate dark mode output, AUTO_LIGHT automatically set the mode according to -# the user preference, use light mode if no preference is set (the default), -# AUTO_DARK automatically set the mode according to the user preference, use -# dark mode if no preference is set and TOGGLE allow to user to switch between -# light and dark mode via a button. -# The default value is: AUTO_LIGHT. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE = AUTO_LIGHT - -# The HTML_COLORSTYLE_HUE tag controls the color of the HTML output. Doxygen -# will adjust the colors in the style sheet and background images according to -# this color. Hue is specified as an angle on a color-wheel, see -# https://en.wikipedia.org/wiki/Hue for more information. For instance the value -# 0 represents red, 60 is yellow, 120 is green, 180 is cyan, 240 is blue, 300 -# purple, and 360 is red again. -# Minimum value: 0, maximum value: 359, default value: 220. 
-# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_HUE = 220 - -# The HTML_COLORSTYLE_SAT tag controls the purity (or saturation) of the colors -# in the HTML output. For a value of 0 the output will use gray-scales only. A -# value of 255 will produce the most vivid colors. -# Minimum value: 0, maximum value: 255, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_SAT = 100 - -# The HTML_COLORSTYLE_GAMMA tag controls the gamma correction applied to the -# luminance component of the colors in the HTML output. Values below 100 -# gradually make the output lighter, whereas values above 100 make the output -# darker. The value divided by 100 is the actual gamma applied, so 80 represents -# a gamma of 0.8, The value 220 represents a gamma of 2.2, and 100 does not -# change the gamma. -# Minimum value: 40, maximum value: 240, default value: 80. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_COLORSTYLE_GAMMA = 80 - -# If the HTML_DYNAMIC_MENUS tag is set to YES then the generated HTML -# documentation will contain a main index with vertical navigation menus that -# are dynamically created via JavaScript. If disabled, the navigation index will -# consists of multiple levels of tabs that are statically embedded in every HTML -# page. Disable this option to support browsers that do not have JavaScript, -# like the Qt help browser. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_DYNAMIC_MENUS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -HTML_DYNAMIC_SECTIONS = NO - -# With HTML_INDEX_NUM_ENTRIES one can control the preferred number of entries -# shown in the various tree structured indices initially; the user can expand -# and collapse entries dynamically later on. Doxygen will expand the tree to -# such a level that at most the specified number of entries are visible (unless -# a fully collapsed tree already exceeds this amount). So setting the number of -# entries 1 will produce a full collapsed tree by default. 0 is a special value -# representing an infinite number of entries and will result in a full expanded -# tree by default. -# Minimum value: 0, maximum value: 9999, default value: 100. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_INDEX_NUM_ENTRIES = 100 - -# If the GENERATE_DOCSET tag is set to YES, additional index files will be -# generated that can be used as input for Apple's Xcode 3 integrated development -# environment (see: -# https://developer.apple.com/xcode/), introduced with OSX 10.5 (Leopard). To -# create a documentation set, doxygen will generate a Makefile in the HTML -# output directory. Running make will produce the docset in that directory and -# running make install will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find it at -# startup. See https://developer.apple.com/library/archive/featuredarticles/Doxy -# genXcode/_index.html for more information. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_DOCSET = NO - -# This tag determines the name of the docset feed. A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# The default value is: Doxygen generated docs. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# This tag determines the URL of the docset feed. 
A documentation feed provides -# an umbrella under which multiple documentation sets from a single provider -# (such as a company or product suite) can be grouped. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_FEEDURL = - -# This tag specifies a string that should uniquely identify the documentation -# set bundle. This should be a reverse domain-name style string, e.g. -# com.mycompany.MyDocSet. Doxygen will append .docset to the name. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# The DOCSET_PUBLISHER_ID tag specifies a string that should uniquely identify -# the documentation publisher. This should be a reverse domain-name style -# string, e.g. com.mycompany.MyDocSet.documentation. -# The default value is: org.doxygen.Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_ID = org.doxygen.Publisher - -# The DOCSET_PUBLISHER_NAME tag identifies the documentation publisher. -# The default value is: Publisher. -# This tag requires that the tag GENERATE_DOCSET is set to YES. - -DOCSET_PUBLISHER_NAME = Publisher - -# If the GENERATE_HTMLHELP tag is set to YES then doxygen generates three -# additional HTML index files: index.hhp, index.hhc, and index.hhk. The -# index.hhp is a project file that can be read by Microsoft's HTML Help Workshop -# on Windows. In the beginning of 2021 Microsoft took the original page, with -# a.o. the download links, offline (the HTML help workshop was already many years -# in maintenance mode). You can download the HTML help workshop from the web -# archives at Installation executable (see: -# http://web.archive.org/web/20160201063255/http://download.microsoft.com/downlo -# ad/0/A/9/0A939EF6-E31C-430F-A3DF-DFAE7960D564/htmlhelp.exe).
-# -# The HTML Help Workshop contains a compiler that can convert all HTML output -# generated by doxygen into a single compiled HTML file (.chm). Compiled HTML -# files are now used as the Windows 98 help format, and will replace the old -# Windows help format (.hlp) on all Windows platforms in the future. Compressed -# HTML files also contain an index, a table of contents, and you can search for -# words in the documentation. The HTML workshop also contains a viewer for -# compressed HTML files. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_HTMLHELP = NO - -# The CHM_FILE tag can be used to specify the file name of the resulting .chm -# file. You can add a path in front of the file if the result should not be -# written to the html output directory. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_FILE = - -# The HHC_LOCATION tag can be used to specify the location (absolute path -# including file name) of the HTML help compiler (hhc.exe). If non-empty, -# doxygen will try to run the HTML help compiler on the generated index.hhp. -# The file has to be specified with full path. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -HHC_LOCATION = - -# The GENERATE_CHI flag controls if a separate .chi index file is generated -# (YES) or that it should be included in the main .chm file (NO). -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -GENERATE_CHI = NO - -# The CHM_INDEX_ENCODING is used to encode HtmlHelp index (hhk), content (hhc) -# and project file content. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -CHM_INDEX_ENCODING = - -# The BINARY_TOC flag controls whether a binary table of contents is generated -# (YES) or a normal table of contents (NO) in the .chm file. Furthermore it -# enables the Previous and Next buttons. -# The default value is: NO. 
-# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members to -# the table of contents of the HTML help documentation and to the tree view. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTMLHELP is set to YES. - -TOC_EXPAND = NO - -# The SITEMAP_URL tag is used to specify the full URL of the place where the -# generated documentation will be placed on the server by the user during the -# deployment of the documentation. The generated sitemap is called sitemap.xml -# and placed on the directory specified by HTML_OUTPUT. In case no SITEMAP_URL -# is specified no sitemap is generated. For information about the sitemap -# protocol see https://www.sitemaps.org -# This tag requires that the tag GENERATE_HTML is set to YES. - -SITEMAP_URL = - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and -# QHP_VIRTUAL_FOLDER are set, an additional index file will be generated that -# can be used as input for Qt's qhelpgenerator to generate a Qt Compressed Help -# (.qch) of the generated HTML documentation. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can be used to specify -# the file name of the resulting .qch file. The path specified is relative to -# the HTML output folder. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating Qt Help -# Project output. For more information please see Qt Help Project / Namespace -# (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#namespace). -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_QHP is set to YES. 
- -QHP_NAMESPACE = org.doxygen.Project - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating Qt -# Help Project output. For more information please see Qt Help Project / Virtual -# Folders (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#virtual-folders). -# The default value is: doc. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_VIRTUAL_FOLDER = doc - -# If the QHP_CUST_FILTER_NAME tag is set, it specifies the name of a custom -# filter to add. For more information please see Qt Help Project / Custom -# Filters (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILTER_ATTRS tag specifies the list of the attributes of the -# custom filter to add. For more information please see Qt Help Project / Custom -# Filters (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#custom-filters). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this -# project's filter section matches. Qt Help Project / Filter Attributes (see: -# https://doc.qt.io/archives/qt-4.8/qthelpproject.html#filter-attributes). -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHP_SECT_FILTER_ATTRS = - -# The QHG_LOCATION tag can be used to specify the location (absolute path -# including file name) of Qt's qhelpgenerator. If non-empty doxygen will try to -# run qhelpgenerator on the generated .qhp file. -# This tag requires that the tag GENERATE_QHP is set to YES. - -QHG_LOCATION = - -# If the GENERATE_ECLIPSEHELP tag is set to YES, additional index files will be -# generated, together with the HTML files, they form an Eclipse help plugin. 
To -# install this plugin and make it available under the help contents menu in -# Eclipse, the contents of the directory containing the HTML and XML files needs -# to be copied into the plugins directory of eclipse. The name of the directory -# within the plugins directory should be the same as the ECLIPSE_DOC_ID value. -# After copying Eclipse needs to be restarted before the help appears. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_ECLIPSEHELP = NO - -# A unique identifier for the Eclipse help plugin. When installing the plugin -# the directory name containing the HTML and XML files should also have this -# name. Each documentation set should have its own identifier. -# The default value is: org.doxygen.Project. -# This tag requires that the tag GENERATE_ECLIPSEHELP is set to YES. - -ECLIPSE_DOC_ID = org.doxygen.Project - -# If you want full control over the layout of the generated HTML pages it might -# be necessary to disable the index and replace it with your own. The -# DISABLE_INDEX tag can be used to turn on/off the condensed index (tabs) at top -# of each HTML page. A value of NO enables the index and the value YES disables -# it. Since the tabs in the index contain the same information as the navigation -# tree, you can set this option to YES if you also set GENERATE_TREEVIEW to YES. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -DISABLE_INDEX = NO - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. If the tag -# value is set to YES, a side panel will be generated containing a tree-like -# index structure (just like the one that is generated for HTML Help). For this -# to work a browser that supports JavaScript, DHTML, CSS and frames is required -# (i.e. any modern browser). Windows users are probably better off using the -# HTML help feature. 
Via custom style sheets (see HTML_EXTRA_STYLESHEET) one can -# further fine tune the look of the index (see "Fine-tuning the output"). As an -# example, the default style sheet generated by doxygen has an example that -# shows how to put an image at the root of the tree instead of the PROJECT_NAME. -# Since the tree basically has the same information as the tab index, you could -# consider setting DISABLE_INDEX to YES when enabling this option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -GENERATE_TREEVIEW = NO - -# When both GENERATE_TREEVIEW and DISABLE_INDEX are set to YES, then the -# FULL_SIDEBAR option determines if the side bar is limited to only the treeview -# area (value NO) or if it should extend to the full height of the window (value -# YES). Setting this to YES gives a layout similar to -# https://docs.readthedocs.io with more room for contents, but less room for the -# project logo, title, and description. If either GENERATE_TREEVIEW or -# DISABLE_INDEX is set to NO, this option has no effect. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FULL_SIDEBAR = NO - -# The ENUM_VALUES_PER_LINE tag can be used to set the number of enum values that -# doxygen will group on one line in the generated HTML documentation. -# -# Note that a value of 0 will completely suppress the enum values from appearing -# in the overview section. -# Minimum value: 0, maximum value: 20, default value: 4. -# This tag requires that the tag GENERATE_HTML is set to YES. - -ENUM_VALUES_PER_LINE = 4 - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be used -# to set the initial width (in pixels) of the frame in which the tree is shown. -# Minimum value: 0, maximum value: 1500, default value: 250. -# This tag requires that the tag GENERATE_HTML is set to YES. 
- -TREEVIEW_WIDTH = 250 - -# If the EXT_LINKS_IN_WINDOW option is set to YES, doxygen will open links to -# external symbols imported via tag files in a separate window. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -EXT_LINKS_IN_WINDOW = NO - -# If the OBFUSCATE_EMAILS tag is set to YES, doxygen will obfuscate email -# addresses. -# The default value is: YES. -# This tag requires that the tag GENERATE_HTML is set to YES. - -OBFUSCATE_EMAILS = NO - -# If the HTML_FORMULA_FORMAT option is set to svg, doxygen will use the pdf2svg -# tool (see https://github.com/dawbarton/pdf2svg) or inkscape (see -# https://inkscape.org) to generate formulas as SVG images instead of PNGs for -# the HTML output. These images will generally look nicer at scaled resolutions. -# Possible values are: png (the default) and svg (looks nicer but requires the -# pdf2svg or inkscape tool). -# The default value is: png. -# This tag requires that the tag GENERATE_HTML is set to YES. - -HTML_FORMULA_FORMAT = png - -# Use this tag to change the font size of LaTeX formulas included as images in -# the HTML documentation. When you change the font size after a successful -# doxygen run you need to manually remove any form_*.png images from the HTML -# output directory to force them to be regenerated. -# Minimum value: 8, maximum value: 50, default value: 10. -# This tag requires that the tag GENERATE_HTML is set to YES. - -FORMULA_FONTSIZE = 10 - -# The FORMULA_MACROFILE can contain LaTeX \newcommand and \renewcommand commands -# to create new LaTeX commands to be used in formulas as building blocks. See -# the section "Including formulas" for details. - -FORMULA_MACROFILE = - -# Enable the USE_MATHJAX option to render LaTeX formulas using MathJax (see -# https://www.mathjax.org) which uses client side JavaScript for the rendering -# instead of using pre-rendered bitmaps. 
Use this if you do not have LaTeX -# installed or if you want the formulas to look prettier in the HTML output. When -# enabled you may also need to install MathJax separately and configure the path -# to it using the MATHJAX_RELPATH option. -# The default value is: NO. -# This tag requires that the tag GENERATE_HTML is set to YES. - -USE_MATHJAX = NO - -# With MATHJAX_VERSION it is possible to specify the MathJax version to be used. -# Note that the different versions of MathJax have different requirements with -# regards to the different settings, so it is possible that also other MathJax -# settings have to be changed when switching between the different MathJax -# versions. -# Possible values are: MathJax_2 and MathJax_3. -# The default value is: MathJax_2. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_VERSION = MathJax_2 - -# When MathJax is enabled you can set the default output format to be used for -# the MathJax output. For more details about the output format see MathJax -# version 2 (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) and MathJax version 3 -# (see: -# http://docs.mathjax.org/en/latest/web/components/output.html). -# Possible values are: HTML-CSS (which is slower, but has the best -# compatibility. This is the name for MathJax version 2, for MathJax version 3 -# this will be translated into chtml), NativeMML (i.e. MathML. Only supported -# for MathJax 2. For MathJax version 3 chtml will be used instead.), chtml (This -# is the name for MathJax version 3, for MathJax version 2 this will be -# translated into HTML-CSS) and SVG. -# The default value is: HTML-CSS. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_FORMAT = HTML-CSS - -# When MathJax is enabled you need to specify the location relative to the HTML -# output directory using the MATHJAX_RELPATH option. The destination directory -# should contain the MathJax.js script.
For instance, if the mathjax directory -# is located at the same level as the HTML output directory, then -# MATHJAX_RELPATH should be ../mathjax. The default value points to the MathJax -# Content Delivery Network so you can quickly see the result without installing -# MathJax. However, it is strongly recommended to install a local copy of -# MathJax from https://www.mathjax.org before deployment. The default value is: -# - in case of MathJax version 2: https://cdn.jsdelivr.net/npm/mathjax@2 -# - in case of MathJax version 3: https://cdn.jsdelivr.net/npm/mathjax@3 -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_RELPATH = https://cdn.jsdelivr.net/npm/mathjax@2 - -# The MATHJAX_EXTENSIONS tag can be used to specify one or more MathJax -# extension names that should be enabled during MathJax rendering. For example -# for MathJax version 2 (see -# https://docs.mathjax.org/en/v2.7-latest/tex.html#tex-and-latex-extensions): -# MATHJAX_EXTENSIONS = TeX/AMSmath TeX/AMSsymbols -# For example for MathJax version 3 (see -# http://docs.mathjax.org/en/latest/input/tex/extensions/index.html): -# MATHJAX_EXTENSIONS = ams -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_EXTENSIONS = - -# The MATHJAX_CODEFILE tag can be used to specify a file with javascript pieces -# of code that will be used on startup of the MathJax code. See the MathJax site -# (see: -# http://docs.mathjax.org/en/v2.7-latest/output.html) for more details. For an -# example see the documentation. -# This tag requires that the tag USE_MATHJAX is set to YES. - -MATHJAX_CODEFILE = - -# When the SEARCHENGINE tag is enabled doxygen will generate a search box for -# the HTML output. The underlying search engine uses javascript and DHTML and -# should work on any modern browser. Note that when using HTML help -# (GENERATE_HTMLHELP), Qt help (GENERATE_QHP), or docsets (GENERATE_DOCSET) -# there is already a search function so this one should typically be disabled. 
-# For large projects the javascript based search engine can be slow, then -# enabling SERVER_BASED_SEARCH may provide a better solution. It is possible to -# search using the keyboard; to jump to the search box use + S -# (what the is depends on the OS and browser, but it is typically -# , /